youtube_transcript2020 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba1ffdf17cf508bdd7a8f62eba92956b0eb7ab08cb41833161f0144b9ca845de
4
- data.tar.gz: de5a51c14348e42338974e4ef938f0df0c4aa6e3dba4882e93ec5160f95b7866
3
+ metadata.gz: 9462fe85e21ee061f717e564efb7b66bec2e62b1c454228d6fb8f31633f7363d
4
+ data.tar.gz: bb04a03be0cb61058682ce4d9c1159666e41feb2a794fe65d07fbae418412056
5
5
  SHA512:
6
- metadata.gz: d7a8c54b09548c1ac4b0d8a3419e839fca9c2c4f3388b3355be109ce96b88622c00fdcffa7415bb76e20980123915db753c8611945e9e91f273a1494f8e6f11b
7
- data.tar.gz: a5d38c9e2f51dc8871622b0cce5a788cc80cfc788e4e841e31ff37e3ae36a5ade3a42ad7b8c3795a0747daecc4886bc78f81fd11b1792402e72bd3ed063270b0
6
+ metadata.gz: d8d4f57bb617e07f647727eaf56c81f1a7efc5eaccc71c323b54e887a270691f01f1d58cc64fe505a40ad6bd5ebd2cea3ea5fc87b7950abe773697acb5e7cc4b
7
+ data.tar.gz: 9a9a4e72f99f70aca736171490bba54d563aa722576ce6d91435177ed9f0fb327080d84ead963474ab8e46cd1c2dfc4709d2ab93c20853c2a376b091399ed6ad
checksums.yaml.gz.sig CHANGED
Binary file
@@ -12,21 +12,25 @@ class YoutubeTranscript2020
12
12
 
13
13
  attr_reader :to_a, :author, :id, :title
14
14
 
15
- def initialize(id=nil, debug: false)
15
+ def initialize(id=nil, debug: false)
16
16
 
17
17
  return unless id
18
-
18
+
19
19
  @debug = debug
20
20
 
21
21
  @id = id[/https?:\/\//] ? YoutubeID.from(id) : id
22
22
 
23
- s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
24
- @s = parse(s) unless s.empty?
23
+ # Fetching the transcript from the following statement no longer works.
24
+ # Instead, copy and paste the transcript from the YouTube video page into
25
+ # a text file and import it.
26
+ #
27
+ #s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
28
+ #@s = parse(s) unless s.empty?
25
29
 
26
30
  fetch_info(@id)
27
31
 
28
32
  end
29
-
33
+
30
34
  def to_a()
31
35
  @a
32
36
  end
@@ -38,7 +42,7 @@ class YoutubeTranscript2020
38
42
  h = {id: @id, title: @title, author: @author}
39
43
  SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s
40
44
  end
41
-
45
+
42
46
  def to_text()
43
47
  @a.map(&:last).join("\n")
44
48
  end
@@ -47,7 +51,7 @@ class YoutubeTranscript2020
47
51
  #
48
52
  def import(obj)
49
53
 
50
- s = RXFHelper.read(obj).first
54
+ s = RXFReader.read(obj).first
51
55
 
52
56
  if s =~ /------+/ then
53
57
  header, body = s.split(/-----+/,2)
@@ -61,7 +65,7 @@ class YoutubeTranscript2020
61
65
  end
62
66
 
63
67
  puts 'body: ' + body[0..400] if @debug
64
- a = body.lines.map(&:chomp).partition {|x| x =~ /\d+:\d+/ }
68
+ a = body.lines.map(&:chomp).partition {|x| x =~ /\d+:\d+/ }
65
69
  @a = a[0].zip(a[1])
66
70
 
67
71
  @s = join_sentences(@a) if raw_transcript
@@ -75,13 +79,13 @@ class YoutubeTranscript2020
75
79
  url = 'https://www.youtube.com/embed/' + @id
76
80
 
77
81
  links = @a.map do |timestamp, s|
78
-
79
- seconds = Subunit.new(units={minutes:60, hours:60},
82
+
83
+ seconds = Subunit.new(units={minutes:60, hours:60},
80
84
  timestamp.split(':').map(&:to_i)).to_i
81
85
  "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
82
86
  % [url, seconds, timestamp, s]
83
87
  end
84
-
88
+
85
89
  puts '@html_embed: ' + @html_embed.inspect if @debug
86
90
  doc = Rexle.new(@html_embed.to_s)
87
91
  puts 'before attributes'
@@ -116,9 +120,9 @@ EOF
116
120
  # Outputs plain text containing the headings including timestamps
117
121
  # note: This can be helpful for copying and pasting directly into a YouTube comment
118
122
  #
119
- def to_headings()
120
-
121
- @to_a.select {|timestamp, _| timestamp =~ / /}.map(&:first)
123
+ def to_headings()
124
+
125
+ @to_a.select {|timestamp, _| timestamp =~ / /}.map(&:first)
122
126
 
123
127
  end
124
128
 
@@ -133,53 +137,53 @@ EOF
133
137
  private
134
138
 
135
139
  def fetch_info(id)
136
-
137
- url = "http://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=#{id}&format=xml"
140
+
141
+ url = "https://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=#{id}&format=xml"
138
142
  s = Net::HTTP.get(URI(url))
139
-
143
+
140
144
  e = Rexle.new(s).root
141
-
145
+
142
146
  @title = e.text('title')
143
147
  @author = e.text('author_name')
144
148
  @html_embed = e.text('html').unescape
145
149
  puts '@html_embed: ' + @html_embed.inspect if @debug
146
-
150
+
147
151
  end
148
-
152
+
149
153
  def join_sentences(a)
150
-
154
+
151
155
  if @debug then
152
156
  puts 'inside join_sentence'
153
157
  puts 'a: ' + a.take(3).inspect
154
158
  end
155
-
159
+
156
160
  a2 = []
157
161
 
158
162
  # the following cleans up sentences that start with And, Or, But, So etc.
159
163
 
160
164
  (0..a.length - 1).each do |n|
161
-
165
+
162
166
  time, s = a[n]
163
167
 
164
- puts 's: ' + s.inspect if @debug
165
-
168
+ puts 's: ' + s.inspect if @debug
169
+
166
170
  if s[/^[a-z|0-9]|I\b|I'/] then
167
-
171
+
168
172
  if a2.any? then
169
-
170
- # only join two parts together if there was no full stop in
173
+
174
+ # only join two parts together if there was no full stop in
171
175
  # the previous line
172
-
176
+
173
177
  if a2[-1][-1] != /\.$/ then
174
- a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
178
+ a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
175
179
  else
176
180
  a2 << [time, s]
177
181
  end
178
-
179
- else
182
+
183
+ else
180
184
  a2 << [time, s.capitalize]
181
185
  end
182
-
186
+
183
187
  elsif s[/^And,? /]
184
188
  a2[-1][-1] += ' ' + s.sub(/^And,? /,'').capitalize
185
189
  elsif s[/^Or,? /]
@@ -191,40 +195,40 @@ EOF
191
195
  elsif s[/^So,? /]
192
196
  a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
193
197
  elsif s[/^\[(?:Music|Applause)\]/i]
194
-
198
+
195
199
  # ignore it
196
200
  puts 'ignoring action commentary' if @debug
197
201
  a2 << [time, '.']
198
-
199
- # To promote the next sentence to a new timestamp we
202
+
203
+ # To promote the next sentence to a new timestamp we
200
204
  # capitalize the 1st letter
201
205
  a[n+1][-1] = a[n+1][-1].capitalize if a[n+1]
202
206
  else
203
-
207
+
204
208
  if a2.any? and not a2[-1][-1] =~ /\.\s*$/ then
205
- a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
209
+ a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
206
210
  else
207
211
  a2 << [time, s]
208
212
  end
209
-
213
+
210
214
  end
211
215
 
212
216
  end
213
-
217
+
214
218
  # Remove those modified entries which were labelled [Music] etc
215
219
  a2.reject! {|time, s| s.length < 2}
216
220
 
217
221
  # formats the paragraph with the timestamp appearing above
218
222
  @a = a2
219
- a2.map {|time, s| "\n%s\n\n%s" % [time, s]}.join("\n")
220
-
223
+ a2.map {|time, s| "\n%s\n\n%s" % [time, s]}.join("\n")
224
+
221
225
  end
222
226
 
223
227
  def parse(s)
224
228
 
225
229
  doc = Rexle.new(s)
226
230
 
227
- a = doc.root.elements.each.map do |x|
231
+ a = doc.root.elements.each.map do |x|
228
232
  timestamp = Subunit.new(units={minutes:60, hours:60}, \
229
233
  seconds: x.attributes[:start].to_f).to_s(verbose: false)
230
234
  [timestamp, x.text.unescape.gsub("\n", ' ').gsub('&#39;',"'").gsub('&quot;','"')]
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_transcript2020
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,72 +11,72 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzI3MjI1MTUyWhcN
15
- MjEwNzI3MjI1MTUyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxfRw+
17
- xg525jF+UNFVTtUrON2issNxWgDTq1efjPq9yMzqYrIDZREFE/3fgYbtAqA1Ut94
18
- 2h8mAKnAg1CC4plPA8o15f+h30TPRaxZXFmYUMxTkaLHL4Lvzd1D7eXqRYf9SFQM
19
- EvoYbncj9QwR57WcVF/MTdwbyyiZo3CGzwmWNb9OCIZtvs8m/UOzAmbfF3lIKz9k
20
- +ZK03KqYhyjuAiVhF39LdWUc1AWqu5i+JpFE+Lzfqv1uAjjgshmUkHOXkpWOorHc
21
- uxL0+xZXWgTwpa1QCw3cQY1LW45QjZt4ckA9lOub1LvUTDCvZocNS+dlIUMdW0mP
22
- jFII/nX/KWxW+NOmkWBpdGbXmY5QTppwx88r+VRpTdhepVcNiiHhMsYQsLI/fzVo
23
- kWTib/aBnAoahtlbaldC+e03GPsLPmpTl4ZjOFqUuAyq47h42NYt6kPY/y7Gj8To
24
- fx4pNgddR/r/WABaNao8Q+tzIxgQwCf1rijvfJP+u04GCmIeFm8oQ1x0XkUCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1nkRML1E
26
- Q0PgH/jEHBOQSUTi4MYwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMzIxMjAxOTMzWhcN
15
+ MjMwMzIxMjAxOTMzWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCesuFG
17
+ FfZDH7Xm8kEyH9B3OczqfuJW5yZOfANTnUsB864vtSWe6Vghp6JUI0bOcOQdMvIx
18
+ HO4wfaGwvKJtjWCXYdZo2QYXjf6caY007R10GVxzsBYh8Swym0SYf33ljxX+R9DS
19
+ WwdWIv9SU0T7quNEbeXa9dtZJqlFCspmni8MB284ZpqhP2bpvfhBT58dLEUnjcRP
20
+ rcnCBEueIWYkwoZ8K4/BlYrBfgWcm9hxfBimsID0CIDqD2mhOJo/NQSrJJNWTmOt
21
+ oBZg4K2Y/GCmpxS9wQCrM4pBlTjy/mfNWIxDa9xdrIEmQtSng+7X6wvWAiJmFG7Y
22
+ HYN+ARNOx6ODVGYa/GrLWTBr4EL6RJuOD6eqpxD0hjvTczS12RFIGZh9kKXVT7wy
23
+ gkF5vdtR8uyR8Eo8mJM39Nv7yzuj8cRhCAto6aWOx+srVP/woM96qSQ7Ro0/YaeM
24
+ PHHcgZfU4HGdkCJ5Y8gaO9AzioExf2uFfV/m4+pPcBRbNkymj0+qgT/UFyMCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTmoD5rx8
26
+ rZ1imkIWMgtbUzNAn4YwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAWjyRzOnO0k/P2YHBsie5hNyJq6q7zb9bto2WYF1L
29
- N0/cvumuBsJMDUuPlD9RFvzncZbu//hbnZbK6cxiptm9HUN+m7zNi8XUcDHQw4Ba
30
- 17ZyHWKM2pkf+PJb4waQVeqyUXjbM9r6L8cVa1gkalU6ZpqEtBmkEzJCDZVf0Fll
31
- KrPYWAW5cC7EWeDm1yxusOqzxnkBcXMnKYNJm8KU4YfVpgPXJy9bTLWhm482BlJm
32
- v6wUZwYOM9B7x3dWbbsQXSuKmFqoxiNRWaA41qUS6eVjXpd4Gn/diSzntaX/Whew
33
- dCXyioQY49CVGJg8LpX/zSYUk9dns+fCSeUUfKjv2K8WuzVkS/uMA8DxSeYBfxf5
34
- ON+xcGIy3Nk7FHwY+CuIIa4WCJYB+1bVFeyCaRlCpwHK8DGUxP5PzCb44USGTI2V
35
- 42/R+mfGUgXXd9e36R3+wmfHZSFR6p6I6XKToCKca7buvgP2XgO9I04lTYUr0KLi
36
- 6ZSQYo0XuSVg3by/5kp1TrrS
28
+ BgkqhkiG9w0BAQsFAAOCAYEAAnULhDB7LFrqhVw2ms+IyRTJcJSfFxFcTPG5/mEW
29
+ r8pyTmXvBOr9WIid7QNaUcHTMVlt03v/XCrEex+GajjDspH+rL0iw3poTyvQHeNt
30
+ WgMJiYJH1AZYTSIPnkdkoo6ok4jb9S4B6mgX7tGcBXMq0q3B2o8YZIwRPzajDvyf
31
+ ZgP+vWq4HfkE7/sLTPRoz+WF6c+0w6NAvCPh/LT9qQjwXhtKquprkPfR3+G9tyNO
32
+ rWGzBuj63YgqWsNTF0wZLXDMAGHsJvJa2plhhkMGU7/SMxxdG25A7THeTVMNH7kM
33
+ 041VYN5fokzIIVKn38M4giKliDGEWvnFnEKEeb6Hrgser85Z+P7GjC642k1FHGvb
34
+ T8Jyb5XNJAWcNTk2AspDthbjYwOYAPP1KSLoCbhABW2Dqb6Y+pDOtoHoVbQtx7Ja
35
+ Eh31Azzsjb9JoMQLQliugChaXNzGUL7z5A4jmxeBd91yoD6odSGqLbGuUwjMfyd/
36
+ bYe6x24BppPTKnvGv7iKJQHe
37
37
  -----END CERTIFICATE-----
38
- date: 2020-07-29 00:00:00.000000000 Z
38
+ date: 2022-03-21 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: yawc
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: 0.2.0
47
44
  - - "~>"
48
45
  - !ruby/object:Gem::Version
49
- version: '0.2'
46
+ version: '0.3'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.3.0
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- version: 0.2.0
57
54
  - - "~>"
58
55
  - !ruby/object:Gem::Version
59
- version: '0.2'
56
+ version: '0.3'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.3.0
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: subunit
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
64
  - - "~>"
65
65
  - !ruby/object:Gem::Version
66
- version: '0.5'
66
+ version: '0.8'
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: 0.5.2
69
+ version: 0.8.5
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - "~>"
75
75
  - !ruby/object:Gem::Version
76
- version: '0.5'
76
+ version: '0.8'
77
77
  - - ">="
78
78
  - !ruby/object:Gem::Version
79
- version: 0.5.2
79
+ version: 0.8.5
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: simple-config
82
82
  requirement: !ruby/object:Gem::Requirement
@@ -86,7 +86,7 @@ dependencies:
86
86
  version: '0.7'
87
87
  - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: 0.7.1
89
+ version: 0.7.2
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
@@ -96,29 +96,29 @@ dependencies:
96
96
  version: '0.7'
97
97
  - - ">="
98
98
  - !ruby/object:Gem::Version
99
- version: 0.7.1
99
+ version: 0.7.2
100
100
  - !ruby/object:Gem::Dependency
101
101
  name: youtube_id
102
102
  requirement: !ruby/object:Gem::Requirement
103
103
  requirements:
104
- - - ">="
105
- - !ruby/object:Gem::Version
106
- version: 0.1.0
107
104
  - - "~>"
108
105
  - !ruby/object:Gem::Version
109
106
  version: '0.1'
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 0.1.0
110
110
  type: :runtime
111
111
  prerelease: false
112
112
  version_requirements: !ruby/object:Gem::Requirement
113
113
  requirements:
114
- - - ">="
115
- - !ruby/object:Gem::Version
116
- version: 0.1.0
117
114
  - - "~>"
118
115
  - !ruby/object:Gem::Version
119
116
  version: '0.1'
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: 0.1.0
120
120
  description:
121
- email: james@jamesrobertson.eu
121
+ email: digital.robertson@gmail.com
122
122
  executables: []
123
123
  extensions: []
124
124
  extra_rdoc_files: []
@@ -143,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
143
143
  - !ruby/object:Gem::Version
144
144
  version: '0'
145
145
  requirements: []
146
- rubygems_version: 3.0.3
146
+ rubygems_version: 3.2.22
147
147
  signing_key:
148
148
  specification_version: 4
149
149
  summary: Makes it easier to digest a Youtube video by reading the transcript.
metadata.gz.sig CHANGED
Binary file