youtube_transcript2020 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba1ffdf17cf508bdd7a8f62eba92956b0eb7ab08cb41833161f0144b9ca845de
4
- data.tar.gz: de5a51c14348e42338974e4ef938f0df0c4aa6e3dba4882e93ec5160f95b7866
3
+ metadata.gz: 9462fe85e21ee061f717e564efb7b66bec2e62b1c454228d6fb8f31633f7363d
4
+ data.tar.gz: bb04a03be0cb61058682ce4d9c1159666e41feb2a794fe65d07fbae418412056
5
5
  SHA512:
6
- metadata.gz: d7a8c54b09548c1ac4b0d8a3419e839fca9c2c4f3388b3355be109ce96b88622c00fdcffa7415bb76e20980123915db753c8611945e9e91f273a1494f8e6f11b
7
- data.tar.gz: a5d38c9e2f51dc8871622b0cce5a788cc80cfc788e4e841e31ff37e3ae36a5ade3a42ad7b8c3795a0747daecc4886bc78f81fd11b1792402e72bd3ed063270b0
6
+ metadata.gz: d8d4f57bb617e07f647727eaf56c81f1a7efc5eaccc71c323b54e887a270691f01f1d58cc64fe505a40ad6bd5ebd2cea3ea5fc87b7950abe773697acb5e7cc4b
7
+ data.tar.gz: 9a9a4e72f99f70aca736171490bba54d563aa722576ce6d91435177ed9f0fb327080d84ead963474ab8e46cd1c2dfc4709d2ab93c20853c2a376b091399ed6ad
checksums.yaml.gz.sig CHANGED
Binary file
@@ -12,21 +12,25 @@ class YoutubeTranscript2020
12
12
 
13
13
  attr_reader :to_a, :author, :id, :title
14
14
 
15
- def initialize(id=nil, debug: false)
15
+ def initialize(id=nil, debug: false)
16
16
 
17
17
  return unless id
18
-
18
+
19
19
  @debug = debug
20
20
 
21
21
  @id = id[/https?:\/\//] ? YoutubeID.from(id) : id
22
22
 
23
- s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
24
- @s = parse(s) unless s.empty?
23
+ # Fetching the transcript from the following statement no longer works.
24
+ # Instead, copy and paste the transcript from the YouTube video page into
25
+ # a text file and import it.
26
+ #
27
+ #s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
28
+ #@s = parse(s) unless s.empty?
25
29
 
26
30
  fetch_info(@id)
27
31
 
28
32
  end
29
-
33
+
30
34
  def to_a()
31
35
  @a
32
36
  end
@@ -38,7 +42,7 @@ class YoutubeTranscript2020
38
42
  h = {id: @id, title: @title, author: @author}
39
43
  SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s
40
44
  end
41
-
45
+
42
46
  def to_text()
43
47
  @a.map(&:last).join("\n")
44
48
  end
@@ -47,7 +51,7 @@ class YoutubeTranscript2020
47
51
  #
48
52
  def import(obj)
49
53
 
50
- s = RXFHelper.read(obj).first
54
+ s = RXFReader.read(obj).first
51
55
 
52
56
  if s =~ /------+/ then
53
57
  header, body = s.split(/-----+/,2)
@@ -61,7 +65,7 @@ class YoutubeTranscript2020
61
65
  end
62
66
 
63
67
  puts 'body: ' + body[0..400] if @debug
64
- a = body.lines.map(&:chomp).partition {|x| x =~ /\d+:\d+/ }
68
+ a = body.lines.map(&:chomp).partition {|x| x =~ /\d+:\d+/ }
65
69
  @a = a[0].zip(a[1])
66
70
 
67
71
  @s = join_sentences(@a) if raw_transcript
@@ -75,13 +79,13 @@ class YoutubeTranscript2020
75
79
  url = 'https://www.youtube.com/embed/' + @id
76
80
 
77
81
  links = @a.map do |timestamp, s|
78
-
79
- seconds = Subunit.new(units={minutes:60, hours:60},
82
+
83
+ seconds = Subunit.new(units={minutes:60, hours:60},
80
84
  timestamp.split(':').map(&:to_i)).to_i
81
85
  "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
82
86
  % [url, seconds, timestamp, s]
83
87
  end
84
-
88
+
85
89
  puts '@html_embed: ' + @html_embed.inspect if @debug
86
90
  doc = Rexle.new(@html_embed.to_s)
87
91
  puts 'before attributes'
@@ -116,9 +120,9 @@ EOF
116
120
  # Outputs plain text containing the headings including timestamps
117
121
  # note: This can be helpful for copying and pasting directly into a YouTube comment
118
122
  #
119
- def to_headings()
120
-
121
- @to_a.select {|timestamp, _| timestamp =~ / /}.map(&:first)
123
+ def to_headings()
124
+
125
+ @to_a.select {|timestamp, _| timestamp =~ / /}.map(&:first)
122
126
 
123
127
  end
124
128
 
@@ -133,53 +137,53 @@ EOF
133
137
  private
134
138
 
135
139
  def fetch_info(id)
136
-
137
- url = "http://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=#{id}&format=xml"
140
+
141
+ url = "https://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=#{id}&format=xml"
138
142
  s = Net::HTTP.get(URI(url))
139
-
143
+
140
144
  e = Rexle.new(s).root
141
-
145
+
142
146
  @title = e.text('title')
143
147
  @author = e.text('author_name')
144
148
  @html_embed = e.text('html').unescape
145
149
  puts '@html_embed: ' + @html_embed.inspect if @debug
146
-
150
+
147
151
  end
148
-
152
+
149
153
  def join_sentences(a)
150
-
154
+
151
155
  if @debug then
152
156
  puts 'inside join_sentence'
153
157
  puts 'a: ' + a.take(3).inspect
154
158
  end
155
-
159
+
156
160
  a2 = []
157
161
 
158
162
  # the following cleans up sentences that start with And, Or, But, So etc.
159
163
 
160
164
  (0..a.length - 1).each do |n|
161
-
165
+
162
166
  time, s = a[n]
163
167
 
164
- puts 's: ' + s.inspect if @debug
165
-
168
+ puts 's: ' + s.inspect if @debug
169
+
166
170
  if s[/^[a-z|0-9]|I\b|I'/] then
167
-
171
+
168
172
  if a2.any? then
169
-
170
- # only join two parts together if there was no full stop in
173
+
174
+ # only join two parts together if there was no full stop in
171
175
  # the previous line
172
-
176
+
173
177
  if a2[-1][-1] != /\.$/ then
174
- a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
178
+ a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
175
179
  else
176
180
  a2 << [time, s]
177
181
  end
178
-
179
- else
182
+
183
+ else
180
184
  a2 << [time, s.capitalize]
181
185
  end
182
-
186
+
183
187
  elsif s[/^And,? /]
184
188
  a2[-1][-1] += ' ' + s.sub(/^And,? /,'').capitalize
185
189
  elsif s[/^Or,? /]
@@ -191,40 +195,40 @@ EOF
191
195
  elsif s[/^So,? /]
192
196
  a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
193
197
  elsif s[/^\[(?:Music|Applause)\]/i]
194
-
198
+
195
199
  # ignore it
196
200
  puts 'ignoring action commentary' if @debug
197
201
  a2 << [time, '.']
198
-
199
- # To promote the next sentence to a new timestamp we
202
+
203
+ # To promote the next sentence to a new timestamp we
200
204
  # capitalize the 1st letter
201
205
  a[n+1][-1] = a[n+1][-1].capitalize if a[n+1]
202
206
  else
203
-
207
+
204
208
  if a2.any? and not a2[-1][-1] =~ /\.\s*$/ then
205
- a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
209
+ a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
206
210
  else
207
211
  a2 << [time, s]
208
212
  end
209
-
213
+
210
214
  end
211
215
 
212
216
  end
213
-
217
+
214
218
  # Remove those modified entries which were labelled [Music] etc
215
219
  a2.reject! {|time, s| s.length < 2}
216
220
 
217
221
  # formats the paragraph with the timestamp appearing above
218
222
  @a = a2
219
- a2.map {|time, s| "\n%s\n\n%s" % [time, s]}.join("\n")
220
-
223
+ a2.map {|time, s| "\n%s\n\n%s" % [time, s]}.join("\n")
224
+
221
225
  end
222
226
 
223
227
  def parse(s)
224
228
 
225
229
  doc = Rexle.new(s)
226
230
 
227
- a = doc.root.elements.each.map do |x|
231
+ a = doc.root.elements.each.map do |x|
228
232
  timestamp = Subunit.new(units={minutes:60, hours:60}, \
229
233
  seconds: x.attributes[:start].to_f).to_s(verbose: false)
230
234
  [timestamp, x.text.unescape.gsub("\n", ' ').gsub('&#39;',"'").gsub('&quot;','"')]
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_transcript2020
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,72 +11,72 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzI3MjI1MTUyWhcN
15
- MjEwNzI3MjI1MTUyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxfRw+
17
- xg525jF+UNFVTtUrON2issNxWgDTq1efjPq9yMzqYrIDZREFE/3fgYbtAqA1Ut94
18
- 2h8mAKnAg1CC4plPA8o15f+h30TPRaxZXFmYUMxTkaLHL4Lvzd1D7eXqRYf9SFQM
19
- EvoYbncj9QwR57WcVF/MTdwbyyiZo3CGzwmWNb9OCIZtvs8m/UOzAmbfF3lIKz9k
20
- +ZK03KqYhyjuAiVhF39LdWUc1AWqu5i+JpFE+Lzfqv1uAjjgshmUkHOXkpWOorHc
21
- uxL0+xZXWgTwpa1QCw3cQY1LW45QjZt4ckA9lOub1LvUTDCvZocNS+dlIUMdW0mP
22
- jFII/nX/KWxW+NOmkWBpdGbXmY5QTppwx88r+VRpTdhepVcNiiHhMsYQsLI/fzVo
23
- kWTib/aBnAoahtlbaldC+e03GPsLPmpTl4ZjOFqUuAyq47h42NYt6kPY/y7Gj8To
24
- fx4pNgddR/r/WABaNao8Q+tzIxgQwCf1rijvfJP+u04GCmIeFm8oQ1x0XkUCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1nkRML1E
26
- Q0PgH/jEHBOQSUTi4MYwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMzIxMjAxOTMzWhcN
15
+ MjMwMzIxMjAxOTMzWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCesuFG
17
+ FfZDH7Xm8kEyH9B3OczqfuJW5yZOfANTnUsB864vtSWe6Vghp6JUI0bOcOQdMvIx
18
+ HO4wfaGwvKJtjWCXYdZo2QYXjf6caY007R10GVxzsBYh8Swym0SYf33ljxX+R9DS
19
+ WwdWIv9SU0T7quNEbeXa9dtZJqlFCspmni8MB284ZpqhP2bpvfhBT58dLEUnjcRP
20
+ rcnCBEueIWYkwoZ8K4/BlYrBfgWcm9hxfBimsID0CIDqD2mhOJo/NQSrJJNWTmOt
21
+ oBZg4K2Y/GCmpxS9wQCrM4pBlTjy/mfNWIxDa9xdrIEmQtSng+7X6wvWAiJmFG7Y
22
+ HYN+ARNOx6ODVGYa/GrLWTBr4EL6RJuOD6eqpxD0hjvTczS12RFIGZh9kKXVT7wy
23
+ gkF5vdtR8uyR8Eo8mJM39Nv7yzuj8cRhCAto6aWOx+srVP/woM96qSQ7Ro0/YaeM
24
+ PHHcgZfU4HGdkCJ5Y8gaO9AzioExf2uFfV/m4+pPcBRbNkymj0+qgT/UFyMCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTmoD5rx8
26
+ rZ1imkIWMgtbUzNAn4YwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAWjyRzOnO0k/P2YHBsie5hNyJq6q7zb9bto2WYF1L
29
- N0/cvumuBsJMDUuPlD9RFvzncZbu//hbnZbK6cxiptm9HUN+m7zNi8XUcDHQw4Ba
30
- 17ZyHWKM2pkf+PJb4waQVeqyUXjbM9r6L8cVa1gkalU6ZpqEtBmkEzJCDZVf0Fll
31
- KrPYWAW5cC7EWeDm1yxusOqzxnkBcXMnKYNJm8KU4YfVpgPXJy9bTLWhm482BlJm
32
- v6wUZwYOM9B7x3dWbbsQXSuKmFqoxiNRWaA41qUS6eVjXpd4Gn/diSzntaX/Whew
33
- dCXyioQY49CVGJg8LpX/zSYUk9dns+fCSeUUfKjv2K8WuzVkS/uMA8DxSeYBfxf5
34
- ON+xcGIy3Nk7FHwY+CuIIa4WCJYB+1bVFeyCaRlCpwHK8DGUxP5PzCb44USGTI2V
35
- 42/R+mfGUgXXd9e36R3+wmfHZSFR6p6I6XKToCKca7buvgP2XgO9I04lTYUr0KLi
36
- 6ZSQYo0XuSVg3by/5kp1TrrS
28
+ BgkqhkiG9w0BAQsFAAOCAYEAAnULhDB7LFrqhVw2ms+IyRTJcJSfFxFcTPG5/mEW
29
+ r8pyTmXvBOr9WIid7QNaUcHTMVlt03v/XCrEex+GajjDspH+rL0iw3poTyvQHeNt
30
+ WgMJiYJH1AZYTSIPnkdkoo6ok4jb9S4B6mgX7tGcBXMq0q3B2o8YZIwRPzajDvyf
31
+ ZgP+vWq4HfkE7/sLTPRoz+WF6c+0w6NAvCPh/LT9qQjwXhtKquprkPfR3+G9tyNO
32
+ rWGzBuj63YgqWsNTF0wZLXDMAGHsJvJa2plhhkMGU7/SMxxdG25A7THeTVMNH7kM
33
+ 041VYN5fokzIIVKn38M4giKliDGEWvnFnEKEeb6Hrgser85Z+P7GjC642k1FHGvb
34
+ T8Jyb5XNJAWcNTk2AspDthbjYwOYAPP1KSLoCbhABW2Dqb6Y+pDOtoHoVbQtx7Ja
35
+ Eh31Azzsjb9JoMQLQliugChaXNzGUL7z5A4jmxeBd91yoD6odSGqLbGuUwjMfyd/
36
+ bYe6x24BppPTKnvGv7iKJQHe
37
37
  -----END CERTIFICATE-----
38
- date: 2020-07-29 00:00:00.000000000 Z
38
+ date: 2022-03-21 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: yawc
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: 0.2.0
47
44
  - - "~>"
48
45
  - !ruby/object:Gem::Version
49
- version: '0.2'
46
+ version: '0.3'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.3.0
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- version: 0.2.0
57
54
  - - "~>"
58
55
  - !ruby/object:Gem::Version
59
- version: '0.2'
56
+ version: '0.3'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.3.0
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: subunit
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
64
  - - "~>"
65
65
  - !ruby/object:Gem::Version
66
- version: '0.5'
66
+ version: '0.8'
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: 0.5.2
69
+ version: 0.8.5
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - "~>"
75
75
  - !ruby/object:Gem::Version
76
- version: '0.5'
76
+ version: '0.8'
77
77
  - - ">="
78
78
  - !ruby/object:Gem::Version
79
- version: 0.5.2
79
+ version: 0.8.5
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: simple-config
82
82
  requirement: !ruby/object:Gem::Requirement
@@ -86,7 +86,7 @@ dependencies:
86
86
  version: '0.7'
87
87
  - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: 0.7.1
89
+ version: 0.7.2
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
@@ -96,29 +96,29 @@ dependencies:
96
96
  version: '0.7'
97
97
  - - ">="
98
98
  - !ruby/object:Gem::Version
99
- version: 0.7.1
99
+ version: 0.7.2
100
100
  - !ruby/object:Gem::Dependency
101
101
  name: youtube_id
102
102
  requirement: !ruby/object:Gem::Requirement
103
103
  requirements:
104
- - - ">="
105
- - !ruby/object:Gem::Version
106
- version: 0.1.0
107
104
  - - "~>"
108
105
  - !ruby/object:Gem::Version
109
106
  version: '0.1'
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 0.1.0
110
110
  type: :runtime
111
111
  prerelease: false
112
112
  version_requirements: !ruby/object:Gem::Requirement
113
113
  requirements:
114
- - - ">="
115
- - !ruby/object:Gem::Version
116
- version: 0.1.0
117
114
  - - "~>"
118
115
  - !ruby/object:Gem::Version
119
116
  version: '0.1'
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: 0.1.0
120
120
  description:
121
- email: james@jamesrobertson.eu
121
+ email: digital.robertson@gmail.com
122
122
  executables: []
123
123
  extensions: []
124
124
  extra_rdoc_files: []
@@ -143,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
143
143
  - !ruby/object:Gem::Version
144
144
  version: '0'
145
145
  requirements: []
146
- rubygems_version: 3.0.3
146
+ rubygems_version: 3.2.22
147
147
  signing_key:
148
148
  specification_version: 4
149
149
  summary: Makes it easier to digest a Youtube video by reading the transcript.
metadata.gz.sig CHANGED
Binary file