youtube_transcript2020 0.3.0 → 0.3.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eaec0007026f2086f0ed0ed41e5c4d6de5c2e64aea17cf21dfab2a201b5228c5
4
- data.tar.gz: 14953c7cf8156785e5413d17a6e02373935c368cd4f22be7ace93378517f8480
3
+ metadata.gz: ba1ffdf17cf508bdd7a8f62eba92956b0eb7ab08cb41833161f0144b9ca845de
4
+ data.tar.gz: de5a51c14348e42338974e4ef938f0df0c4aa6e3dba4882e93ec5160f95b7866
5
5
  SHA512:
6
- metadata.gz: 294f44e6db60fc35b8e9cdc0600d1d098a5d4eebe5cb09da0e9bc325b1ea489c5faf03ca9557fb7920672d82758313c162406b09b31aa617fcc3402282f8a61b
7
- data.tar.gz: 143628d6cde724dd466d779f8a5796ab02facede423e34d01cf31ed1a52841f56cc09633fc3160c8d68d2502edf2989d0dc3962901fbcb86ac3124d788ff535a
6
+ metadata.gz: d7a8c54b09548c1ac4b0d8a3419e839fca9c2c4f3388b3355be109ce96b88622c00fdcffa7415bb76e20980123915db753c8611945e9e91f273a1494f8e6f11b
7
+ data.tar.gz: a5d38c9e2f51dc8871622b0cce5a788cc80cfc788e4e841e31ff37e3ae36a5ade3a42ad7b8c3795a0747daecc4886bc78f81fd11b1792402e72bd3ed063270b0
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -18,11 +18,7 @@ class YoutubeTranscript2020
18
18
 
19
19
  @debug = debug
20
20
 
21
- @id = if id[/https?:\/\//] then
22
- YoutubeID.from(id)
23
- else
24
- id
25
- end
21
+ @id = id[/https?:\/\//] ? YoutubeID.from(id) : id
26
22
 
27
23
  s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
28
24
  @s = parse(s) unless s.empty?
@@ -85,6 +81,14 @@ class YoutubeTranscript2020
85
81
  "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
86
82
  % [url, seconds, timestamp, s]
87
83
  end
84
+
85
+ puts '@html_embed: ' + @html_embed.inspect if @debug
86
+ doc = Rexle.new(@html_embed.to_s)
87
+ puts 'before attributes'
88
+ doc.root.attributes[:name] = 'video'
89
+ embed = doc.xml(declaration: false)
90
+ puts 'embed: ' + embed.inspect if @debug
91
+ #embed = @html_embed
88
92
 
89
93
  <<EOF
90
94
  <!DOCTYPE html>
@@ -96,7 +100,7 @@ class YoutubeTranscript2020
96
100
  <body>
97
101
  <div style="width: 1080px; background: white">
98
102
  <div style="float:left; width: 580px; background: white">
99
- #{@html_embed}
103
+ #{embed}
100
104
  <h1>#{@title}</h1>
101
105
  </div>
102
106
  <div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
@@ -138,7 +142,8 @@ EOF
138
142
  @title = e.text('title')
139
143
  @author = e.text('author_name')
140
144
  @html_embed = e.text('html').unescape
141
-
145
+ puts '@html_embed: ' + @html_embed.inspect if @debug
146
+
142
147
  end
143
148
 
144
149
  def join_sentences(a)
@@ -152,13 +157,13 @@ EOF
152
157
 
153
158
  # the following cleans up sentences that start with And, Or, But, So etc.
154
159
 
155
- a.each do |time, raws|
156
-
157
- puts 'raws: ' + raws.inspect if @debug
160
+ (0..a.length - 1).each do |n|
158
161
 
159
- s = raws.sub(/^\W+/,'')
162
+ time, s = a[n]
163
+
164
+ puts 's: ' + s.inspect if @debug
160
165
 
161
- if s[/^[a-z|0-9]|I\b|I'/]then
166
+ if s[/^[a-z|0-9]|I\b|I'/] then
162
167
 
163
168
  if a2.any? then
164
169
 
@@ -185,8 +190,15 @@ EOF
185
190
  a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
186
191
  elsif s[/^So,? /]
187
192
  a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
188
- elsif s[/^\[Music|Applause\]/i]
193
+ elsif s[/^\[(?:Music|Applause)\]/i]
194
+
189
195
  # ignore it
196
+ puts 'ignoring action commentary' if @debug
197
+ a2 << [time, '.']
198
+
199
+ # To promote the next sentence to a new timestamp we
200
+ # capitalize the 1st letter
201
+ a[n+1][-1] = a[n+1][-1].capitalize if a[n+1]
190
202
  else
191
203
 
192
204
  if a2.any? and not a2[-1][-1] =~ /\.\s*$/ then
@@ -198,6 +210,9 @@ EOF
198
210
  end
199
211
 
200
212
  end
213
+
214
+ # Remove those modified entries which were labelled [Music] etc
215
+ a2.reject! {|time, s| s.length < 2}
201
216
 
202
217
  # formats the paragraph with the timestamp appearing above
203
218
  @a = a2
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_transcript2020
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
metadata.gz.sig CHANGED
Binary file