youtube_transcript2020 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/youtube_transcript2020.rb +28 -13
- metadata +1 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba1ffdf17cf508bdd7a8f62eba92956b0eb7ab08cb41833161f0144b9ca845de
|
4
|
+
data.tar.gz: de5a51c14348e42338974e4ef938f0df0c4aa6e3dba4882e93ec5160f95b7866
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7a8c54b09548c1ac4b0d8a3419e839fca9c2c4f3388b3355be109ce96b88622c00fdcffa7415bb76e20980123915db753c8611945e9e91f273a1494f8e6f11b
|
7
|
+
data.tar.gz: a5d38c9e2f51dc8871622b0cce5a788cc80cfc788e4e841e31ff37e3ae36a5ade3a42ad7b8c3795a0747daecc4886bc78f81fd11b1792402e72bd3ed063270b0
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/lib/youtube_transcript2020.rb
CHANGED
@@ -18,11 +18,7 @@ class YoutubeTranscript2020
|
|
18
18
|
|
19
19
|
@debug = debug
|
20
20
|
|
21
|
-
@id =
|
22
|
-
YoutubeID.from(id)
|
23
|
-
else
|
24
|
-
id
|
25
|
-
end
|
21
|
+
@id = id[/https?:\/\//] ? YoutubeID.from(id) : id
|
26
22
|
|
27
23
|
s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
|
28
24
|
@s = parse(s) unless s.empty?
|
@@ -85,6 +81,14 @@ class YoutubeTranscript2020
|
|
85
81
|
"<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
|
86
82
|
% [url, seconds, timestamp, s]
|
87
83
|
end
|
84
|
+
|
85
|
+
puts '@html_embed: ' + @html_embed.inspect if @debug
|
86
|
+
doc = Rexle.new(@html_embed.to_s)
|
87
|
+
puts 'before attributes'
|
88
|
+
doc.root.attributes[:name] = 'video'
|
89
|
+
embed = doc.xml(declaration: false)
|
90
|
+
puts 'embed: ' + embed.inspect if @debug
|
91
|
+
#embed = @html_embed
|
88
92
|
|
89
93
|
<<EOF
|
90
94
|
<!DOCTYPE html>
|
@@ -96,7 +100,7 @@ class YoutubeTranscript2020
|
|
96
100
|
<body>
|
97
101
|
<div style="width: 1080px; background: white">
|
98
102
|
<div style="float:left; width: 580px; background: white">
|
99
|
-
#{
|
103
|
+
#{embed}
|
100
104
|
<h1>#{@title}</h1>
|
101
105
|
</div>
|
102
106
|
<div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
|
@@ -138,7 +142,8 @@ EOF
|
|
138
142
|
@title = e.text('title')
|
139
143
|
@author = e.text('author_name')
|
140
144
|
@html_embed = e.text('html').unescape
|
141
|
-
|
145
|
+
puts '@html_embed: ' + @html_embed.inspect if @debug
|
146
|
+
|
142
147
|
end
|
143
148
|
|
144
149
|
def join_sentences(a)
|
@@ -152,13 +157,13 @@ EOF
|
|
152
157
|
|
153
158
|
# the following cleans up sentences that start with And, Or, But, So etc.
|
154
159
|
|
155
|
-
a.each do |
|
156
|
-
|
157
|
-
puts 'raws: ' + raws.inspect if @debug
|
160
|
+
(0..a.length - 1).each do |n|
|
158
161
|
|
159
|
-
s =
|
162
|
+
time, s = a[n]
|
163
|
+
|
164
|
+
puts 's: ' + s.inspect if @debug
|
160
165
|
|
161
|
-
if s[/^[a-z|0-9]|I\b|I'/]then
|
166
|
+
if s[/^[a-z|0-9]|I\b|I'/] then
|
162
167
|
|
163
168
|
if a2.any? then
|
164
169
|
|
@@ -185,8 +190,15 @@ EOF
|
|
185
190
|
a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
|
186
191
|
elsif s[/^So,? /]
|
187
192
|
a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
|
188
|
-
elsif s[/^\[Music|Applause\]/i]
|
193
|
+
elsif s[/^\[(?:Music|Applause)\]/i]
|
194
|
+
|
189
195
|
# ignore it
|
196
|
+
puts 'ignoring action commentary' if @debug
|
197
|
+
a2 << [time, '.']
|
198
|
+
|
199
|
+
# To promote the next sentence to a new timestamp we
|
200
|
+
# capitalize the 1st letter
|
201
|
+
a[n+1][-1] = a[n+1][-1].capitalize if a[n+1]
|
190
202
|
else
|
191
203
|
|
192
204
|
if a2.any? and not a2[-1][-1] =~ /\.\s*$/ then
|
@@ -198,6 +210,9 @@ EOF
|
|
198
210
|
end
|
199
211
|
|
200
212
|
end
|
213
|
+
|
214
|
+
# Remove those modified entries which were labelled [Music] etc
|
215
|
+
a2.reject! {|time, s| s.length < 2}
|
201
216
|
|
202
217
|
# formats the paragraph with the timestamp appearing above
|
203
218
|
@a = a2
|
metadata
CHANGED
metadata.gz.sig
CHANGED
Binary file
|