youtube_transcript2020 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/youtube_transcript2020.rb +28 -13
- metadata +1 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba1ffdf17cf508bdd7a8f62eba92956b0eb7ab08cb41833161f0144b9ca845de
|
4
|
+
data.tar.gz: de5a51c14348e42338974e4ef938f0df0c4aa6e3dba4882e93ec5160f95b7866
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7a8c54b09548c1ac4b0d8a3419e839fca9c2c4f3388b3355be109ce96b88622c00fdcffa7415bb76e20980123915db753c8611945e9e91f273a1494f8e6f11b
|
7
|
+
data.tar.gz: a5d38c9e2f51dc8871622b0cce5a788cc80cfc788e4e841e31ff37e3ae36a5ade3a42ad7b8c3795a0747daecc4886bc78f81fd11b1792402e72bd3ed063270b0
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
@@ -18,11 +18,7 @@ class YoutubeTranscript2020
|
|
18
18
|
|
19
19
|
@debug = debug
|
20
20
|
|
21
|
-
@id =
|
22
|
-
YoutubeID.from(id)
|
23
|
-
else
|
24
|
-
id
|
25
|
-
end
|
21
|
+
@id = id[/https?:\/\//] ? YoutubeID.from(id) : id
|
26
22
|
|
27
23
|
s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
|
28
24
|
@s = parse(s) unless s.empty?
|
@@ -85,6 +81,14 @@ class YoutubeTranscript2020
|
|
85
81
|
"<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
|
86
82
|
% [url, seconds, timestamp, s]
|
87
83
|
end
|
84
|
+
|
85
|
+
puts '@html_embed: ' + @html_embed.inspect if @debug
|
86
|
+
doc = Rexle.new(@html_embed.to_s)
|
87
|
+
puts 'before attributes'
|
88
|
+
doc.root.attributes[:name] = 'video'
|
89
|
+
embed = doc.xml(declaration: false)
|
90
|
+
puts 'embed: ' + embed.inspect if @debug
|
91
|
+
#embed = @html_embed
|
88
92
|
|
89
93
|
<<EOF
|
90
94
|
<!DOCTYPE html>
|
@@ -96,7 +100,7 @@ class YoutubeTranscript2020
|
|
96
100
|
<body>
|
97
101
|
<div style="width: 1080px; background: white">
|
98
102
|
<div style="float:left; width: 580px; background: white">
|
99
|
-
#{
|
103
|
+
#{embed}
|
100
104
|
<h1>#{@title}</h1>
|
101
105
|
</div>
|
102
106
|
<div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
|
@@ -138,7 +142,8 @@ EOF
|
|
138
142
|
@title = e.text('title')
|
139
143
|
@author = e.text('author_name')
|
140
144
|
@html_embed = e.text('html').unescape
|
141
|
-
|
145
|
+
puts '@html_embed: ' + @html_embed.inspect if @debug
|
146
|
+
|
142
147
|
end
|
143
148
|
|
144
149
|
def join_sentences(a)
|
@@ -152,13 +157,13 @@ EOF
|
|
152
157
|
|
153
158
|
# the following cleans up sentences that start with And, Or, But, So etc.
|
154
159
|
|
155
|
-
a.each do |
|
156
|
-
|
157
|
-
puts 'raws: ' + raws.inspect if @debug
|
160
|
+
(0..a.length - 1).each do |n|
|
158
161
|
|
159
|
-
s =
|
162
|
+
time, s = a[n]
|
163
|
+
|
164
|
+
puts 's: ' + s.inspect if @debug
|
160
165
|
|
161
|
-
if s[/^[a-z|0-9]|I\b|I'/]then
|
166
|
+
if s[/^[a-z|0-9]|I\b|I'/] then
|
162
167
|
|
163
168
|
if a2.any? then
|
164
169
|
|
@@ -185,8 +190,15 @@ EOF
|
|
185
190
|
a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
|
186
191
|
elsif s[/^So,? /]
|
187
192
|
a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
|
188
|
-
elsif s[/^\[Music|Applause\]/i]
|
193
|
+
elsif s[/^\[(?:Music|Applause)\]/i]
|
194
|
+
|
189
195
|
# ignore it
|
196
|
+
puts 'ignoring action commentary' if @debug
|
197
|
+
a2 << [time, '.']
|
198
|
+
|
199
|
+
# To promote the next sentence to a new timestamp we
|
200
|
+
# capitalize the 1st letter
|
201
|
+
a[n+1][-1] = a[n+1][-1].capitalize if a[n+1]
|
190
202
|
else
|
191
203
|
|
192
204
|
if a2.any? and not a2[-1][-1] =~ /\.\s*$/ then
|
@@ -198,6 +210,9 @@ EOF
|
|
198
210
|
end
|
199
211
|
|
200
212
|
end
|
213
|
+
|
214
|
+
# Remove those modified entries which were labelled [Music] etc
|
215
|
+
a2.reject! {|time, s| s.length < 2}
|
201
216
|
|
202
217
|
# formats the paragraph with the timestamp appearing above
|
203
218
|
@a = a2
|
metadata
CHANGED
metadata.gz.sig
CHANGED
Binary file
|