youtube_transcript2020 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/youtube_transcript2020.rb +33 -2
- data.tar.gz.sig +0 -0
- metadata +4 -4
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10d150f7396c9e0e18db51381fc5f262eb83595b4f3d5aad313192d86041071c
|
4
|
+
data.tar.gz: 607ae0291272ff40d08db5398f465d99872747e300500c214602b754c43811bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 244d0242415308c65b07d9bfb09f6776fe3f67817dadbeb5189b4bb5756a4733668fde8b139f636c361598cb6e623ffbdfbad40c3675fd78b74df17701514183
|
7
|
+
data.tar.gz: '07884cdc2b52ad2ac5cf86814f8ae88aa864109d24584e2b256b65b0b8c8168ac7d08f71ce5f2343751b4bbcfe6d81960d4b81b50d4c49109d5b0855198761b1'
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
@@ -3,10 +3,12 @@
|
|
3
3
|
# file: youtube_transcript2020.rb
|
4
4
|
|
5
5
|
require 'yawc'
|
6
|
+
require 'json'
|
6
7
|
require 'subunit'
|
7
8
|
require 'youtube_id'
|
8
9
|
require 'simple-config'
|
9
10
|
|
11
|
+
# https://github.com/jdepoix/youtube-transcript-api
|
10
12
|
|
11
13
|
class YoutubeTranscript2020
|
12
14
|
|
@@ -54,14 +56,26 @@ class YoutubeTranscript2020
|
|
54
56
|
s = RXFReader.read(obj).first
|
55
57
|
|
56
58
|
if s =~ /------+/ then
|
59
|
+
|
57
60
|
header, body = s.split(/-----+/,2)
|
58
61
|
|
59
62
|
h = SimpleConfig.new(header).to_h
|
60
63
|
@id, @author, @title = h[:id], h[:author], h[:title]
|
61
64
|
@s = body
|
65
|
+
|
66
|
+
elsif File.extname(obj) == '.json'
|
67
|
+
|
68
|
+
r = JSON.parse(s)
|
69
|
+
@a = r.map {|x| [x['start'], x['text']]}
|
70
|
+
@s = join_sentences(@a)
|
71
|
+
|
72
|
+
return
|
73
|
+
|
62
74
|
else
|
75
|
+
|
63
76
|
body = obj
|
64
77
|
raw_transcript = true
|
78
|
+
|
65
79
|
end
|
66
80
|
|
67
81
|
puts 'body: ' + body[0..400] if @debug
|
@@ -193,7 +207,14 @@ EOF
|
|
193
207
|
elsif s[/^"/]
|
194
208
|
a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
|
195
209
|
elsif s[/^So,? /]
|
196
|
-
|
210
|
+
|
211
|
+
puts 'so? a2[-1]' + a2[-1].inspect if @debug
|
212
|
+
|
213
|
+
if a2.empty? then
|
214
|
+
a2 << [time, s.sub(/^So,? /,'').capitalize]
|
215
|
+
else
|
216
|
+
a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
|
217
|
+
end
|
197
218
|
elsif s[/^\[(?:Music|Applause)\]/i]
|
198
219
|
|
199
220
|
# ignore it
|
@@ -220,7 +241,17 @@ EOF
|
|
220
241
|
|
221
242
|
# formats the paragraph with the timestamp appearing above
|
222
243
|
@a = a2
|
223
|
-
|
244
|
+
|
245
|
+
a2.map do |rawtime, s|
|
246
|
+
|
247
|
+
time = if rawtime.is_a? Float then
|
248
|
+
Subunit.seconds(rawtime).strfunit("%sc")
|
249
|
+
else
|
250
|
+
time
|
251
|
+
end
|
252
|
+
|
253
|
+
"\n%s\n\n%s" % [time, s]
|
254
|
+
end.join("\n")
|
224
255
|
|
225
256
|
end
|
226
257
|
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_transcript2020
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
Eh31Azzsjb9JoMQLQliugChaXNzGUL7z5A4jmxeBd91yoD6odSGqLbGuUwjMfyd/
|
36
36
|
bYe6x24BppPTKnvGv7iKJQHe
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-03-
|
38
|
+
date: 2022-03-22 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: yawc
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: '0.8'
|
67
67
|
- - ">="
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: 0.8.
|
69
|
+
version: 0.8.7
|
70
70
|
type: :runtime
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0.8'
|
77
77
|
- - ">="
|
78
78
|
- !ruby/object:Gem::Version
|
79
|
-
version: 0.8.
|
79
|
+
version: 0.8.7
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simple-config
|
82
82
|
requirement: !ruby/object:Gem::Requirement
|
metadata.gz.sig
CHANGED
Binary file
|