youtube_transcript2020 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/youtube_transcript2020.rb +33 -2
- data.tar.gz.sig +0 -0
- metadata +4 -4
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10d150f7396c9e0e18db51381fc5f262eb83595b4f3d5aad313192d86041071c
|
4
|
+
data.tar.gz: 607ae0291272ff40d08db5398f465d99872747e300500c214602b754c43811bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 244d0242415308c65b07d9bfb09f6776fe3f67817dadbeb5189b4bb5756a4733668fde8b139f636c361598cb6e623ffbdfbad40c3675fd78b74df17701514183
|
7
|
+
data.tar.gz: '07884cdc2b52ad2ac5cf86814f8ae88aa864109d24584e2b256b65b0b8c8168ac7d08f71ce5f2343751b4bbcfe6d81960d4b81b50d4c49109d5b0855198761b1'
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
@@ -3,10 +3,12 @@
|
|
3
3
|
# file: youtube_transcript2020.rb
|
4
4
|
|
5
5
|
require 'yawc'
|
6
|
+
require 'json'
|
6
7
|
require 'subunit'
|
7
8
|
require 'youtube_id'
|
8
9
|
require 'simple-config'
|
9
10
|
|
11
|
+
# https://github.com/jdepoix/youtube-transcript-api
|
10
12
|
|
11
13
|
class YoutubeTranscript2020
|
12
14
|
|
@@ -54,14 +56,26 @@ class YoutubeTranscript2020
|
|
54
56
|
s = RXFReader.read(obj).first
|
55
57
|
|
56
58
|
if s =~ /------+/ then
|
59
|
+
|
57
60
|
header, body = s.split(/-----+/,2)
|
58
61
|
|
59
62
|
h = SimpleConfig.new(header).to_h
|
60
63
|
@id, @author, @title = h[:id], h[:author], h[:title]
|
61
64
|
@s = body
|
65
|
+
|
66
|
+
elsif File.extname(obj) == '.json'
|
67
|
+
|
68
|
+
r = JSON.parse(s)
|
69
|
+
@a = r.map {|x| [x['start'], x['text']]}
|
70
|
+
@s = join_sentences(@a)
|
71
|
+
|
72
|
+
return
|
73
|
+
|
62
74
|
else
|
75
|
+
|
63
76
|
body = obj
|
64
77
|
raw_transcript = true
|
78
|
+
|
65
79
|
end
|
66
80
|
|
67
81
|
puts 'body: ' + body[0..400] if @debug
|
@@ -193,7 +207,14 @@ EOF
|
|
193
207
|
elsif s[/^"/]
|
194
208
|
a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
|
195
209
|
elsif s[/^So,? /]
|
196
|
-
|
210
|
+
|
211
|
+
puts 'so? a2[-1]' + a2[-1].inspect if @debug
|
212
|
+
|
213
|
+
if a2.empty? then
|
214
|
+
a2 << [time, s.sub(/^So,? /,'').capitalize]
|
215
|
+
else
|
216
|
+
a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
|
217
|
+
end
|
197
218
|
elsif s[/^\[(?:Music|Applause)\]/i]
|
198
219
|
|
199
220
|
# ignore it
|
@@ -220,7 +241,17 @@ EOF
|
|
220
241
|
|
221
242
|
# formats the paragraph with the timestamp appearing above
|
222
243
|
@a = a2
|
223
|
-
|
244
|
+
|
245
|
+
a2.map do |rawtime, s|
|
246
|
+
|
247
|
+
time = if rawtime.is_a? Float then
|
248
|
+
Subunit.seconds(rawtime).strfunit("%sc")
|
249
|
+
else
|
250
|
+
time
|
251
|
+
end
|
252
|
+
|
253
|
+
"\n%s\n\n%s" % [time, s]
|
254
|
+
end.join("\n")
|
224
255
|
|
225
256
|
end
|
226
257
|
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_transcript2020
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
Eh31Azzsjb9JoMQLQliugChaXNzGUL7z5A4jmxeBd91yoD6odSGqLbGuUwjMfyd/
|
36
36
|
bYe6x24BppPTKnvGv7iKJQHe
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-03-
|
38
|
+
date: 2022-03-22 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: yawc
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: '0.8'
|
67
67
|
- - ">="
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: 0.8.
|
69
|
+
version: 0.8.7
|
70
70
|
type: :runtime
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0.8'
|
77
77
|
- - ">="
|
78
78
|
- !ruby/object:Gem::Version
|
79
|
-
version: 0.8.
|
79
|
+
version: 0.8.7
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simple-config
|
82
82
|
requirement: !ruby/object:Gem::Requirement
|
metadata.gz.sig
CHANGED
Binary file
|