youtube_transcript2020 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9462fe85e21ee061f717e564efb7b66bec2e62b1c454228d6fb8f31633f7363d
4
- data.tar.gz: bb04a03be0cb61058682ce4d9c1159666e41feb2a794fe65d07fbae418412056
3
+ metadata.gz: 10d150f7396c9e0e18db51381fc5f262eb83595b4f3d5aad313192d86041071c
4
+ data.tar.gz: 607ae0291272ff40d08db5398f465d99872747e300500c214602b754c43811bb
5
5
  SHA512:
6
- metadata.gz: d8d4f57bb617e07f647727eaf56c81f1a7efc5eaccc71c323b54e887a270691f01f1d58cc64fe505a40ad6bd5ebd2cea3ea5fc87b7950abe773697acb5e7cc4b
7
- data.tar.gz: 9a9a4e72f99f70aca736171490bba54d563aa722576ce6d91435177ed9f0fb327080d84ead963474ab8e46cd1c2dfc4709d2ab93c20853c2a376b091399ed6ad
6
+ metadata.gz: 244d0242415308c65b07d9bfb09f6776fe3f67817dadbeb5189b4bb5756a4733668fde8b139f636c361598cb6e623ffbdfbad40c3675fd78b74df17701514183
7
+ data.tar.gz: '07884cdc2b52ad2ac5cf86814f8ae88aa864109d24584e2b256b65b0b8c8168ac7d08f71ce5f2343751b4bbcfe6d81960d4b81b50d4c49109d5b0855198761b1'
checksums.yaml.gz.sig CHANGED
Binary file
@@ -3,10 +3,12 @@
3
3
  # file: youtube_transcript2020.rb
4
4
 
5
5
  require 'yawc'
6
+ require 'json'
6
7
  require 'subunit'
7
8
  require 'youtube_id'
8
9
  require 'simple-config'
9
10
 
11
+ # https://github.com/jdepoix/youtube-transcript-api
10
12
 
11
13
  class YoutubeTranscript2020
12
14
 
@@ -54,14 +56,26 @@ class YoutubeTranscript2020
54
56
  s = RXFReader.read(obj).first
55
57
 
56
58
  if s =~ /------+/ then
59
+
57
60
  header, body = s.split(/-----+/,2)
58
61
 
59
62
  h = SimpleConfig.new(header).to_h
60
63
  @id, @author, @title = h[:id], h[:author], h[:title]
61
64
  @s = body
65
+
66
+ elsif File.extname(obj) == '.json'
67
+
68
+ r = JSON.parse(s)
69
+ @a = r.map {|x| [x['start'], x['text']]}
70
+ @s = join_sentences(@a)
71
+
72
+ return
73
+
62
74
  else
75
+
63
76
  body = obj
64
77
  raw_transcript = true
78
+
65
79
  end
66
80
 
67
81
  puts 'body: ' + body[0..400] if @debug
@@ -193,7 +207,14 @@ EOF
193
207
  elsif s[/^"/]
194
208
  a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
195
209
  elsif s[/^So,? /]
196
- a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
210
+
211
+ puts 'so? a2[-1]' + a2[-1].inspect if @debug
212
+
213
+ if a2.empty? then
214
+ a2 << [time, s.sub(/^So,? /,'').capitalize]
215
+ else
216
+ a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
217
+ end
197
218
  elsif s[/^\[(?:Music|Applause)\]/i]
198
219
 
199
220
  # ignore it
@@ -220,7 +241,17 @@ EOF
220
241
 
221
242
  # formats the paragraph with the timestamp appearing above
222
243
  @a = a2
223
- a2.map {|time, s| "\n%s\n\n%s" % [time, s]}.join("\n")
244
+
245
+ a2.map do |rawtime, s|
246
+
247
+ time = if rawtime.is_a? Float then
248
+ Subunit.seconds(rawtime).strfunit("%sc")
249
+ else
250
+ time
251
+ end
252
+
253
+ "\n%s\n\n%s" % [time, s]
254
+ end.join("\n")
224
255
 
225
256
  end
226
257
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_transcript2020
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Eh31Azzsjb9JoMQLQliugChaXNzGUL7z5A4jmxeBd91yoD6odSGqLbGuUwjMfyd/
36
36
  bYe6x24BppPTKnvGv7iKJQHe
37
37
  -----END CERTIFICATE-----
38
- date: 2022-03-21 00:00:00.000000000 Z
38
+ date: 2022-03-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: yawc
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: '0.8'
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: 0.8.5
69
+ version: 0.8.7
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0.8'
77
77
  - - ">="
78
78
  - !ruby/object:Gem::Version
79
- version: 0.8.5
79
+ version: 0.8.7
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: simple-config
82
82
  requirement: !ruby/object:Gem::Requirement
metadata.gz.sig CHANGED
Binary file