youtube_transcript2020 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9462fe85e21ee061f717e564efb7b66bec2e62b1c454228d6fb8f31633f7363d
4
- data.tar.gz: bb04a03be0cb61058682ce4d9c1159666e41feb2a794fe65d07fbae418412056
3
+ metadata.gz: 10d150f7396c9e0e18db51381fc5f262eb83595b4f3d5aad313192d86041071c
4
+ data.tar.gz: 607ae0291272ff40d08db5398f465d99872747e300500c214602b754c43811bb
5
5
  SHA512:
6
- metadata.gz: d8d4f57bb617e07f647727eaf56c81f1a7efc5eaccc71c323b54e887a270691f01f1d58cc64fe505a40ad6bd5ebd2cea3ea5fc87b7950abe773697acb5e7cc4b
7
- data.tar.gz: 9a9a4e72f99f70aca736171490bba54d563aa722576ce6d91435177ed9f0fb327080d84ead963474ab8e46cd1c2dfc4709d2ab93c20853c2a376b091399ed6ad
6
+ metadata.gz: 244d0242415308c65b07d9bfb09f6776fe3f67817dadbeb5189b4bb5756a4733668fde8b139f636c361598cb6e623ffbdfbad40c3675fd78b74df17701514183
7
+ data.tar.gz: '07884cdc2b52ad2ac5cf86814f8ae88aa864109d24584e2b256b65b0b8c8168ac7d08f71ce5f2343751b4bbcfe6d81960d4b81b50d4c49109d5b0855198761b1'
checksums.yaml.gz.sig CHANGED
Binary file
@@ -3,10 +3,12 @@
3
3
  # file: youtube_transcript2020.rb
4
4
 
5
5
  require 'yawc'
6
+ require 'json'
6
7
  require 'subunit'
7
8
  require 'youtube_id'
8
9
  require 'simple-config'
9
10
 
11
+ # https://github.com/jdepoix/youtube-transcript-api
10
12
 
11
13
  class YoutubeTranscript2020
12
14
 
@@ -54,14 +56,26 @@ class YoutubeTranscript2020
54
56
  s = RXFReader.read(obj).first
55
57
 
56
58
  if s =~ /------+/ then
59
+
57
60
  header, body = s.split(/-----+/,2)
58
61
 
59
62
  h = SimpleConfig.new(header).to_h
60
63
  @id, @author, @title = h[:id], h[:author], h[:title]
61
64
  @s = body
65
+
66
+ elsif File.extname(obj) == '.json'
67
+
68
+ r = JSON.parse(s)
69
+ @a = r.map {|x| [x['start'], x['text']]}
70
+ @s = join_sentences(@a)
71
+
72
+ return
73
+
62
74
  else
75
+
63
76
  body = obj
64
77
  raw_transcript = true
78
+
65
79
  end
66
80
 
67
81
  puts 'body: ' + body[0..400] if @debug
@@ -193,7 +207,14 @@ EOF
193
207
  elsif s[/^"/]
194
208
  a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
195
209
  elsif s[/^So,? /]
196
- a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
210
+
211
+ puts 'so? a2[-1]' + a2[-1].inspect if @debug
212
+
213
+ if a2.empty? then
214
+ a2 << [time, s.sub(/^So,? /,'').capitalize]
215
+ else
216
+ a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
217
+ end
197
218
  elsif s[/^\[(?:Music|Applause)\]/i]
198
219
 
199
220
  # ignore it
@@ -220,7 +241,17 @@ EOF
220
241
 
221
242
  # formats the paragraph with the timestamp appearing above
222
243
  @a = a2
223
- a2.map {|time, s| "\n%s\n\n%s" % [time, s]}.join("\n")
244
+
245
+ a2.map do |rawtime, s|
246
+
247
+ time = if rawtime.is_a? Float then
248
+ Subunit.seconds(rawtime).strfunit("%sc")
249
+ else
250
+ time
251
+ end
252
+
253
+ "\n%s\n\n%s" % [time, s]
254
+ end.join("\n")
224
255
 
225
256
  end
226
257
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_transcript2020
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Eh31Azzsjb9JoMQLQliugChaXNzGUL7z5A4jmxeBd91yoD6odSGqLbGuUwjMfyd/
36
36
  bYe6x24BppPTKnvGv7iKJQHe
37
37
  -----END CERTIFICATE-----
38
- date: 2022-03-21 00:00:00.000000000 Z
38
+ date: 2022-03-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: yawc
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: '0.8'
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: 0.8.5
69
+ version: 0.8.7
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0.8'
77
77
  - - ">="
78
78
  - !ruby/object:Gem::Version
79
- version: 0.8.5
79
+ version: 0.8.7
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: simple-config
82
82
  requirement: !ruby/object:Gem::Requirement
metadata.gz.sig CHANGED
Binary file