youtube-transcript-rb 0.1.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -0
  3. data/.rubocop_todo.yml +166 -0
  4. data/README.md +42 -42
  5. data/lib/youtube-transcript-rb.rb +4 -0
  6. data/lib/youtube_rb/formatters.rb +263 -0
  7. data/lib/youtube_rb/transcript/api.rb +144 -0
  8. data/lib/youtube_rb/transcript/errors.rb +215 -0
  9. data/lib/youtube_rb/transcript/settings.rb +26 -0
  10. data/lib/youtube_rb/transcript/transcript.rb +237 -0
  11. data/lib/youtube_rb/transcript/transcript_list.rb +168 -0
  12. data/lib/youtube_rb/transcript/transcript_list_fetcher.rb +220 -0
  13. data/lib/youtube_rb/transcript/transcript_parser.rb +81 -0
  14. data/lib/youtube_rb/transcript.rb +33 -0
  15. data/lib/youtube_rb/version.rb +5 -0
  16. data/sig/youtube_rb/transcript.rbs +4 -0
  17. data/spec/api_spec.rb +27 -27
  18. data/spec/errors_spec.rb +41 -41
  19. data/spec/formatters_spec.rb +45 -46
  20. data/spec/integration_spec.rb +39 -48
  21. data/spec/settings_spec.rb +16 -16
  22. data/spec/spec_helper.rb +52 -52
  23. data/spec/transcript_list_fetcher_spec.rb +38 -33
  24. data/spec/transcript_list_spec.rb +16 -19
  25. data/spec/transcript_parser_spec.rb +3 -3
  26. data/spec/transcript_spec.rb +23 -24
  27. metadata +17 -13
  28. data/lib/youtube/transcript/rb/api.rb +0 -150
  29. data/lib/youtube/transcript/rb/errors.rb +0 -217
  30. data/lib/youtube/transcript/rb/formatters.rb +0 -269
  31. data/lib/youtube/transcript/rb/settings.rb +0 -28
  32. data/lib/youtube/transcript/rb/transcript.rb +0 -239
  33. data/lib/youtube/transcript/rb/transcript_list.rb +0 -170
  34. data/lib/youtube/transcript/rb/transcript_list_fetcher.rb +0 -225
  35. data/lib/youtube/transcript/rb/transcript_parser.rb +0 -83
  36. data/lib/youtube/transcript/rb/version.rb +0 -9
  37. data/lib/youtube/transcript/rb.rb +0 -37
  38. data/sig/youtube/transcript/rb.rbs +0 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3a1c99bcacf517440c8be67c4e72f29406c2a7cee87cb844317a5693d5f1aea
4
- data.tar.gz: d19bc462f35d6d50dd13c452b3be468c5efb57bf5cdb894c22ad99be409485da
3
+ metadata.gz: 60cad31d1d80bf186d231cf3eed48cd1599f41000a3de1a185e24480421ea0dd
4
+ data.tar.gz: cc370e6e42208f18a0ed456800de0f2e8b470754c63908149171c5558e15500a
5
5
  SHA512:
6
- metadata.gz: b87fab280855a4f3f3b22786789085349492d42dd8fff3b14284b762c656757c8bd07fcfbc084ed551bd489c60ab175e4e57216edcd93c27345cfbeac53507f8
7
- data.tar.gz: bba30d381a9a685e8c6f1bfeae9fdb40a81a758142a3c0c1659cc1de182266bcb6d85cff575ffade9559c945c9ae23e4c829e10821955605974ee12d499e61e3
6
+ metadata.gz: 42f16cf9961a05528f4289886ebb08b2b06cb9060fbecfc6b41ffd4267920ef0d2123afec048c231a4f507bd44a8f1df9e383addbb3390e4e9076d9617bb22ba
7
+ data.tar.gz: b529273917d15dca2f50d28b5c7ea6f04d359c111346bd0ef4547db86b7a016dae83fdefce80b75784b43849931c6595c67833d4977e8b816176bca027883491
data/.rubocop.yml ADDED
@@ -0,0 +1,9 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ plugins:
4
+ - rubocop-rake
5
+ - rubocop-rspec
6
+
7
+ AllCops:
8
+ NewCops: enable
9
+ TargetRubyVersion: 3.2
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,166 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2026-01-09 13:39:24 UTC using RuboCop version 1.82.1.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 3
10
+ # This cop supports safe autocorrection (--autocorrect).
11
+ # Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
12
+ # NotImplementedExceptions: NotImplementedError
13
+ Lint/UnusedMethodArgument:
14
+ Exclude:
15
+ - 'lib/youtube_rb/formatters.rb'
16
+
17
+ # Offense count: 3
18
+ # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
19
+ Metrics/AbcSize:
20
+ Max: 25
21
+
22
+ # Offense count: 1
23
+ # Configuration parameters: CountComments, CountAsOne.
24
+ Metrics/ClassLength:
25
+ Max: 103
26
+
27
+ # Offense count: 2
28
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
29
+ Metrics/CyclomaticComplexity:
30
+ Max: 14
31
+
32
+ # Offense count: 7
33
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
34
+ Metrics/MethodLength:
35
+ Max: 29
36
+
37
+ # Offense count: 1
38
+ # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
39
+ Metrics/ParameterLists:
40
+ Max: 7
41
+
42
+ # Offense count: 2
43
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
44
+ Metrics/PerceivedComplexity:
45
+ Max: 15
46
+
47
+ # Offense count: 1
48
+ # Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
49
+ # CheckDefinitionPathHierarchyRoots: lib, spec, test, src
50
+ # AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
51
+ Naming/FileName:
52
+ Exclude:
53
+ - 'Rakefile.rb'
54
+ - 'lib/youtube-transcript-rb.rb'
55
+
56
+ # Offense count: 3
57
+ # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
58
+ # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
59
+ Naming/MethodParameterName:
60
+ Exclude:
61
+ - 'lib/youtube_rb/formatters.rb'
62
+
63
+ # Offense count: 2
64
+ RSpec/BeforeAfterAll:
65
+ Exclude:
66
+ - '**/spec/spec_helper.rb'
67
+ - '**/spec/rails_helper.rb'
68
+ - '**/spec/support/**/*.rb'
69
+ - 'spec/integration_spec.rb'
70
+
71
+ # Offense count: 2
72
+ # Configuration parameters: IgnoredMetadata.
73
+ RSpec/DescribeClass:
74
+ Exclude:
75
+ - '**/spec/features/**/*'
76
+ - '**/spec/requests/**/*'
77
+ - '**/spec/routing/**/*'
78
+ - '**/spec/system/**/*'
79
+ - '**/spec/views/**/*'
80
+ - 'spec/integration_spec.rb'
81
+ - 'spec/settings_spec.rb'
82
+
83
+ # Offense count: 30
84
+ # Configuration parameters: CountAsOne.
85
+ RSpec/ExampleLength:
86
+ Max: 22
87
+
88
+ # Offense count: 4
89
+ # This cop supports safe autocorrection (--autocorrect).
90
+ RSpec/ExpectActual:
91
+ Exclude:
92
+ - '**/spec/routing/**/*'
93
+ - 'spec/integration_spec.rb'
94
+
95
+ # Offense count: 2
96
+ # Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
97
+ RSpec/IndexedLet:
98
+ Exclude:
99
+ - 'spec/transcript_spec.rb'
100
+
101
+ # Offense count: 91
102
+ RSpec/MultipleExpectations:
103
+ Max: 7
104
+
105
+ # Offense count: 44
106
+ # Configuration parameters: AllowSubject.
107
+ RSpec/MultipleMemoizedHelpers:
108
+ Max: 11
109
+
110
+ # Offense count: 3
111
+ # Configuration parameters: AllowedGroups.
112
+ RSpec/NestedGroups:
113
+ Max: 4
114
+
115
+ # Offense count: 7
116
+ # Configuration parameters: CustomTransform, IgnoreMethods, IgnoreMetadata, InflectorPath, EnforcedInflector.
117
+ # SupportedInflectors: default, active_support
118
+ RSpec/SpecFilePathFormat:
119
+ Exclude:
120
+ - '**/spec/routing/**/*'
121
+ - 'spec/api_spec.rb'
122
+ - 'spec/errors_spec.rb'
123
+ - 'spec/formatters_spec.rb'
124
+ - 'spec/transcript_list_fetcher_spec.rb'
125
+ - 'spec/transcript_list_spec.rb'
126
+ - 'spec/transcript_parser_spec.rb'
127
+ - 'spec/transcript_spec.rb'
128
+
129
+ # Offense count: 10
130
+ # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
131
+ RSpec/VerifiedDoubles:
132
+ Exclude:
133
+ - 'spec/api_spec.rb'
134
+ - 'spec/errors_spec.rb'
135
+ - 'spec/transcript_list_fetcher_spec.rb'
136
+ - 'spec/transcript_spec.rb'
137
+
138
+ # Offense count: 1
139
+ # Configuration parameters: AllowedConstants.
140
+ Style/Documentation:
141
+ Exclude:
142
+ - 'spec/**/*'
143
+ - 'test/**/*'
144
+ - 'lib/youtube_rb/transcript.rb'
145
+
146
+ # Offense count: 8
147
+ # This cop supports safe autocorrection (--autocorrect).
148
+ # Configuration parameters: EnforcedStyle, MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
149
+ # SupportedStyles: annotated, template, unannotated
150
+ Style/FormatStringToken:
151
+ Exclude:
152
+ - 'lib/youtube_rb/formatters.rb'
153
+
154
+ # Offense count: 1168
155
+ # This cop supports safe autocorrection (--autocorrect).
156
+ # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
157
+ # SupportedStyles: single_quotes, double_quotes
158
+ Style/StringLiterals:
159
+ Enabled: false
160
+
161
+ # Offense count: 6
162
+ # This cop supports safe autocorrection (--autocorrect).
163
+ # Configuration parameters: AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
164
+ # URISchemes: http, https
165
+ Layout/LineLength:
166
+ Max: 142
data/README.md CHANGED
@@ -47,9 +47,9 @@ gem install youtube-transcript-rb
47
47
  The easiest way to get a transcript for a given video is to execute:
48
48
 
49
49
  ```ruby
50
- require 'youtube/transcript/rb'
50
+ require 'youtube_rb/transcript'
51
51
 
52
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new
52
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
53
53
  api.fetch(video_id)
54
54
  ```
55
55
 
@@ -62,14 +62,14 @@ api.fetch(video_id)
62
62
  This will return a `FetchedTranscript` object looking somewhat like this:
63
63
 
64
64
  ```ruby
65
- #<Youtube::Transcript::Rb::FetchedTranscript
65
+ #<YoutubeRb::Transcript::FetchedTranscript
66
66
  @video_id="12345",
67
67
  @language="English",
68
68
  @language_code="en",
69
69
  @is_generated=false,
70
70
  @snippets=[
71
- #<Youtube::Transcript::Rb::TranscriptSnippet @text="Hey there", @start=0.0, @duration=1.54>,
72
- #<Youtube::Transcript::Rb::TranscriptSnippet @text="how are you", @start=1.54, @duration=4.16>,
71
+ #<YoutubeRb::Transcript::TranscriptSnippet @text="Hey there", @start=0.0, @duration=1.54>,
72
+ #<YoutubeRb::Transcript::TranscriptSnippet @text="how are you", @start=1.54, @duration=4.16>,
73
73
  # ...
74
74
  ]
75
75
  >
@@ -78,7 +78,7 @@ This will return a `FetchedTranscript` object looking somewhat like this:
78
78
  This object implements `Enumerable`, so you can iterate over it:
79
79
 
80
80
  ```ruby
81
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new
81
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
82
82
  fetched_transcript = api.fetch(video_id)
83
83
 
84
84
  # is iterable
@@ -117,13 +117,13 @@ an array of hashes:
117
117
  You can also use the convenience methods on the module directly:
118
118
 
119
119
  ```ruby
120
- require 'youtube/transcript/rb'
120
+ require 'youtube_rb/transcript'
121
121
 
122
122
  # Fetch a transcript
123
- transcript = Youtube::Transcript::Rb.fetch(video_id)
123
+ transcript = YoutubeRb::Transcript.fetch(video_id)
124
124
 
125
125
  # List available transcripts
126
- transcript_list = Youtube::Transcript::Rb.list(video_id)
126
+ transcript_list = YoutubeRb::Transcript.list(video_id)
127
127
  ```
128
128
 
129
129
  ### Retrieve different languages
@@ -132,7 +132,7 @@ You can add the `languages` param if you want to make sure the transcripts are r
132
132
  (it defaults to English).
133
133
 
134
134
  ```ruby
135
- Youtube::Transcript::Rb::YouTubeTranscriptApi.new.fetch(video_id, languages: ['de', 'en'])
135
+ YoutubeRb::Transcript::YouTubeTranscriptApi.new.fetch(video_id, languages: ['de', 'en'])
136
136
  ```
137
137
 
138
138
  It's an array of language codes in descending priority. In this example it will first try to fetch the German
@@ -142,7 +142,7 @@ which languages are available first, [have a look at `list`](#list-available-tra
142
142
  If you only want one language, you still need to format the `languages` argument as an array:
143
143
 
144
144
  ```ruby
145
- Youtube::Transcript::Rb::YouTubeTranscriptApi.new.fetch(video_id, languages: ['de'])
145
+ YoutubeRb::Transcript::YouTubeTranscriptApi.new.fetch(video_id, languages: ['de'])
146
146
  ```
147
147
 
148
148
  ### Preserve formatting
@@ -151,7 +151,7 @@ You can also add `preserve_formatting: true` if you'd like to keep HTML formatti
151
151
  and `<b>` (bold).
152
152
 
153
153
  ```ruby
154
- Youtube::Transcript::Rb::YouTubeTranscriptApi.new.fetch(video_id, languages: ['de', 'en'], preserve_formatting: true)
154
+ YoutubeRb::Transcript::YouTubeTranscriptApi.new.fetch(video_id, languages: ['de', 'en'], preserve_formatting: true)
155
155
  ```
156
156
 
157
157
  ### List available transcripts
@@ -159,7 +159,7 @@ Youtube::Transcript::Rb::YouTubeTranscriptApi.new.fetch(video_id, languages: ['d
159
159
  If you want to list all transcripts which are available for a given video you can call:
160
160
 
161
161
  ```ruby
162
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new
162
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
163
163
  transcript_list = api.list(video_id)
164
164
  ```
165
165
 
@@ -220,9 +220,9 @@ puts translated_transcript.fetch
220
220
  ### By example
221
221
 
222
222
  ```ruby
223
- require 'youtube/transcript/rb'
223
+ require 'youtube_rb/transcript'
224
224
 
225
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new
225
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
226
226
 
227
227
  # retrieve the available transcripts
228
228
  transcript_list = api.list('video_id')
@@ -262,7 +262,7 @@ transcript = transcript_list.find_generated_transcript(['de', 'en'])
262
262
  You can fetch transcripts for multiple videos at once:
263
263
 
264
264
  ```ruby
265
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new
265
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
266
266
 
267
267
  # Fetch multiple videos
268
268
  transcripts = api.fetch_all(['video1', 'video2', 'video3'])
@@ -297,14 +297,14 @@ The `Formatters` module provides a few basic formatters:
297
297
  Here is how to import from the `Formatters` module:
298
298
 
299
299
  ```ruby
300
- require 'youtube/transcript/rb'
300
+ require 'youtube_rb/transcript'
301
301
 
302
302
  # Some provided formatter classes, each outputs a different string format.
303
- Youtube::Transcript::Rb::Formatters::JSONFormatter
304
- Youtube::Transcript::Rb::Formatters::TextFormatter
305
- Youtube::Transcript::Rb::Formatters::PrettyPrintFormatter
306
- Youtube::Transcript::Rb::Formatters::WebVTTFormatter
307
- Youtube::Transcript::Rb::Formatters::SRTFormatter
303
+ YoutubeRb::Formatters::JSONFormatter
304
+ YoutubeRb::Formatters::TextFormatter
305
+ YoutubeRb::Formatters::PrettyPrintFormatter
306
+ YoutubeRb::Formatters::WebVTTFormatter
307
+ YoutubeRb::Formatters::SRTFormatter
308
308
  ```
309
309
 
310
310
  ### Formatter Example
@@ -312,12 +312,12 @@ Youtube::Transcript::Rb::Formatters::SRTFormatter
312
312
  Let's say we wanted to retrieve a transcript and store it to a JSON file. That would look something like this:
313
313
 
314
314
  ```ruby
315
- require 'youtube/transcript/rb'
315
+ require 'youtube_rb/transcript'
316
316
 
317
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new
317
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
318
318
  transcript = api.fetch(video_id)
319
319
 
320
- formatter = Youtube::Transcript::Rb::Formatters::JSONFormatter.new
320
+ formatter = YoutubeRb::Formatters::JSONFormatter.new
321
321
 
322
322
  # .format_transcript(transcript) turns the transcript into a JSON string.
323
323
  json_formatted = formatter.format_transcript(transcript)
@@ -334,7 +334,7 @@ Since `JSONFormatter` leverages `JSON.generate` you can also forward keyword arg
334
334
  `.format_transcript(transcript)` such as making your file output prettier:
335
335
 
336
336
  ```ruby
337
- json_formatted = Youtube::Transcript::Rb::Formatters::JSONFormatter.new.format_transcript(
337
+ json_formatted = YoutubeRb::Formatters::JSONFormatter.new.format_transcript(
338
338
  transcript,
339
339
  indent: ' ',
340
340
  space: ' '
@@ -346,9 +346,9 @@ json_formatted = Youtube::Transcript::Rb::Formatters::JSONFormatter.new.format_t
346
346
  You can also use the `FormatterLoader` to dynamically load formatters by name:
347
347
 
348
348
  ```ruby
349
- require 'youtube/transcript/rb'
349
+ require 'youtube_rb/transcript'
350
350
 
351
- loader = Youtube::Transcript::Rb::Formatters::FormatterLoader.new
351
+ loader = YoutubeRb::Formatters::FormatterLoader.new
352
352
 
353
353
  # Load by type name: "json", "pretty", "text", "webvtt", "srt"
354
354
  formatter = loader.load("json")
@@ -364,7 +364,7 @@ You can implement your own formatter class. Just inherit from the `Formatter` ba
364
364
  `format_transcript` and `format_transcripts` methods which should ultimately return a string:
365
365
 
366
366
  ```ruby
367
- class MyCustomFormatter < Youtube::Transcript::Rb::Formatters::Formatter
367
+ class MyCustomFormatter < YoutubeRb::Formatters::Formatter
368
368
  def format_transcript(transcript, **options)
369
369
  # Do your custom work in here, but return a string.
370
370
  'your processed output data as a string.'
@@ -382,28 +382,28 @@ end
382
382
  The library provides a comprehensive set of exceptions for different error scenarios:
383
383
 
384
384
  ```ruby
385
- require 'youtube/transcript/rb'
385
+ require 'youtube_rb/transcript'
386
386
 
387
387
  begin
388
- transcript = Youtube::Transcript::Rb.fetch(video_id)
389
- rescue Youtube::Transcript::Rb::TranscriptsDisabled => e
388
+ transcript = YoutubeRb::Transcript.fetch(video_id)
389
+ rescue YoutubeRb::Transcript::TranscriptsDisabled => e
390
390
  puts "Subtitles are disabled for this video"
391
- rescue Youtube::Transcript::Rb::NoTranscriptFound => e
391
+ rescue YoutubeRb::Transcript::NoTranscriptFound => e
392
392
  puts "No transcript found for the requested languages"
393
393
  puts e.requested_language_codes
394
- rescue Youtube::Transcript::Rb::NoTranscriptAvailable => e
394
+ rescue YoutubeRb::Transcript::NoTranscriptAvailable => e
395
395
  puts "No transcripts are available for this video"
396
- rescue Youtube::Transcript::Rb::VideoUnavailable => e
396
+ rescue YoutubeRb::Transcript::VideoUnavailable => e
397
397
  puts "The video is no longer available"
398
- rescue Youtube::Transcript::Rb::TooManyRequests => e
398
+ rescue YoutubeRb::Transcript::TooManyRequests => e
399
399
  puts "Rate limited by YouTube"
400
- rescue Youtube::Transcript::Rb::RequestBlocked => e
400
+ rescue YoutubeRb::Transcript::RequestBlocked => e
401
401
  puts "Request blocked by YouTube"
402
- rescue Youtube::Transcript::Rb::IpBlocked => e
402
+ rescue YoutubeRb::Transcript::IpBlocked => e
403
403
  puts "Your IP has been blocked by YouTube"
404
- rescue Youtube::Transcript::Rb::PoTokenRequired => e
404
+ rescue YoutubeRb::Transcript::PoTokenRequired => e
405
405
  puts "PO token required - this is a YouTube limitation"
406
- rescue Youtube::Transcript::Rb::CouldNotRetrieveTranscript => e
406
+ rescue YoutubeRb::Transcript::CouldNotRetrieveTranscript => e
407
407
  puts "Could not retrieve transcript: #{e.message}"
408
408
  end
409
409
  ```
@@ -456,11 +456,11 @@ http_client = Faraday.new do |conn|
456
456
  conn.adapter Faraday.default_adapter
457
457
  end
458
458
 
459
- api = Youtube::Transcript::Rb::YouTubeTranscriptApi.new(http_client: http_client)
459
+ api = YoutubeRb::Transcript::YouTubeTranscriptApi.new(http_client: http_client)
460
460
  api.fetch(video_id)
461
461
 
462
462
  # Share same connection between two instances
463
- api_2 = Youtube::Transcript::Rb::YouTubeTranscriptApi.new(http_client: http_client)
463
+ api_2 = YoutubeRb::Transcript::YouTubeTranscriptApi.new(http_client: http_client)
464
464
  api_2.fetch(video_id)
465
465
  ```
466
466
 
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "youtube_rb/transcript"
4
+ require_relative "youtube_rb/formatters"
@@ -0,0 +1,263 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module YoutubeRb
  # Namespace holding every transcript formatter plus the loader that
  # instantiates them by name.
  module Formatters
    # Abstract base class. Concrete formatters inherit from it and override
    # both #format_transcript and #format_transcripts.
    class Formatter
      # Format a single transcript.
      #
      # @param transcript [FetchedTranscript] The transcript to format
      # @param options [Hash] Additional formatting options
      # @return [String] The formatted transcript
      # @raise [NotImplementedError] Always, unless overridden by a subclass
      def format_transcript(transcript, **options)
        raise NotImplementedError, "Subclass must implement #format_transcript"
      end

      # Format multiple transcripts.
      #
      # @param transcripts [Array<FetchedTranscript>] The transcripts to format
      # @param options [Hash] Additional formatting options
      # @return [String] The formatted transcripts
      # @raise [NotImplementedError] Always, unless overridden by a subclass
      def format_transcripts(transcripts, **options)
        raise NotImplementedError, "Subclass must implement #format_transcripts"
      end
    end

    # Formats transcripts as pretty-printed Ruby data structures.
    class PrettyPrintFormatter < Formatter
      # Column width handed to PP.pp when the caller gives no :width option.
      DEFAULT_WIDTH = 79

      # Format a single transcript as pretty-printed output.
      #
      # @param transcript [#to_raw_data] The transcript to format
      # @param options [Hash] Only :width (maximum line width) is read
      # @return [String] Pretty-printed transcript data
      def format_transcript(transcript, **options)
        pretty_print(transcript.to_raw_data, options[:width])
      end

      # Format multiple transcripts as one pretty-printed array.
      #
      # @param transcripts [Array<#to_raw_data>] The transcripts to format
      # @param options [Hash] Only :width (maximum line width) is read
      # @return [String] Pretty-printed transcripts data
      def format_transcripts(transcripts, **options)
        pretty_print(transcripts.map(&:to_raw_data), options[:width])
      end

      private

      # Render +data+ through PP into a fresh, unfrozen string buffer.
      def pretty_print(data, width)
        require "pp" # loaded lazily, only when this formatter is actually used
        PP.pp(data, +"", width || DEFAULT_WIDTH)
      end
    end

    # Formats transcripts as JSON.
    class JSONFormatter < Formatter
      # Format a single transcript as JSON.
      #
      # @param transcript [#to_raw_data] The transcript to format
      # @param options [Hash] Options passed to JSON.generate (e.g., :indent, :space)
      # @return [String] JSON representation of the transcript
      def format_transcript(transcript, **options)
        JSON.generate(transcript.to_raw_data, options)
      end

      # Format multiple transcripts as a JSON array.
      #
      # @param transcripts [Array<#to_raw_data>] The transcripts to format
      # @param options [Hash] Options passed to JSON.generate
      # @return [String] JSON array representation of the transcripts
      def format_transcripts(transcripts, **options)
        JSON.generate(transcripts.map(&:to_raw_data), options)
      end
    end

    # Formats transcripts as plain text (text only, no timestamps).
    class TextFormatter < Formatter
      # Format a single transcript as plain text.
      #
      # @param transcript [Enumerable<#text>] The transcript to format
      # @param _options [Hash] Accepted for interface compatibility; unused
      # @return [String] Snippet texts joined by single newlines
      def format_transcript(transcript, **_options)
        transcript.map(&:text).join("\n")
      end

      # Format multiple transcripts by formatting each one with
      # #format_transcript (so subclasses reuse this method unchanged).
      #
      # @param transcripts [Array] The transcripts to format
      # @param options [Hash] Forwarded to #format_transcript
      # @return [String] Formatted transcripts separated by triple newlines
      def format_transcripts(transcripts, **options)
        transcripts.map { |t| format_transcript(t, **options) }.join("\n\n\n")
      end
    end

    # Base class for timestamp-based formatters (SRT, WebVTT). Subclasses
    # supply #format_timestamp, #format_transcript_header and
    # #format_transcript_helper.
    class TextBasedFormatter < TextFormatter
      # Format a single transcript with timestamps.
      #
      # @param transcript [#to_a] Yields snippets responding to #start, #duration and #text
      # @param _options [Hash] Accepted for interface compatibility; unused
      # @return [String] Formatted transcript with timestamps
      def format_transcript(transcript, **_options)
        snippets = transcript.to_a

        entries = snippets.each_with_index.map do |snippet, index|
          end_time = snippet.start + snippet.duration

          # Cut this cue short when the next snippet starts before it would
          # end, so consecutive cues never overlap.
          following = snippets[index + 1]
          end_time = following.start if following && following.start < end_time

          time_text = "#{seconds_to_timestamp(snippet.start)} --> #{seconds_to_timestamp(end_time)}"
          format_transcript_helper(index, time_text, snippet)
        end

        format_transcript_header(entries)
      end

      protected

      # Format a timestamp from components.
      #
      # @param hours [Integer] Hours component
      # @param mins [Integer] Minutes component
      # @param secs [Integer] Seconds component
      # @param ms [Integer] Milliseconds component
      # @return [String] Formatted timestamp
      def format_timestamp(hours, mins, secs, ms)
        raise NotImplementedError, "Subclass must implement #format_timestamp"
      end

      # Format the transcript header/wrapper.
      #
      # @param lines [Array<String>] The formatted entries
      # @return [String] The complete formatted transcript
      def format_transcript_header(lines)
        raise NotImplementedError, "Subclass must implement #format_transcript_header"
      end

      # Format a single transcript entry.
      #
      # @param index [Integer] The entry index (0-based)
      # @param time_text [String] The formatted time range
      # @param snippet [TranscriptSnippet] The snippet to format
      # @return [String] The formatted entry
      def format_transcript_helper(index, time_text, snippet)
        raise NotImplementedError, "Subclass must implement #format_transcript_helper"
      end

      private

      # Convert seconds to a timestamp string via #format_timestamp.
      #
      # The value is rounded to whole milliseconds first and only then split
      # into components. Rounding the fractional part on its own could yield
      # 1000 ms (e.g. 1.9996 s) without carrying into the seconds field,
      # producing an invalid timestamp such as "00:00:01,1000".
      #
      # @param time [Numeric, #to_f] Time in seconds
      # @return [String] Formatted timestamp
      def seconds_to_timestamp(time)
        total_ms = (time.to_f * 1000).round
        total_secs, ms = total_ms.divmod(1000)
        total_mins, secs = total_secs.divmod(60)
        hours, mins = total_mins.divmod(60)

        format_timestamp(hours, mins, secs, ms)
      end
    end

    # Formats transcripts as SRT (SubRip) subtitles.
    #
    # @example SRT format
    #   1
    #   00:00:00,000 --> 00:00:02,500
    #   Hello world
    #
    #   2
    #   00:00:02,500 --> 00:00:05,000
    #   This is a test
    #
    class SRTFormatter < TextBasedFormatter
      protected

      # SRT separates seconds and milliseconds with a comma.
      def format_timestamp(hours, mins, secs, ms)
        format("%02d:%02d:%02d,%03d", hours, mins, secs, ms)
      end

      # Entries are separated by a blank line; output ends with a newline.
      def format_transcript_header(lines)
        "#{lines.join("\n\n")}\n"
      end

      # Each entry starts with its 1-based sequence number.
      def format_transcript_helper(index, time_text, snippet)
        "#{index + 1}\n#{time_text}\n#{snippet.text}"
      end
    end

    # Formats transcripts as WebVTT (Web Video Text Tracks).
    #
    # @example WebVTT format
    #   WEBVTT
    #
    #   00:00:00.000 --> 00:00:02.500
    #   Hello world
    #
    #   00:00:02.500 --> 00:00:05.000
    #   This is a test
    #
    class WebVTTFormatter < TextBasedFormatter
      protected

      # WebVTT separates seconds and milliseconds with a dot.
      def format_timestamp(hours, mins, secs, ms)
        format("%02d:%02d:%02d.%03d", hours, mins, secs, ms)
      end

      # Output begins with the "WEBVTT" signature line and a blank line.
      def format_transcript_header(lines)
        "WEBVTT\n\n#{lines.join("\n\n")}\n"
      end

      # Entries carry no sequence number, so the index is ignored.
      def format_transcript_helper(_index, time_text, snippet)
        "#{time_text}\n#{snippet.text}"
      end
    end

    # Utility class to load formatters by type name.
    class FormatterLoader
      # Mapping of format names to formatter classes
      TYPES = {
        "json" => JSONFormatter,
        "pretty" => PrettyPrintFormatter,
        "text" => TextFormatter,
        "webvtt" => WebVTTFormatter,
        "srt" => SRTFormatter
      }.freeze

      # Error raised when an unknown formatter type is requested
      class UnknownFormatterType < StandardError
        # @param formatter_type [String] The unsupported type name
        def initialize(formatter_type)
          super(
            "The format '#{formatter_type}' is not supported. " \
            "Choose one of the following formats: #{TYPES.keys.join(', ')}"
          )
        end
      end

      # Load a formatter by type name.
      #
      # @param formatter_type [String, Symbol] The formatter type (json, pretty, text, webvtt, srt)
      # @return [Formatter] A new instance of the requested formatter
      # @raise [UnknownFormatterType] If the formatter type is not supported
      #
      # @example
      #   loader = FormatterLoader.new
      #   formatter = loader.load("json")
      #   output = formatter.format_transcript(transcript)
      #
      def load(formatter_type = "pretty")
        type_name = formatter_type.to_s
        formatter_class = TYPES.fetch(type_name) { raise UnknownFormatterType, type_name }
        formatter_class.new
      end
    end
  end
end