youtube-transcript-rb 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d97023aca42aac08e42c3857d940e3a42ba735c895685522048caee415fe4af
4
- data.tar.gz: 1d435b06743716beb8f3e892bc97a2c7d105f6ab710f61659cd24dd6ce438a81
3
+ metadata.gz: 60cad31d1d80bf186d231cf3eed48cd1599f41000a3de1a185e24480421ea0dd
4
+ data.tar.gz: cc370e6e42208f18a0ed456800de0f2e8b470754c63908149171c5558e15500a
5
5
  SHA512:
6
- metadata.gz: 5b5f345ebcef944ba98ae4adcc42d9951c5e04bf0230e98c18dc41e0b3b715cfe57a34722924042cbcf9b7244d00ad31767bc2ce287136656c2b05b3378f2db8
7
- data.tar.gz: 07ba07e26a8ff0f895c64767869bda918122c35b21b39b945fc608067d864643712386987155e3ae1dbe24f5310410e642aa58fd03f1c15f4df358567a51fae2
6
+ metadata.gz: 42f16cf9961a05528f4289886ebb08b2b06cb9060fbecfc6b41ffd4267920ef0d2123afec048c231a4f507bd44a8f1df9e383addbb3390e4e9076d9617bb22ba
7
+ data.tar.gz: b529273917d15dca2f50d28b5c7ea6f04d359c111346bd0ef4547db86b7a016dae83fdefce80b75784b43849931c6595c67833d4977e8b816176bca027883491
data/.rubocop.yml ADDED
@@ -0,0 +1,9 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ plugins:
4
+ - rubocop-rake
5
+ - rubocop-rspec
6
+
7
+ AllCops:
8
+ NewCops: enable
9
+ TargetRubyVersion: 3.2
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,166 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2026-01-09 13:39:24 UTC using RuboCop version 1.82.1.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 3
10
+ # This cop supports safe autocorrection (--autocorrect).
11
+ # Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
12
+ # NotImplementedExceptions: NotImplementedError
13
+ Lint/UnusedMethodArgument:
14
+ Exclude:
15
+ - 'lib/youtube_rb/formatters.rb'
16
+
17
+ # Offense count: 3
18
+ # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
19
+ Metrics/AbcSize:
20
+ Max: 25
21
+
22
+ # Offense count: 1
23
+ # Configuration parameters: CountComments, CountAsOne.
24
+ Metrics/ClassLength:
25
+ Max: 103
26
+
27
+ # Offense count: 2
28
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
29
+ Metrics/CyclomaticComplexity:
30
+ Max: 14
31
+
32
+ # Offense count: 7
33
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
34
+ Metrics/MethodLength:
35
+ Max: 29
36
+
37
+ # Offense count: 1
38
+ # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
39
+ Metrics/ParameterLists:
40
+ Max: 7
41
+
42
+ # Offense count: 2
43
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
44
+ Metrics/PerceivedComplexity:
45
+ Max: 15
46
+
47
+ # Offense count: 1
48
+ # Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
49
+ # CheckDefinitionPathHierarchyRoots: lib, spec, test, src
50
+ # AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
51
+ Naming/FileName:
52
+ Exclude:
53
+ - 'Rakefile.rb'
54
+ - 'lib/youtube-transcript-rb.rb'
55
+
56
+ # Offense count: 3
57
+ # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
58
+ # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
59
+ Naming/MethodParameterName:
60
+ Exclude:
61
+ - 'lib/youtube_rb/formatters.rb'
62
+
63
+ # Offense count: 2
64
+ RSpec/BeforeAfterAll:
65
+ Exclude:
66
+ - '**/spec/spec_helper.rb'
67
+ - '**/spec/rails_helper.rb'
68
+ - '**/spec/support/**/*.rb'
69
+ - 'spec/integration_spec.rb'
70
+
71
+ # Offense count: 2
72
+ # Configuration parameters: IgnoredMetadata.
73
+ RSpec/DescribeClass:
74
+ Exclude:
75
+ - '**/spec/features/**/*'
76
+ - '**/spec/requests/**/*'
77
+ - '**/spec/routing/**/*'
78
+ - '**/spec/system/**/*'
79
+ - '**/spec/views/**/*'
80
+ - 'spec/integration_spec.rb'
81
+ - 'spec/settings_spec.rb'
82
+
83
+ # Offense count: 30
84
+ # Configuration parameters: CountAsOne.
85
+ RSpec/ExampleLength:
86
+ Max: 22
87
+
88
+ # Offense count: 4
89
+ # This cop supports safe autocorrection (--autocorrect).
90
+ RSpec/ExpectActual:
91
+ Exclude:
92
+ - '**/spec/routing/**/*'
93
+ - 'spec/integration_spec.rb'
94
+
95
+ # Offense count: 2
96
+ # Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
97
+ RSpec/IndexedLet:
98
+ Exclude:
99
+ - 'spec/transcript_spec.rb'
100
+
101
+ # Offense count: 91
102
+ RSpec/MultipleExpectations:
103
+ Max: 7
104
+
105
+ # Offense count: 44
106
+ # Configuration parameters: AllowSubject.
107
+ RSpec/MultipleMemoizedHelpers:
108
+ Max: 11
109
+
110
+ # Offense count: 3
111
+ # Configuration parameters: AllowedGroups.
112
+ RSpec/NestedGroups:
113
+ Max: 4
114
+
115
+ # Offense count: 7
116
+ # Configuration parameters: CustomTransform, IgnoreMethods, IgnoreMetadata, InflectorPath, EnforcedInflector.
117
+ # SupportedInflectors: default, active_support
118
+ RSpec/SpecFilePathFormat:
119
+ Exclude:
120
+ - '**/spec/routing/**/*'
121
+ - 'spec/api_spec.rb'
122
+ - 'spec/errors_spec.rb'
123
+ - 'spec/formatters_spec.rb'
124
+ - 'spec/transcript_list_fetcher_spec.rb'
125
+ - 'spec/transcript_list_spec.rb'
126
+ - 'spec/transcript_parser_spec.rb'
127
+ - 'spec/transcript_spec.rb'
128
+
129
+ # Offense count: 10
130
+ # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
131
+ RSpec/VerifiedDoubles:
132
+ Exclude:
133
+ - 'spec/api_spec.rb'
134
+ - 'spec/errors_spec.rb'
135
+ - 'spec/transcript_list_fetcher_spec.rb'
136
+ - 'spec/transcript_spec.rb'
137
+
138
+ # Offense count: 1
139
+ # Configuration parameters: AllowedConstants.
140
+ Style/Documentation:
141
+ Exclude:
142
+ - 'spec/**/*'
143
+ - 'test/**/*'
144
+ - 'lib/youtube_rb/transcript.rb'
145
+
146
+ # Offense count: 8
147
+ # This cop supports safe autocorrection (--autocorrect).
148
+ # Configuration parameters: EnforcedStyle, MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
149
+ # SupportedStyles: annotated, template, unannotated
150
+ Style/FormatStringToken:
151
+ Exclude:
152
+ - 'lib/youtube_rb/formatters.rb'
153
+
154
+ # Offense count: 1168
155
+ # This cop supports safe autocorrection (--autocorrect).
156
+ # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
157
+ # SupportedStyles: single_quotes, double_quotes
158
+ Style/StringLiterals:
159
+ Enabled: false
160
+
161
+ # Offense count: 6
162
+ # This cop supports safe autocorrection (--autocorrect).
163
+ # Configuration parameters: AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
164
+ # URISchemes: http, https
165
+ Layout/LineLength:
166
+ Max: 142
data/README.md CHANGED
@@ -300,11 +300,11 @@ Here is how to import from the `Formatters` module:
300
300
  require 'youtube_rb/formatters'
301
301
 
302
302
  # Some provided formatter classes, each outputs a different string format.
303
- YoutubeRb::Transcript::Formatters::JSONFormatter
304
- YoutubeRb::Transcript::Formatters::TextFormatter
305
- YoutubeRb::Transcript::Formatters::PrettyPrintFormatter
306
- YoutubeRb::Transcript::Formatters::WebVTTFormatter
307
- YoutubeRb::Transcript::Formatters::SRTFormatter
303
+ YoutubeRb::Formatters::JSONFormatter
304
+ YoutubeRb::Formatters::TextFormatter
305
+ YoutubeRb::Formatters::PrettyPrintFormatter
306
+ YoutubeRb::Formatters::WebVTTFormatter
307
+ YoutubeRb::Formatters::SRTFormatter
308
308
  ```
309
309
 
310
310
  ### Formatter Example
@@ -317,7 +317,7 @@ require 'youtube_rb/transcript'
317
317
  api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
318
318
  transcript = api.fetch(video_id)
319
319
 
320
- formatter = YoutubeRb::Transcript::Formatters::JSONFormatter.new
320
+ formatter = YoutubeRb::Formatters::JSONFormatter.new
321
321
 
322
322
  # .format_transcript(transcript) turns the transcript into a JSON string.
323
323
  json_formatted = formatter.format_transcript(transcript)
@@ -334,7 +334,7 @@ Since `JSONFormatter` leverages `JSON.generate` you can also forward keyword arg
334
334
  `.format_transcript(transcript)` such as making your file output prettier:
335
335
 
336
336
  ```ruby
337
- json_formatted = YoutubeRb::Transcript::Formatters::JSONFormatter.new.format_transcript(
337
+ json_formatted = YoutubeRb::Formatters::JSONFormatter.new.format_transcript(
338
338
  transcript,
339
339
  indent: ' ',
340
340
  space: ' '
@@ -348,7 +348,7 @@ You can also use the `FormatterLoader` to dynamically load formatters by name:
348
348
  ```ruby
349
349
  require 'youtube_rb/formatters'
350
350
 
351
- loader = YoutubeRb::Transcript::Formatters::FormatterLoader.new
351
+ loader = YoutubeRb::Formatters::FormatterLoader.new
352
352
 
353
353
  # Load by type name: "json", "pretty", "text", "webvtt", "srt"
354
354
  formatter = loader.load("json")
@@ -364,7 +364,7 @@ You can implement your own formatter class. Just inherit from the `Formatter` ba
364
364
  `format_transcript` and `format_transcripts` methods which should ultimately return a string:
365
365
 
366
366
  ```ruby
367
- class MyCustomFormatter < YoutubeRb::Transcript::Formatters::Formatter
367
+ class MyCustomFormatter < YoutubeRb::Formatters::Formatter
368
368
  def format_transcript(transcript, **options)
369
369
  # Do your custom work in here, but return a string.
370
370
  'your processed output data as a string.'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "youtube_rb/transcript"
4
+ require_relative "youtube_rb/formatters"
@@ -0,0 +1,263 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module YoutubeRb
  # Module containing all transcript formatters
  module Formatters
    # Base formatter class. All formatters should inherit from this class
    # and implement their own format_transcript and format_transcripts methods.
    class Formatter
      # Format a single transcript
      #
      # @param transcript [FetchedTranscript] The transcript to format
      # @param options [Hash] Additional formatting options
      # @return [String] The formatted transcript
      # @raise [NotImplementedError] always; subclasses must override
      def format_transcript(transcript, **options)
        raise NotImplementedError, "Subclass must implement #format_transcript"
      end

      # Format multiple transcripts
      #
      # @param transcripts [Array<FetchedTranscript>] The transcripts to format
      # @param options [Hash] Additional formatting options
      # @return [String] The formatted transcripts
      # @raise [NotImplementedError] always; subclasses must override
      def format_transcripts(transcripts, **options)
        raise NotImplementedError, "Subclass must implement #format_transcripts"
      end
    end

    # Formats transcript as pretty-printed Ruby data structures
    class PrettyPrintFormatter < Formatter
      # Format a single transcript as pretty-printed output
      #
      # @param transcript [FetchedTranscript] The transcript to format
      # @param options [Hash] Only :width is read (defaults to 79 columns)
      # @return [String] Pretty-printed transcript data
      def format_transcript(transcript, **options)
        require "pp"
        # +"" gives PP a mutable output buffer even under frozen_string_literal.
        PP.pp(transcript.to_raw_data, +"", options[:width] || 79)
      end

      # Format multiple transcripts as pretty-printed output
      #
      # @param transcripts [Array<FetchedTranscript>] The transcripts to format
      # @param options [Hash] Only :width is read (defaults to 79 columns)
      # @return [String] Pretty-printed transcripts data
      def format_transcripts(transcripts, **options)
        require "pp"
        data = transcripts.map(&:to_raw_data)
        PP.pp(data, +"", options[:width] || 79)
      end
    end

    # Formats transcript as JSON
    class JSONFormatter < Formatter
      # Format a single transcript as JSON
      #
      # @param transcript [FetchedTranscript] The transcript to format
      # @param options [Hash] Options passed to JSON.generate (e.g., :indent, :space)
      # @return [String] JSON representation of the transcript
      def format_transcript(transcript, **options)
        JSON.generate(transcript.to_raw_data, options)
      end

      # Format multiple transcripts as JSON array
      #
      # @param transcripts [Array<FetchedTranscript>] The transcripts to format
      # @param options [Hash] Options passed to JSON.generate
      # @return [String] JSON array representation of the transcripts
      def format_transcripts(transcripts, **options)
        data = transcripts.map(&:to_raw_data)
        JSON.generate(data, options)
      end
    end

    # Formats transcript as plain text (text only, no timestamps)
    class TextFormatter < Formatter
      # Format a single transcript as plain text
      #
      # @param transcript [FetchedTranscript] The transcript to format
      # @param options [Hash] Unused options
      # @return [String] Plain text with each line separated by newlines
      def format_transcript(transcript, **options)
        transcript.map(&:text).join("\n")
      end

      # Format multiple transcripts as plain text
      #
      # Dispatches to #format_transcript, so subclasses that override it
      # (SRT, WebVTT) get multi-transcript support for free.
      #
      # @param transcripts [Array<FetchedTranscript>] The transcripts to format
      # @param options [Hash] Forwarded to #format_transcript
      # @return [String] Plain text with transcripts separated by triple newlines
      def format_transcripts(transcripts, **options)
        transcripts.map { |t| format_transcript(t, **options) }.join("\n\n\n")
      end
    end

    # Base class for timestamp-based formatters (SRT, WebVTT)
    class TextBasedFormatter < TextFormatter
      # Format a single transcript with timestamps
      #
      # @param transcript [FetchedTranscript] The transcript to format
      # @param options [Hash] Unused options
      # @return [String] Formatted transcript with timestamps
      def format_transcript(transcript, **options)
        lines = []
        snippets = transcript.to_a

        snippets.each_with_index do |snippet, i|
          end_time = snippet.start + snippet.duration

          # Use next snippet's start time if it starts before current end time
          end_time = snippets[i + 1].start if i < snippets.length - 1 && snippets[i + 1].start < end_time

          time_text = "#{seconds_to_timestamp(snippet.start)} --> #{seconds_to_timestamp(end_time)}"
          lines << format_transcript_helper(i, time_text, snippet)
        end

        format_transcript_header(lines)
      end

      protected

      # Format a timestamp from components
      #
      # @param hours [Integer] Hours component
      # @param mins [Integer] Minutes component
      # @param secs [Integer] Seconds component
      # @param ms [Integer] Milliseconds component
      # @return [String] Formatted timestamp
      # @raise [NotImplementedError] always; subclasses must override
      def format_timestamp(hours, mins, secs, ms)
        raise NotImplementedError, "Subclass must implement #format_timestamp"
      end

      # Format the transcript header/wrapper
      #
      # @param lines [Array<String>] The formatted lines
      # @return [String] The complete formatted transcript
      # @raise [NotImplementedError] always; subclasses must override
      def format_transcript_header(lines)
        raise NotImplementedError, "Subclass must implement #format_transcript_header"
      end

      # Format a single transcript entry
      #
      # @param index [Integer] The entry index (0-based)
      # @param time_text [String] The formatted time range
      # @param snippet [TranscriptSnippet] The snippet to format
      # @return [String] The formatted entry
      # @raise [NotImplementedError] always; subclasses must override
      def format_transcript_helper(index, time_text, snippet)
        raise NotImplementedError, "Subclass must implement #format_transcript_helper"
      end

      private

      # Convert seconds to timestamp string
      #
      # The value is rounded to whole milliseconds once, on the total, and
      # only then split into components. Rounding the fractional part on its
      # own would turn e.g. 1.9996s into "01" seconds plus "1000" ms instead
      # of carrying over into the next second.
      #
      # @param time [Float] Time in seconds
      # @return [String] Formatted timestamp
      def seconds_to_timestamp(time)
        total_ms = (time.to_f * 1000).round
        total_secs, ms = total_ms.divmod(1000)
        total_mins, secs = total_secs.divmod(60)
        hours, mins = total_mins.divmod(60)

        format_timestamp(hours, mins, secs, ms)
      end
    end

    # Formats transcript as SRT (SubRip) subtitle format
    #
    # @example SRT format
    #   1
    #   00:00:00,000 --> 00:00:02,500
    #   Hello world
    #
    #   2
    #   00:00:02,500 --> 00:00:05,000
    #   This is a test
    #
    class SRTFormatter < TextBasedFormatter
      protected

      # @return [String] "HH:MM:SS,mmm" (comma before the milliseconds)
      def format_timestamp(hours, mins, secs, ms)
        format("%02d:%02d:%02d,%03d", hours, mins, secs, ms)
      end

      # @return [String] entries separated by a blank line, with a trailing newline
      def format_transcript_header(lines)
        "#{lines.join("\n\n")}\n"
      end

      # @return [String] 1-based sequence number, time range and text on separate lines
      def format_transcript_helper(index, time_text, snippet)
        "#{index + 1}\n#{time_text}\n#{snippet.text}"
      end
    end

    # Formats transcript as WebVTT (Web Video Text Tracks) format
    #
    # @example WebVTT format
    #   WEBVTT
    #
    #   00:00:00.000 --> 00:00:02.500
    #   Hello world
    #
    #   00:00:02.500 --> 00:00:05.000
    #   This is a test
    #
    class WebVTTFormatter < TextBasedFormatter
      protected

      # @return [String] "HH:MM:SS.mmm" (dot before the milliseconds)
      def format_timestamp(hours, mins, secs, ms)
        format("%02d:%02d:%02d.%03d", hours, mins, secs, ms)
      end

      # @return [String] "WEBVTT" header, then entries separated by a blank line
      def format_transcript_header(lines)
        "WEBVTT\n\n#{lines.join("\n\n")}\n"
      end

      # The index is ignored: this formatter emits no cue numbers.
      #
      # @return [String] time range and text on separate lines
      def format_transcript_helper(index, time_text, snippet)
        "#{time_text}\n#{snippet.text}"
      end
    end

    # Utility class to load formatters by type name
    class FormatterLoader
      # Mapping of format names to formatter classes
      TYPES = {
        "json" => JSONFormatter,
        "pretty" => PrettyPrintFormatter,
        "text" => TextFormatter,
        "webvtt" => WebVTTFormatter,
        "srt" => SRTFormatter
      }.freeze

      # Error raised when an unknown formatter type is requested
      class UnknownFormatterType < StandardError
        # @param formatter_type [String] The rejected type name, echoed in the message
        def initialize(formatter_type)
          super(
            "The format '#{formatter_type}' is not supported. " \
            "Choose one of the following formats: #{TYPES.keys.join(', ')}"
          )
        end
      end

      # Load a formatter by type name
      #
      # @param formatter_type [String] The formatter type (json, pretty, text, webvtt, srt)
      # @return [Formatter] An instance of the requested formatter
      # @raise [UnknownFormatterType] If the formatter type is not supported
      #
      # @example
      #   loader = FormatterLoader.new
      #   formatter = loader.load("json")
      #   output = formatter.format_transcript(transcript)
      #
      def load(formatter_type = "pretty")
        # to_s lets callers pass symbols as well as strings.
        formatter_type = formatter_type.to_s
        raise UnknownFormatterType, formatter_type unless TYPES.key?(formatter_type)

        TYPES[formatter_type].new
      end
    end
  end
end
@@ -113,17 +113,13 @@ module YoutubeRb
113
113
  results = {}
114
114
 
115
115
  video_ids.each do |video_id|
116
- begin
117
- transcript = fetch(video_id, languages: languages, preserve_formatting: preserve_formatting)
118
- results[video_id] = transcript
119
- yield(video_id, transcript) if block_given?
120
- rescue CouldNotRetrieveTranscript => e
121
- if continue_on_error
122
- yield(video_id, e) if block_given?
123
- else
124
- raise
125
- end
126
- end
116
+ transcript = fetch(video_id, languages: languages, preserve_formatting: preserve_formatting)
117
+ results[video_id] = transcript
118
+ yield(video_id, transcript) if block_given?
119
+ rescue CouldNotRetrieveTranscript => e
120
+ raise unless continue_on_error
121
+
122
+ yield(video_id, e) if block_given?
127
123
  end
128
124
 
129
125
  results
@@ -40,7 +40,7 @@ module YoutubeRb
40
40
  def github_referral
41
41
  "\n\nIf you are sure that the described cause is not responsible for this error " \
42
42
  "and that a transcript should be retrievable, please create an issue at " \
43
- "https://github.com/jdepoix/youtube-transcript-api/issues. " \
43
+ "https://github.com/stadia/youtube-transcript-rb/issues. " \
44
44
  "Please add which version of youtube_transcript_api you are using " \
45
45
  "and provide the information needed to replicate the error. " \
46
46
  "Also make sure that there are no open issues which already describe your problem!"
@@ -131,13 +131,13 @@ module YoutubeRb
131
131
  "2. (NOT RECOMMENDED) If you authenticate your requests using cookies, you " \
132
132
  "will be able to continue doing requests for a while. However, YouTube will " \
133
133
  "eventually permanently ban the account that you have used to authenticate " \
134
- "with! So only do this if you don't mind your account being banned!"
134
+ "with! So only do this if you don't mind your account being banned!".freeze
135
135
  end
136
136
 
137
137
  # Raised when YouTube blocks the IP specifically
138
138
  class IpBlocked < RequestBlocked
139
139
  CAUSE_MESSAGE = "#{RequestBlocked::BASE_CAUSE_MESSAGE}" \
140
- "Ways to work around this are using proxies or rotating residential IPs."
140
+ "Ways to work around this are using proxies or rotating residential IPs.".freeze
141
141
  end
142
142
 
143
143
  # Raised when too many requests are made (HTTP 429)
@@ -92,8 +92,8 @@ module YoutubeRb
92
92
 
93
93
  # Iterate over each snippet
94
94
  # @yield [TranscriptSnippet] each snippet in the transcript
95
- def each(&block)
96
- @snippets.each(&block)
95
+ def each(&)
96
+ @snippets.each(&)
97
97
  end
98
98
 
99
99
  # Get a snippet by index
@@ -69,11 +69,11 @@ module YoutubeRb
69
69
  #
70
70
  # @yield [TranscriptMetadata] each available transcript
71
71
  # @return [Enumerator] if no block given
72
- def each(&block)
72
+ def each(&)
73
73
  return to_enum(:each) unless block_given?
74
74
 
75
- @manually_created_transcripts.each_value(&block)
76
- @generated_transcripts.each_value(&block)
75
+ @manually_created_transcripts.each_value(&)
76
+ @generated_transcripts.each_value(&)
77
77
  end
78
78
 
79
79
  # Find a transcript for the given language codes.
@@ -56,10 +56,13 @@ module YoutubeRb
56
56
  innertube_data = fetch_innertube_data(video_id, api_key)
57
57
  extract_captions_json(innertube_data, video_id)
58
58
  rescue RequestBlocked => e
59
- retries = @proxy_config.nil? ? 0 : (@proxy_config.respond_to?(:retries_when_blocked) ? @proxy_config.retries_when_blocked : 0)
60
- if try_number + 1 < retries
61
- return fetch_captions_json(video_id, try_number: try_number + 1)
62
- end
59
+ retries = if @proxy_config.nil?
60
+ 0
61
+ else
62
+ (@proxy_config.respond_to?(:retries_when_blocked) ? @proxy_config.retries_when_blocked : 0)
63
+ end
64
+ return fetch_captions_json(video_id, try_number: try_number + 1) if try_number + 1 < retries
65
+
63
66
  raise e
64
67
  end
65
68
 
@@ -72,11 +75,10 @@ module YoutubeRb
72
75
  # @raise [YouTubeDataUnparsable] if the key cannot be found
73
76
  def extract_innertube_api_key(html, video_id)
74
77
  match = html.match(/"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"/)
75
- if match && match[1]
76
- return match[1]
77
- end
78
+ return match[1] if match && match[1]
78
79
 
79
80
  raise IpBlocked, video_id if html.include?('class="g-recaptcha"')
81
+
80
82
  raise YouTubeDataUnparsable, video_id
81
83
  end
82
84
 
@@ -90,9 +92,7 @@ module YoutubeRb
90
92
  assert_playability(innertube_data["playabilityStatus"], video_id)
91
93
 
92
94
  captions_json = innertube_data.dig("captions", "playerCaptionsTracklistRenderer")
93
- if captions_json.nil? || !captions_json.key?("captionTracks")
94
- raise TranscriptsDisabled, video_id
95
- end
95
+ raise TranscriptsDisabled, video_id if captions_json.nil? || !captions_json.key?("captionTracks")
96
96
 
97
97
  captions_json
98
98
  end
@@ -119,9 +119,8 @@ module YoutubeRb
119
119
  end
120
120
 
121
121
  if status == PlayabilityStatus::ERROR && reason == PlayabilityFailedReason::VIDEO_UNAVAILABLE
122
- if video_id.start_with?("http://") || video_id.start_with?("https://")
123
- raise InvalidVideoId, video_id
124
- end
122
+ raise InvalidVideoId, video_id if video_id.start_with?("http://") || video_id.start_with?("https://")
123
+
125
124
  raise VideoUnavailable, video_id
126
125
  end
127
126
 
@@ -157,9 +156,7 @@ module YoutubeRb
157
156
  if html.include?('action="https://consent.youtube.com/s"')
158
157
  create_consent_cookie(html, video_id)
159
158
  html = fetch_html(video_id)
160
- if html.include?('action="https://consent.youtube.com/s"')
161
- raise FailedToCreateConsentCookie, video_id
162
- end
159
+ raise FailedToCreateConsentCookie, video_id if html.include?('action="https://consent.youtube.com/s"')
163
160
  end
164
161
 
165
162
  html
@@ -195,9 +192,9 @@ module YoutubeRb
195
192
  response = @http_client.post(url) do |req|
196
193
  req.headers["Content-Type"] = "application/json"
197
194
  req.body = JSON.generate({
198
- "context" => INNERTUBE_CONTEXT,
199
- "videoId" => video_id
200
- })
195
+ "context" => INNERTUBE_CONTEXT,
196
+ "videoId" => video_id
197
+ })
201
198
  end
202
199
 
203
200
  raise_http_errors(response, video_id)
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "transcript/version"
4
3
  require_relative "transcript/settings"
5
4
  require_relative "transcript/errors"
6
5
  require_relative "transcript/transcript_parser"
@@ -8,7 +7,6 @@ require_relative "transcript/transcript"
8
7
  require_relative "transcript/transcript_list"
9
8
  require_relative "transcript/transcript_list_fetcher"
10
9
  require_relative "transcript/api"
11
- require_relative "transcript/formatters"
12
10
 
13
11
  module YoutubeRb
14
12
  module Transcript