youtube-transcript-rb 0.1.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -0
  3. data/.rubocop_todo.yml +166 -0
  4. data/README.md +42 -42
  5. data/lib/youtube-transcript-rb.rb +4 -0
  6. data/lib/youtube_rb/formatters.rb +263 -0
  7. data/lib/youtube_rb/transcript/api.rb +144 -0
  8. data/lib/youtube_rb/transcript/errors.rb +215 -0
  9. data/lib/youtube_rb/transcript/settings.rb +26 -0
  10. data/lib/youtube_rb/transcript/transcript.rb +237 -0
  11. data/lib/youtube_rb/transcript/transcript_list.rb +168 -0
  12. data/lib/youtube_rb/transcript/transcript_list_fetcher.rb +220 -0
  13. data/lib/youtube_rb/transcript/transcript_parser.rb +81 -0
  14. data/lib/youtube_rb/transcript.rb +33 -0
  15. data/lib/youtube_rb/version.rb +5 -0
  16. data/sig/youtube_rb/transcript.rbs +4 -0
  17. data/spec/api_spec.rb +27 -27
  18. data/spec/errors_spec.rb +41 -41
  19. data/spec/formatters_spec.rb +45 -46
  20. data/spec/integration_spec.rb +39 -48
  21. data/spec/settings_spec.rb +16 -16
  22. data/spec/spec_helper.rb +52 -52
  23. data/spec/transcript_list_fetcher_spec.rb +38 -33
  24. data/spec/transcript_list_spec.rb +16 -19
  25. data/spec/transcript_parser_spec.rb +3 -3
  26. data/spec/transcript_spec.rb +23 -24
  27. metadata +17 -13
  28. data/lib/youtube/transcript/rb/api.rb +0 -150
  29. data/lib/youtube/transcript/rb/errors.rb +0 -217
  30. data/lib/youtube/transcript/rb/formatters.rb +0 -269
  31. data/lib/youtube/transcript/rb/settings.rb +0 -28
  32. data/lib/youtube/transcript/rb/transcript.rb +0 -239
  33. data/lib/youtube/transcript/rb/transcript_list.rb +0 -170
  34. data/lib/youtube/transcript/rb/transcript_list_fetcher.rb +0 -225
  35. data/lib/youtube/transcript/rb/transcript_parser.rb +0 -83
  36. data/lib/youtube/transcript/rb/version.rb +0 -9
  37. data/lib/youtube/transcript/rb.rb +0 -37
  38. data/sig/youtube/transcript/rb.rbs +0 -8
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YoutubeRb
4
+ module Transcript
5
# Enumerable collection of the transcripts available for one YouTube video.
#
# Transcripts are stored in two hashes keyed by language code: one for
# manually created tracks and one for auto-generated tracks (caption
# "kind" equal to "asr"). Iteration yields the manually created transcripts
# first, then the generated ones. The finder methods look transcripts up by
# language code in priority order.
class TranscriptList
  include Enumerable

  # @return [String] the video ID this TranscriptList is for
  attr_reader :video_id

  # Build a TranscriptList from captions JSON data.
  #
  # @param http_client [Faraday::Connection] the HTTP client handed to every TranscriptMetadata
  # @param video_id [String] the YouTube video ID
  # @param captions_json [Hash] the captions JSON parsed from YouTube; the
  #   "translationLanguages" and "captionTracks" keys are read, and a missing
  #   key is treated as an empty list
  # @return [TranscriptList] the created TranscriptList
  def self.build(http_client:, video_id:, captions_json:)
    translation_languages = (captions_json["translationLanguages"] || []).map do |tl|
      TranslationLanguage.new(
        language: tl.dig("languageName", "runs", 0, "text") || "",
        language_code: tl["languageCode"]
      )
    end

    manually_created_transcripts = {}
    generated_transcripts = {}

    (captions_json["captionTracks"] || []).each do |caption|
      # Tracks whose kind is "asr" go into the generated hash, all others into the manual one.
      is_generated = caption.fetch("kind", "") == "asr"
      target_dict = is_generated ? generated_transcripts : manually_created_transcripts

      language_code = caption["languageCode"]
      # Only translatable tracks are given the shared translation language list.
      transcript_translation_languages = caption.fetch("isTranslatable", false) ? translation_languages : []

      # A later track with the same language code and kind replaces the earlier one.
      target_dict[language_code] = TranscriptMetadata.new(
        http_client: http_client,
        video_id: video_id,
        # The "&fmt=srv3" query fragment is removed from the track URL.
        url: caption["baseUrl"].to_s.gsub("&fmt=srv3", ""),
        language: caption.dig("name", "runs", 0, "text") || "",
        language_code: language_code,
        is_generated: is_generated,
        translation_languages: transcript_translation_languages
      )
    end

    new(
      video_id: video_id,
      manually_created_transcripts: manually_created_transcripts,
      generated_transcripts: generated_transcripts,
      translation_languages: translation_languages
    )
  end

  # @param video_id [String] the YouTube video ID
  # @param manually_created_transcripts [Hash<String, TranscriptMetadata>] manually created transcripts by language code
  # @param generated_transcripts [Hash<String, TranscriptMetadata>] auto-generated transcripts by language code
  # @param translation_languages [Array<TranslationLanguage>] available translation languages
  def initialize(video_id:, manually_created_transcripts:, generated_transcripts:, translation_languages:)
    @video_id = video_id
    @manually_created_transcripts = manually_created_transcripts
    @generated_transcripts = generated_transcripts
    @translation_languages = translation_languages
  end

  # Iterate over all transcripts (manually created first, then generated).
  #
  # @yield [TranscriptMetadata] each available transcript
  # @return [Enumerator] if no block given
  def each(&)
    return to_enum(:each) unless block_given?

    @manually_created_transcripts.each_value(&)
    @generated_transcripts.each_value(&)
  end

  # Find a transcript for the given language codes.
  # For each language code, in order, a manually created transcript is
  # preferred over a generated one.
  #
  # @param language_codes [Array<String>, String] language codes in descending priority
  # @return [TranscriptMetadata] the found transcript
  # @raise [NoTranscriptFound] if no transcript matches the requested languages
  def find_transcript(language_codes)
    find_transcript_in(
      language_codes,
      [@manually_created_transcripts, @generated_transcripts]
    )
  end

  # Find an automatically generated transcript for the given language codes.
  #
  # @param language_codes [Array<String>, String] language codes in descending priority
  # @return [TranscriptMetadata] the found transcript
  # @raise [NoTranscriptFound] if no generated transcript matches
  def find_generated_transcript(language_codes)
    find_transcript_in(language_codes, [@generated_transcripts])
  end

  # Find a manually created transcript for the given language codes.
  #
  # @param language_codes [Array<String>, String] language codes in descending priority
  # @return [TranscriptMetadata] the found transcript
  # @raise [NoTranscriptFound] if no manually created transcript matches
  def find_manually_created_transcript(language_codes)
    find_transcript_in(language_codes, [@manually_created_transcripts])
  end

  # String representation of the transcript list.
  #
  # @return [String] human-readable description of available transcripts
  def to_s
    <<~DESC
      For this video (#{@video_id}) transcripts are available in the following languages:

      (MANUALLY CREATED)
      #{format_language_list(@manually_created_transcripts.values)}

      (GENERATED)
      #{format_language_list(@generated_transcripts.values)}

      (TRANSLATION LANGUAGES)
      #{format_translation_languages}
    DESC
  end

  private

  # Find a transcript from the given dictionaries.
  #
  # The outer loop runs over language codes and the inner loop over the
  # dictionaries, so language priority wins over dictionary order.
  #
  # @param language_codes [Array<String>, String, nil] language codes to search for
  # @param transcript_dicts [Array<Hash>] transcript dictionaries to search
  # @return [TranscriptMetadata] the found transcript
  # @raise [NoTranscriptFound] if no transcript matches
  def find_transcript_in(language_codes, transcript_dicts)
    # Array() leaves an Array untouched, wraps a single String and turns nil
    # into [], so a bare "en" or nil ends in NoTranscriptFound rather than a
    # NoMethodError on #each.
    codes = Array(language_codes)

    codes.each do |language_code|
      transcript_dicts.each do |dict|
        return dict[language_code] if dict.key?(language_code)
      end
    end

    raise NoTranscriptFound.new(@video_id, codes, self)
  end

  # Format a list of transcripts for display.
  #
  # @param transcripts [Array<TranscriptMetadata>] transcripts to format
  # @return [String] formatted list or "None"
  def format_language_list(transcripts)
    return "None" if transcripts.empty?

    transcripts.map { |t| " - #{t}" }.join("\n")
  end

  # Format translation languages for display.
  #
  # @return [String] formatted list or "None"
  def format_translation_languages
    return "None" if @translation_languages.empty?

    @translation_languages.map do |tl|
      " - #{tl.language_code} (\"#{tl.language}\")"
    end.join("\n")
  end
end
167
+ end
168
+ end
@@ -0,0 +1,220 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+ require "json"
5
+
6
+ module YoutubeRb
7
+ module Transcript
8
# Values of the "status" field in the playability data returned by YouTube.
module PlayabilityStatus
  # The video is playable.
  OK = "OK"
  # Playback failed with an error.
  ERROR = "ERROR"
  # YouTube asks the client to sign in before playback.
  LOGIN_REQUIRED = "LOGIN_REQUIRED"
end
14
+
15
# Exact "reason" texts that accompany a playability failure.
# They are matched by string equality, so the wording must stay verbatim.
module PlayabilityFailedReason
  # Reason text for a request flagged as automated traffic.
  BOT_DETECTED = "Sign in to confirm you're not a bot"
  # Reason text for an age-restricted video.
  AGE_RESTRICTED = "This video may be inappropriate for some users."
  # Reason text for a video that is not available.
  VIDEO_UNAVAILABLE = "This video is unavailable"
end
21
+
22
+ # Fetches transcript lists from YouTube videos.
23
+ # This class handles all the HTTP communication with YouTube,
24
+ # including consent cookie handling and error detection.
25
+ class TranscriptListFetcher
26
+ # @param http_client [Faraday::Connection] the HTTP client to use
27
+ # @param proxy_config [Object, nil] optional proxy configuration
28
+ def initialize(http_client:, proxy_config: nil)
29
+ @http_client = http_client
30
+ @proxy_config = proxy_config
31
+ end
32
+
33
+ # Fetch the transcript list for a video
34
+ #
35
+ # @param video_id [String] the YouTube video ID
36
+ # @return [TranscriptList] the list of available transcripts
37
+ # @raise [CouldNotRetrieveTranscript] if transcripts cannot be retrieved
38
+ def fetch(video_id)
39
+ TranscriptList.build(
40
+ http_client: @http_client,
41
+ video_id: video_id,
42
+ captions_json: fetch_captions_json(video_id)
43
+ )
44
+ end
45
+
46
+ private
47
+
48
+ # Fetch captions JSON with retry support
49
+ #
50
+ # @param video_id [String] the YouTube video ID
51
+ # @param try_number [Integer] current retry attempt
52
+ # @return [Hash] the captions JSON
53
+ def fetch_captions_json(video_id, try_number: 0)
54
+ html = fetch_video_html(video_id)
55
+ api_key = extract_innertube_api_key(html, video_id)
56
+ innertube_data = fetch_innertube_data(video_id, api_key)
57
+ extract_captions_json(innertube_data, video_id)
58
+ rescue RequestBlocked => e
59
+ retries = if @proxy_config.nil?
60
+ 0
61
+ else
62
+ (@proxy_config.respond_to?(:retries_when_blocked) ? @proxy_config.retries_when_blocked : 0)
63
+ end
64
+ return fetch_captions_json(video_id, try_number: try_number + 1) if try_number + 1 < retries
65
+
66
+ raise e
67
+ end
68
+
69
+ # Extract the INNERTUBE_API_KEY from the video page HTML
70
+ #
71
+ # @param html [String] the HTML content
72
+ # @param video_id [String] the video ID (for error messages)
73
+ # @return [String] the API key
74
+ # @raise [IpBlocked] if a CAPTCHA is detected
75
+ # @raise [YouTubeDataUnparsable] if the key cannot be found
76
+ def extract_innertube_api_key(html, video_id)
77
+ match = html.match(/"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"/)
78
+ return match[1] if match && match[1]
79
+
80
+ raise IpBlocked, video_id if html.include?('class="g-recaptcha"')
81
+
82
+ raise YouTubeDataUnparsable, video_id
83
+ end
84
+
85
+ # Extract captions JSON from innertube data
86
+ #
87
+ # @param innertube_data [Hash] the innertube API response
88
+ # @param video_id [String] the video ID
89
+ # @return [Hash] the captions JSON
90
+ # @raise [TranscriptsDisabled] if no captions are available
91
+ def extract_captions_json(innertube_data, video_id)
92
+ assert_playability(innertube_data["playabilityStatus"], video_id)
93
+
94
+ captions_json = innertube_data.dig("captions", "playerCaptionsTracklistRenderer")
95
+ raise TranscriptsDisabled, video_id if captions_json.nil? || !captions_json.key?("captionTracks")
96
+
97
+ captions_json
98
+ end
99
+
100
+ # Assert that the video is playable
101
+ #
102
+ # @param playability_status_data [Hash, nil] the playability status from API
103
+ # @param video_id [String] the video ID
104
+ # @raise [Various] depending on the playability status
105
+ def assert_playability(playability_status_data, video_id)
106
+ return if playability_status_data.nil?
107
+
108
+ status = playability_status_data["status"]
109
+ return if status == PlayabilityStatus::OK || status.nil?
110
+
111
+ reason = playability_status_data["reason"]
112
+
113
+ if status == PlayabilityStatus::LOGIN_REQUIRED
114
+ if reason == PlayabilityFailedReason::BOT_DETECTED
115
+ raise RequestBlocked, video_id
116
+ elsif reason == PlayabilityFailedReason::AGE_RESTRICTED
117
+ raise AgeRestricted, video_id
118
+ end
119
+ end
120
+
121
+ if status == PlayabilityStatus::ERROR && reason == PlayabilityFailedReason::VIDEO_UNAVAILABLE
122
+ raise InvalidVideoId, video_id if video_id.start_with?("http://") || video_id.start_with?("https://")
123
+
124
+ raise VideoUnavailable, video_id
125
+ end
126
+
127
+ # Extract subreasons for more detailed error messages
128
+ subreasons = playability_status_data.dig("errorScreen", "playerErrorMessageRenderer", "subreason", "runs") || []
129
+ subreason_texts = subreasons.map { |run| run["text"] || "" }
130
+
131
+ raise VideoUnplayable.new(video_id, reason, subreason_texts)
132
+ end
133
+
134
+ # Create a consent cookie from the HTML
135
+ #
136
+ # @param html [String] the HTML content
137
+ # @param video_id [String] the video ID
138
+ # @raise [FailedToCreateConsentCookie] if the cookie cannot be created
139
+ def create_consent_cookie(html, video_id)
140
+ match = html.match(/name="v" value="(.*?)"/)
141
+ raise FailedToCreateConsentCookie, video_id if match.nil?
142
+
143
+ # Set the consent cookie
144
+ # Note: Faraday doesn't have built-in cookie management like requests.Session
145
+ # We'll need to handle this via headers or middleware
146
+ @consent_value = "YES+#{match[1]}"
147
+ end
148
+
149
+ # Fetch the video HTML page
150
+ #
151
+ # @param video_id [String] the video ID
152
+ # @return [String] the HTML content
153
+ def fetch_video_html(video_id)
154
+ html = fetch_html(video_id)
155
+
156
+ if html.include?('action="https://consent.youtube.com/s"')
157
+ create_consent_cookie(html, video_id)
158
+ html = fetch_html(video_id)
159
+ raise FailedToCreateConsentCookie, video_id if html.include?('action="https://consent.youtube.com/s"')
160
+ end
161
+
162
+ html
163
+ end
164
+
165
+ # Fetch raw HTML from YouTube
166
+ #
167
+ # @param video_id [String] the video ID
168
+ # @return [String] the HTML content (unescaped)
169
+ def fetch_html(video_id)
170
+ url = format(WATCH_URL, video_id: video_id)
171
+ headers = { "Accept-Language" => "en-US" }
172
+
173
+ # Add consent cookie if we have one
174
+ headers["Cookie"] = "CONSENT=#{@consent_value}" if @consent_value
175
+
176
+ response = @http_client.get(url) do |req|
177
+ headers.each { |k, v| req.headers[k] = v }
178
+ end
179
+
180
+ raise_http_errors(response, video_id)
181
+ CGI.unescapeHTML(response.body)
182
+ end
183
+
184
+ # Fetch data from the Innertube API
185
+ #
186
+ # @param video_id [String] the video ID
187
+ # @param api_key [String] the API key
188
+ # @return [Hash] the API response
189
+ def fetch_innertube_data(video_id, api_key)
190
+ url = format(INNERTUBE_API_URL, api_key: api_key)
191
+
192
+ response = @http_client.post(url) do |req|
193
+ req.headers["Content-Type"] = "application/json"
194
+ req.body = JSON.generate({
195
+ "context" => INNERTUBE_CONTEXT,
196
+ "videoId" => video_id
197
+ })
198
+ end
199
+
200
+ raise_http_errors(response, video_id)
201
+ JSON.parse(response.body)
202
+ end
203
+
204
+ # Raise appropriate errors for HTTP responses
205
+ #
206
+ # @param response [Faraday::Response] the HTTP response
207
+ # @param video_id [String] the video ID
208
+ # @raise [IpBlocked] for 429 responses
209
+ # @raise [YouTubeRequestFailed] for other error responses
210
+ def raise_http_errors(response, video_id)
211
+ case response.status
212
+ when 429
213
+ raise IpBlocked, video_id
214
+ when 400..599
215
+ raise YouTubeRequestFailed.new(video_id, StandardError.new("HTTP #{response.status}"))
216
+ end
217
+ end
218
+ end
219
+ end
220
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+ require "cgi"
5
+
6
+ module YoutubeRb
7
+ module Transcript
8
# Turns the XML transcript payload from YouTube into TranscriptSnippet objects.
class TranscriptParser
  # Tags left in the text when preserve_formatting is enabled
  FORMATTING_TAGS = %w[strong em b i mark small del ins sub sup].freeze

  # @param preserve_formatting [Boolean] keep the FORMATTING_TAGS in the snippet text
  def initialize(preserve_formatting: false)
    @preserve_formatting = preserve_formatting
    # Built once per parser and reused for every snippet.
    @html_regex = build_html_regex
  end

  # Convert every <text> element with non-empty content into a snippet.
  # @param raw_data [String] the raw XML data from YouTube
  # @return [Array<TranscriptSnippet>] parsed transcript snippets
  def parse(raw_data)
    Nokogiri::XML(raw_data).xpath("//text").filter_map do |node|
      raw_text = node.text
      next if raw_text.nil? || raw_text.empty?

      TranscriptSnippet.new(
        text: process_text(raw_text),
        start: node["start"].to_f,
        # A missing "dur" attribute counts as a duration of zero.
        duration: (node["dur"] || "0.0").to_f
      )
    end
  end

  private

  # Pattern matching the tags that must be stripped from snippet text.
  # @return [Regexp]
  def build_html_regex
    # Without formatting preservation every tag is stripped.
    return /<[^>]*>/i unless @preserve_formatting

    kept_tags = FORMATTING_TAGS.join("|")
    # The negative lookahead skips opening and closing forms of the kept tags.
    %r{</?(?!/?(?:#{kept_tags})\b)[^>]*>}i
  end

  # Unescape HTML entities, then drop the tags matched by the strip pattern.
  # @param text [String] the raw text
  # @return [String] processed text
  def process_text(text)
    CGI.unescapeHTML(text).gsub(@html_regex, "")
  end
end
80
+ end
81
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "transcript/settings"
4
+ require_relative "transcript/errors"
5
+ require_relative "transcript/transcript_parser"
6
+ require_relative "transcript/transcript"
7
+ require_relative "transcript/transcript_list"
8
+ require_relative "transcript/transcript_list_fetcher"
9
+ require_relative "transcript/api"
10
+
11
module YoutubeRb
  # Module-level shortcuts that build a YouTubeTranscriptApi (constructed
  # without arguments) for a single call.
  module Transcript
    # Convenience method to fetch a transcript.
    # @param video_id [String] YouTube video ID
    # @param languages [Array<String>] Language codes in order of preference
    # @param preserve_formatting [Boolean] Whether to preserve HTML formatting
    # @return [FetchedTranscript] The fetched transcript
    def self.fetch(video_id, languages: ["en"], preserve_formatting: false)
      YouTubeTranscriptApi.new.fetch(
        video_id,
        languages: languages,
        preserve_formatting: preserve_formatting
      )
    end

    # Convenience method to list available transcripts.
    # @param video_id [String] YouTube video ID
    # @return [TranscriptList] List of available transcripts
    def self.list(video_id)
      YouTubeTranscriptApi.new.list(video_id)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the gem.
module YoutubeRb
  # Version string of this release.
  VERSION = "0.2.3"
end
@@ -0,0 +1,4 @@
1
+ module YoutubeRb
2
+ VERSION: String
3
+ # VERSION above is declared as a String. For writing further signatures, see the RBS guides: https://github.com/ruby/rbs#guides
4
+ end
data/spec/api_spec.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  require "spec_helper"
4
4
  require "webmock/rspec"
5
5
 
6
- RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
6
+ RSpec.describe YoutubeRb::Transcript::YouTubeTranscriptApi do
7
7
  let(:api) { described_class.new }
8
8
  let(:video_id) { "dQw4w9WgXcQ" }
9
9
  let(:api_key) { "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" }
@@ -84,7 +84,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
84
84
 
85
85
  it "creates a TranscriptListFetcher" do
86
86
  api = described_class.new
87
- expect(api.instance_variable_get(:@fetcher)).to be_a(Youtube::Transcript::Rb::TranscriptListFetcher)
87
+ expect(api.instance_variable_get(:@fetcher)).to be_a(YoutubeRb::Transcript::TranscriptListFetcher)
88
88
  end
89
89
  end
90
90
 
@@ -102,7 +102,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
102
102
 
103
103
  it "returns a FetchedTranscript" do
104
104
  result = api.fetch(video_id)
105
- expect(result).to be_a(Youtube::Transcript::Rb::FetchedTranscript)
105
+ expect(result).to be_a(YoutubeRb::Transcript::FetchedTranscript)
106
106
  end
107
107
 
108
108
  it "fetches the transcript with correct video_id" do
@@ -126,19 +126,19 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
126
126
  stub_request(:get, "https://www.youtube.com/api/timedtext?v=#{video_id}&lang=es")
127
127
  .to_return(status: 200, body: sample_transcript_xml)
128
128
 
129
- result = api.fetch(video_id, languages: ["es", "en"])
129
+ result = api.fetch(video_id, languages: %w[es en])
130
130
  expect(result.language_code).to eq("es")
131
131
  end
132
132
 
133
133
  it "falls back to next language if first not available" do
134
- result = api.fetch(video_id, languages: ["ja", "en"])
134
+ result = api.fetch(video_id, languages: %w[ja en])
135
135
  expect(result.language_code).to eq("en")
136
136
  end
137
137
 
138
138
  it "raises NoTranscriptFound when no language matches" do
139
- expect {
140
- api.fetch(video_id, languages: ["ja", "ko", "zh"])
141
- }.to raise_error(Youtube::Transcript::Rb::NoTranscriptFound)
139
+ expect do
140
+ api.fetch(video_id, languages: %w[ja ko zh])
141
+ end.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
142
142
  end
143
143
 
144
144
  context "with preserve_formatting option" do
@@ -182,7 +182,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
182
182
 
183
183
  it "returns a TranscriptList" do
184
184
  result = api.list(video_id)
185
- expect(result).to be_a(Youtube::Transcript::Rb::TranscriptList)
185
+ expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
186
186
  end
187
187
 
188
188
  it "returns a list with the correct video_id" do
@@ -213,7 +213,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
213
213
  end
214
214
 
215
215
  it "raises VideoUnavailable error" do
216
- expect { api.list(video_id) }.to raise_error(Youtube::Transcript::Rb::VideoUnavailable)
216
+ expect { api.list(video_id) }.to raise_error(YoutubeRb::Transcript::VideoUnavailable)
217
217
  end
218
218
  end
219
219
 
@@ -227,13 +227,13 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
227
227
  end
228
228
 
229
229
  it "raises TranscriptsDisabled error" do
230
- expect { api.list(video_id) }.to raise_error(Youtube::Transcript::Rb::TranscriptsDisabled)
230
+ expect { api.list(video_id) }.to raise_error(YoutubeRb::Transcript::TranscriptsDisabled)
231
231
  end
232
232
  end
233
233
  end
234
234
 
235
235
  describe "#fetch_all" do
236
- let(:video_ids) { ["video1", "video2", "video3"] }
236
+ let(:video_ids) { %w[video1 video2 video3] }
237
237
 
238
238
  before do
239
239
  video_ids.each do |vid|
@@ -266,20 +266,20 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
266
266
  it "returns a hash of transcripts" do
267
267
  results = api.fetch_all(video_ids)
268
268
  expect(results).to be_a(Hash)
269
- expect(results.keys).to contain_exactly(*video_ids)
269
+ expect(results.keys).to match_array(video_ids)
270
270
  end
271
271
 
272
272
  it "fetches all video transcripts" do
273
273
  results = api.fetch_all(video_ids)
274
274
  results.each do |vid, transcript|
275
- expect(transcript).to be_a(Youtube::Transcript::Rb::FetchedTranscript)
275
+ expect(transcript).to be_a(YoutubeRb::Transcript::FetchedTranscript)
276
276
  expect(transcript.video_id).to eq(vid)
277
277
  end
278
278
  end
279
279
 
280
280
  it "respects language preference" do
281
281
  results = api.fetch_all(video_ids, languages: ["en"])
282
- results.each do |_, transcript|
282
+ results.each_value do |transcript|
283
283
  expect(transcript.language_code).to eq("en")
284
284
  end
285
285
  end
@@ -292,7 +292,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
292
292
  expect(yielded.length).to eq(3)
293
293
  yielded.each do |vid, klass|
294
294
  expect(video_ids).to include(vid)
295
- expect(klass).to eq(Youtube::Transcript::Rb::FetchedTranscript)
295
+ expect(klass).to eq(YoutubeRb::Transcript::FetchedTranscript)
296
296
  end
297
297
  end
298
298
 
@@ -316,7 +316,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
316
316
  end
317
317
 
318
318
  it "raises error by default" do
319
- expect { api.fetch_all(failing_video_ids) }.to raise_error(Youtube::Transcript::Rb::VideoUnavailable)
319
+ expect { api.fetch_all(failing_video_ids) }.to raise_error(YoutubeRb::Transcript::VideoUnavailable)
320
320
  end
321
321
 
322
322
  it "continues on error when configured" do
@@ -332,7 +332,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
332
332
  end
333
333
  expect(errors.length).to eq(1)
334
334
  expect(errors.first[0]).to eq("fail_video")
335
- expect(errors.first[1]).to be_a(Youtube::Transcript::Rb::VideoUnavailable)
335
+ expect(errors.first[1]).to be_a(YoutubeRb::Transcript::VideoUnavailable)
336
336
  end
337
337
  end
338
338
 
@@ -356,27 +356,27 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
356
356
  .to_return(status: 200, body: sample_transcript_xml)
357
357
  end
358
358
 
359
- describe "Youtube::Transcript::Rb.fetch" do
359
+ describe "YoutubeRb::Transcript.fetch" do
360
360
  it "fetches a transcript" do
361
- result = Youtube::Transcript::Rb.fetch(video_id)
362
- expect(result).to be_a(Youtube::Transcript::Rb::FetchedTranscript)
361
+ result = YoutubeRb::Transcript.fetch(video_id)
362
+ expect(result).to be_a(YoutubeRb::Transcript::FetchedTranscript)
363
363
  end
364
364
 
365
365
  it "accepts language option" do
366
- result = Youtube::Transcript::Rb.fetch(video_id, languages: ["en"])
366
+ result = YoutubeRb::Transcript.fetch(video_id, languages: ["en"])
367
367
  expect(result.language_code).to eq("en")
368
368
  end
369
369
 
370
370
  it "accepts preserve_formatting option" do
371
- result = Youtube::Transcript::Rb.fetch(video_id, preserve_formatting: false)
372
- expect(result).to be_a(Youtube::Transcript::Rb::FetchedTranscript)
371
+ result = YoutubeRb::Transcript.fetch(video_id, preserve_formatting: false)
372
+ expect(result).to be_a(YoutubeRb::Transcript::FetchedTranscript)
373
373
  end
374
374
  end
375
375
 
376
- describe "Youtube::Transcript::Rb.list" do
376
+ describe "YoutubeRb::Transcript.list" do
377
377
  it "lists available transcripts" do
378
- result = Youtube::Transcript::Rb.list(video_id)
379
- expect(result).to be_a(Youtube::Transcript::Rb::TranscriptList)
378
+ result = YoutubeRb::Transcript.list(video_id)
379
+ expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
380
380
  end
381
381
  end
382
382
  end