youtube-transcript-rb 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of youtube-transcript-rb might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +42 -42
- data/lib/youtube-transcript-rb.rb +3 -0
- data/lib/youtube_rb/transcript/api.rb +148 -0
- data/lib/youtube_rb/transcript/errors.rb +215 -0
- data/lib/youtube_rb/transcript/formatters.rb +267 -0
- data/lib/youtube_rb/transcript/settings.rb +26 -0
- data/lib/youtube_rb/transcript/transcript.rb +237 -0
- data/lib/youtube_rb/transcript/transcript_list.rb +168 -0
- data/lib/youtube_rb/transcript/transcript_list_fetcher.rb +223 -0
- data/lib/youtube_rb/transcript/transcript_parser.rb +81 -0
- data/lib/{youtube/transcript/rb → youtube_rb/transcript}/version.rb +2 -4
- data/lib/youtube_rb/transcript.rb +35 -0
- data/sig/youtube_rb/transcript.rbs +6 -0
- data/spec/api_spec.rb +20 -20
- data/spec/errors_spec.rb +39 -39
- data/spec/formatters_spec.rb +36 -36
- data/spec/integration_spec.rb +32 -32
- data/spec/settings_spec.rb +16 -16
- data/spec/spec_helper.rb +1 -1
- data/spec/transcript_list_fetcher_spec.rb +27 -27
- data/spec/transcript_list_spec.rb +6 -6
- data/spec/transcript_parser_spec.rb +3 -3
- data/spec/transcript_spec.rb +16 -16
- metadata +13 -12
- data/lib/youtube/transcript/rb/api.rb +0 -150
- data/lib/youtube/transcript/rb/errors.rb +0 -217
- data/lib/youtube/transcript/rb/formatters.rb +0 -269
- data/lib/youtube/transcript/rb/settings.rb +0 -28
- data/lib/youtube/transcript/rb/transcript.rb +0 -239
- data/lib/youtube/transcript/rb/transcript_list.rb +0 -170
- data/lib/youtube/transcript/rb/transcript_list_fetcher.rb +0 -225
- data/lib/youtube/transcript/rb/transcript_parser.rb +0 -83
- data/lib/youtube/transcript/rb.rb +0 -37
- data/sig/youtube/transcript/rb.rbs +0 -8
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "cgi"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module YoutubeRb
|
|
7
|
+
module Transcript
|
|
8
|
+
# Values of the "status" field inside the "playabilityStatus" section of an
# Innertube player response, as compared against in
# TranscriptListFetcher#assert_playability.
module PlayabilityStatus
  # The video is playable; no playability error is raised for this status.
  OK = "OK"

  # Failure status; combined with the "video unavailable" reason it maps to
  # VideoUnavailable / InvalidVideoId.
  ERROR = "ERROR"

  # YouTube asks for a sign-in; the accompanying reason decides whether this
  # is treated as bot detection or as an age restriction.
  LOGIN_REQUIRED = "LOGIN_REQUIRED"
end
|
|
14
|
+
|
|
15
|
+
# Exact "reason" strings from the "playabilityStatus" section that
# TranscriptListFetcher#assert_playability matches on. They are compared with
# ==, so they must stay byte-identical to what YouTube sends.
module PlayabilityFailedReason
  # With status LOGIN_REQUIRED: the request was flagged as automated.
  BOT_DETECTED = "Sign in to confirm you're not a bot"

  # With status LOGIN_REQUIRED: the video is age restricted.
  AGE_RESTRICTED = "This video may be inappropriate for some users."

  # With status ERROR: the video does not exist or cannot be accessed.
  VIDEO_UNAVAILABLE = "This video is unavailable"
end
|
|
21
|
+
|
|
22
|
+
# Fetches transcript lists from YouTube videos.
#
# The fetcher downloads the watch page, extracts the Innertube API key from
# its HTML, queries the Innertube endpoint and passes the caption track data
# to TranscriptList.build. It also handles the cookie-consent page and turns
# playability and HTTP failures into the library's error classes.
class TranscriptListFetcher
  # Marker present in the HTML when YouTube serves its cookie-consent form
  # instead of the watch page.
  CONSENT_FORM_MARKER = 'action="https://consent.youtube.com/s"'.freeze
  private_constant :CONSENT_FORM_MARKER

  # @param http_client [Faraday::Connection] the HTTP client to use; it must
  #   respond to #get and #post, yielding a request with #headers and #body=
  # @param proxy_config [Object, nil] optional proxy configuration; when it
  #   responds to #retries_when_blocked, that value bounds the retry loop
  def initialize(http_client:, proxy_config: nil)
    @http_client = http_client
    @proxy_config = proxy_config
    # Set by create_consent_cookie once a consent page has been seen.
    @consent_value = nil
  end

  # Fetch the transcript list for a video.
  #
  # @param video_id [String] the YouTube video ID
  # @return [TranscriptList] the list of available transcripts
  # @raise [CouldNotRetrieveTranscript] if transcripts cannot be retrieved
  #   (NOTE(review): the error hierarchy lives in errors.rb; confirm that
  #   every error raised below descends from this class)
  def fetch(video_id)
    TranscriptList.build(
      http_client: @http_client,
      video_id: video_id,
      captions_json: fetch_captions_json(video_id)
    )
  end

  private

  # Fetch captions JSON, retrying while the request is reported as blocked.
  #
  # @param video_id [String] the YouTube video ID
  # @param try_number [Integer] zero-based index of the current attempt
  # @return [Hash] the captions JSON
  # @raise [RequestBlocked] once the attempts are exhausted
  def fetch_captions_json(video_id, try_number: 0)
    html = fetch_video_html(video_id)
    api_key = extract_innertube_api_key(html, video_id)
    innertube_data = fetch_innertube_data(video_id, api_key)
    extract_captions_json(innertube_data, video_id)
  rescue RequestBlocked
    # At most `retries_when_blocked` attempts are made in total; without a
    # proxy configuration the first failure is re-raised unchanged.
    raise unless try_number + 1 < retries_when_blocked

    fetch_captions_json(video_id, try_number: try_number + 1)
  end

  # Number of attempts allowed when YouTube blocks the request.
  #
  # @return [Integer] 0 when there is no proxy configuration or it does not
  #   expose #retries_when_blocked
  def retries_when_blocked
    return 0 unless @proxy_config.respond_to?(:retries_when_blocked)

    # to_i keeps the comparison in fetch_captions_json valid if the
    # configuration returns nil.
    @proxy_config.retries_when_blocked.to_i
  end

  # Extract the INNERTUBE_API_KEY from the video page HTML.
  #
  # @param html [String] the HTML content
  # @param video_id [String] the video ID (for error messages)
  # @return [String] the API key
  # @raise [IpBlocked] if a CAPTCHA is detected
  # @raise [YouTubeDataUnparsable] if the key cannot be found
  def extract_innertube_api_key(html, video_id)
    api_key = html[/"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"/, 1]
    return api_key if api_key

    raise IpBlocked, video_id if html.include?('class="g-recaptcha"')

    raise YouTubeDataUnparsable, video_id
  end

  # Extract captions JSON from innertube data.
  #
  # @param innertube_data [Hash] the innertube API response
  # @param video_id [String] the video ID
  # @return [Hash] the "playerCaptionsTracklistRenderer" section
  # @raise [TranscriptsDisabled] if no caption tracks are listed
  def extract_captions_json(innertube_data, video_id)
    assert_playability(innertube_data["playabilityStatus"], video_id)

    captions_json = innertube_data.dig("captions", "playerCaptionsTracklistRenderer")
    raise TranscriptsDisabled, video_id unless captions_json&.key?("captionTracks")

    captions_json
  end

  # Assert that the video is playable.
  #
  # A missing status section, a missing status or status "OK" all count as
  # playable.
  #
  # @param playability_status_data [Hash, nil] the playability status from API
  # @param video_id [String] the video ID
  # @raise [RequestBlocked] LOGIN_REQUIRED with the bot-detection reason
  # @raise [AgeRestricted] LOGIN_REQUIRED with the age-restriction reason
  # @raise [InvalidVideoId] video unavailable and the "ID" is actually a URL
  # @raise [VideoUnavailable] video unavailable
  # @raise [VideoUnplayable] any other non-OK status
  def assert_playability(playability_status_data, video_id)
    return if playability_status_data.nil?

    status = playability_status_data["status"]
    return if status.nil? || status == PlayabilityStatus::OK

    reason = playability_status_data["reason"]

    if status == PlayabilityStatus::LOGIN_REQUIRED
      raise RequestBlocked, video_id if reason == PlayabilityFailedReason::BOT_DETECTED
      raise AgeRestricted, video_id if reason == PlayabilityFailedReason::AGE_RESTRICTED
    end

    if status == PlayabilityStatus::ERROR && reason == PlayabilityFailedReason::VIDEO_UNAVAILABLE
      # A full URL passed as the ID gets the more specific error.
      raise InvalidVideoId, video_id if video_id.start_with?("http://", "https://")

      raise VideoUnavailable, video_id
    end

    # Extract subreasons for more detailed error messages
    subreasons = playability_status_data.dig("errorScreen", "playerErrorMessageRenderer", "subreason", "runs") || []
    subreason_texts = subreasons.map { |run| run["text"] || "" }

    raise VideoUnplayable.new(video_id, reason, subreason_texts)
  end

  # Remember the consent value taken from the consent form's hidden "v"
  # field. Cookies are not managed by the HTTP client here, so the value is
  # stored on the fetcher and sent as a Cookie header by fetch_html.
  #
  # @param html [String] the HTML content
  # @param video_id [String] the video ID
  # @raise [FailedToCreateConsentCookie] if the form field cannot be found
  def create_consent_cookie(html, video_id)
    consent_token = html[/name="v" value="(.*?)"/, 1]
    raise FailedToCreateConsentCookie, video_id if consent_token.nil?

    @consent_value = "YES+#{consent_token}"
  end

  # Fetch the video HTML page, passing the consent page once if needed.
  #
  # @param video_id [String] the video ID
  # @return [String] the HTML content
  # @raise [FailedToCreateConsentCookie] if the consent page is still served
  #   after the consent cookie has been sent
  def fetch_video_html(video_id)
    html = fetch_html(video_id)
    return html unless html.include?(CONSENT_FORM_MARKER)

    create_consent_cookie(html, video_id)
    html = fetch_html(video_id)
    raise FailedToCreateConsentCookie, video_id if html.include?(CONSENT_FORM_MARKER)

    html
  end

  # Fetch raw HTML from YouTube.
  #
  # @param video_id [String] the video ID
  # @return [String] the HTML content (unescaped)
  def fetch_html(video_id)
    url = format(WATCH_URL, video_id: video_id)

    response = @http_client.get(url) do |req|
      req.headers["Accept-Language"] = "en-US"
      # Add consent cookie if we have one
      req.headers["Cookie"] = "CONSENT=#{@consent_value}" if @consent_value
    end

    raise_http_errors(response, video_id)
    # to_s: a response without a body is treated as empty HTML.
    CGI.unescapeHTML(response.body.to_s)
  end

  # Fetch data from the Innertube API.
  #
  # @param video_id [String] the video ID
  # @param api_key [String] the API key
  # @return [Hash] the API response
  # @raise [YouTubeDataUnparsable] if the response is not a JSON object
  def fetch_innertube_data(video_id, api_key)
    url = format(INNERTUBE_API_URL, api_key: api_key)

    response = @http_client.post(url) do |req|
      req.headers["Content-Type"] = "application/json"
      req.body = JSON.generate({
        "context" => INNERTUBE_CONTEXT,
        "videoId" => video_id
      })
    end

    raise_http_errors(response, video_id)
    parse_innertube_response(response.body, video_id)
  end

  # Decode an Innertube response body into a Hash.
  #
  # @param body [String, Hash, nil] the response body; a Hash is returned
  #   as-is (e.g. when the client already decoded the JSON)
  # @param video_id [String] the video ID (for error messages)
  # @return [Hash] the decoded response
  # @raise [YouTubeDataUnparsable] if the body is not valid JSON or does not
  #   decode to an object
  def parse_innertube_response(body, video_id)
    return body if body.is_a?(Hash)

    data = JSON.parse(body.to_s)
    raise YouTubeDataUnparsable, video_id unless data.is_a?(Hash)

    data
  rescue JSON::ParserError
    # Raise the library's own error rather than leaking a parser exception.
    raise YouTubeDataUnparsable, video_id
  end

  # Raise appropriate errors for HTTP responses.
  #
  # @param response [Faraday::Response] the HTTP response
  # @param video_id [String] the video ID
  # @raise [IpBlocked] for 429 responses
  # @raise [YouTubeRequestFailed] for any other 4xx/5xx response
  def raise_http_errors(response, video_id)
    status = response.status

    case status
    when 429
      raise IpBlocked, video_id
    when 400..599
      raise YouTubeRequestFailed.new(video_id, StandardError.new("HTTP #{status}"))
    end
  end
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require "cgi"
|
|
5
|
+
|
|
6
|
+
module YoutubeRb
|
|
7
|
+
module Transcript
|
|
8
|
+
# Turns the XML transcript payload returned by YouTube into
# TranscriptSnippet objects, stripping HTML tags from the text on the way.
class TranscriptParser
  # Tags that survive tag stripping when preserve_formatting is enabled.
  FORMATTING_TAGS = %w[strong em b i mark small del ins sub sup].freeze

  # @param preserve_formatting [Boolean] keep the tags in FORMATTING_TAGS
  #   instead of stripping every tag
  def initialize(preserve_formatting: false)
    @preserve_formatting = preserve_formatting
    @html_regex = build_html_regex
  end

  # Build one snippet per <text> element that has non-empty text.
  #
  # @param raw_data [String] the raw XML data from YouTube
  # @return [Array<TranscriptSnippet>] parsed transcript snippets; a missing
  #   "start" attribute becomes 0.0 and a missing "dur" defaults to 0.0
  def parse(raw_data)
    Nokogiri::XML(raw_data).xpath("//text").each_with_object([]) do |node, snippets|
      raw_text = node.text
      next if raw_text.nil? || raw_text.empty?

      snippets << TranscriptSnippet.new(
        text: process_text(raw_text),
        start: node["start"].to_f,
        duration: (node["dur"] || "0.0").to_f
      )
    end
  end

  private

  # Pattern of the tags to delete: every tag by default, or every tag that
  # is not one of FORMATTING_TAGS when formatting is preserved.
  #
  # @return [Regexp]
  def build_html_regex
    return Regexp.new("<[^>]*>", Regexp::IGNORECASE) unless @preserve_formatting

    # The negative lookahead skips opening and closing formatting tags.
    formats_pattern = FORMATTING_TAGS.join("|")
    Regexp.new("</?(?!/?(?:#{formats_pattern})\\b)[^>]*>", Regexp::IGNORECASE)
  end

  # Decode HTML entities first, then drop the unwanted tags.
  #
  # @param text [String] the raw text
  # @return [String] processed text
  def process_text(text)
    CGI.unescapeHTML(text).gsub(@html_regex, "")
  end
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "transcript/version"
|
|
4
|
+
require_relative "transcript/settings"
|
|
5
|
+
require_relative "transcript/errors"
|
|
6
|
+
require_relative "transcript/transcript_parser"
|
|
7
|
+
require_relative "transcript/transcript"
|
|
8
|
+
require_relative "transcript/transcript_list"
|
|
9
|
+
require_relative "transcript/transcript_list_fetcher"
|
|
10
|
+
require_relative "transcript/api"
|
|
11
|
+
require_relative "transcript/formatters"
|
|
12
|
+
|
|
13
|
+
module YoutubeRb
  # Module-level shortcuts; every call creates a new YouTubeTranscriptApi
  # with default arguments and delegates to it.
  module Transcript
    # Fetch a transcript in one call.
    #
    # @param video_id [String] YouTube video ID
    # @param languages [Array<String>] language codes in order of preference
    # @param preserve_formatting [Boolean] whether to preserve HTML formatting
    # @return [FetchedTranscript] the fetched transcript
    def self.fetch(video_id, languages: ["en"], preserve_formatting: false)
      YouTubeTranscriptApi.new.fetch(
        video_id,
        languages: languages,
        preserve_formatting: preserve_formatting
      )
    end

    # List the transcripts available for a video in one call.
    #
    # @param video_id [String] YouTube video ID
    # @return [TranscriptList] list of available transcripts
    def self.list(video_id)
      YouTubeTranscriptApi.new.list(video_id)
    end
  end
end
|
data/spec/api_spec.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require "spec_helper"
|
|
4
4
|
require "webmock/rspec"
|
|
5
5
|
|
|
6
|
-
RSpec.describe
|
|
6
|
+
RSpec.describe YoutubeRb::Transcript::YouTubeTranscriptApi do
|
|
7
7
|
let(:api) { described_class.new }
|
|
8
8
|
let(:video_id) { "dQw4w9WgXcQ" }
|
|
9
9
|
let(:api_key) { "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" }
|
|
@@ -84,7 +84,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
84
84
|
|
|
85
85
|
it "creates a TranscriptListFetcher" do
|
|
86
86
|
api = described_class.new
|
|
87
|
-
expect(api.instance_variable_get(:@fetcher)).to be_a(
|
|
87
|
+
expect(api.instance_variable_get(:@fetcher)).to be_a(YoutubeRb::Transcript::TranscriptListFetcher)
|
|
88
88
|
end
|
|
89
89
|
end
|
|
90
90
|
|
|
@@ -102,7 +102,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
102
102
|
|
|
103
103
|
it "returns a FetchedTranscript" do
|
|
104
104
|
result = api.fetch(video_id)
|
|
105
|
-
expect(result).to be_a(
|
|
105
|
+
expect(result).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
106
106
|
end
|
|
107
107
|
|
|
108
108
|
it "fetches the transcript with correct video_id" do
|
|
@@ -138,7 +138,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
138
138
|
it "raises NoTranscriptFound when no language matches" do
|
|
139
139
|
expect {
|
|
140
140
|
api.fetch(video_id, languages: ["ja", "ko", "zh"])
|
|
141
|
-
}.to raise_error(
|
|
141
|
+
}.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
context "with preserve_formatting option" do
|
|
@@ -182,7 +182,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
182
182
|
|
|
183
183
|
it "returns a TranscriptList" do
|
|
184
184
|
result = api.list(video_id)
|
|
185
|
-
expect(result).to be_a(
|
|
185
|
+
expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
|
|
186
186
|
end
|
|
187
187
|
|
|
188
188
|
it "returns a list with the correct video_id" do
|
|
@@ -213,7 +213,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
213
213
|
end
|
|
214
214
|
|
|
215
215
|
it "raises VideoUnavailable error" do
|
|
216
|
-
expect { api.list(video_id) }.to raise_error(
|
|
216
|
+
expect { api.list(video_id) }.to raise_error(YoutubeRb::Transcript::VideoUnavailable)
|
|
217
217
|
end
|
|
218
218
|
end
|
|
219
219
|
|
|
@@ -227,7 +227,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
227
227
|
end
|
|
228
228
|
|
|
229
229
|
it "raises TranscriptsDisabled error" do
|
|
230
|
-
expect { api.list(video_id) }.to raise_error(
|
|
230
|
+
expect { api.list(video_id) }.to raise_error(YoutubeRb::Transcript::TranscriptsDisabled)
|
|
231
231
|
end
|
|
232
232
|
end
|
|
233
233
|
end
|
|
@@ -272,7 +272,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
272
272
|
it "fetches all video transcripts" do
|
|
273
273
|
results = api.fetch_all(video_ids)
|
|
274
274
|
results.each do |vid, transcript|
|
|
275
|
-
expect(transcript).to be_a(
|
|
275
|
+
expect(transcript).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
276
276
|
expect(transcript.video_id).to eq(vid)
|
|
277
277
|
end
|
|
278
278
|
end
|
|
@@ -292,7 +292,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
292
292
|
expect(yielded.length).to eq(3)
|
|
293
293
|
yielded.each do |vid, klass|
|
|
294
294
|
expect(video_ids).to include(vid)
|
|
295
|
-
expect(klass).to eq(
|
|
295
|
+
expect(klass).to eq(YoutubeRb::Transcript::FetchedTranscript)
|
|
296
296
|
end
|
|
297
297
|
end
|
|
298
298
|
|
|
@@ -316,7 +316,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
316
316
|
end
|
|
317
317
|
|
|
318
318
|
it "raises error by default" do
|
|
319
|
-
expect { api.fetch_all(failing_video_ids) }.to raise_error(
|
|
319
|
+
expect { api.fetch_all(failing_video_ids) }.to raise_error(YoutubeRb::Transcript::VideoUnavailable)
|
|
320
320
|
end
|
|
321
321
|
|
|
322
322
|
it "continues on error when configured" do
|
|
@@ -332,7 +332,7 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
332
332
|
end
|
|
333
333
|
expect(errors.length).to eq(1)
|
|
334
334
|
expect(errors.first[0]).to eq("fail_video")
|
|
335
|
-
expect(errors.first[1]).to be_a(
|
|
335
|
+
expect(errors.first[1]).to be_a(YoutubeRb::Transcript::VideoUnavailable)
|
|
336
336
|
end
|
|
337
337
|
end
|
|
338
338
|
|
|
@@ -356,27 +356,27 @@ RSpec.describe Youtube::Transcript::Rb::YouTubeTranscriptApi do
|
|
|
356
356
|
.to_return(status: 200, body: sample_transcript_xml)
|
|
357
357
|
end
|
|
358
358
|
|
|
359
|
-
describe "
|
|
359
|
+
describe "YoutubeRb::Transcript.fetch" do
|
|
360
360
|
it "fetches a transcript" do
|
|
361
|
-
result =
|
|
362
|
-
expect(result).to be_a(
|
|
361
|
+
result = YoutubeRb::Transcript.fetch(video_id)
|
|
362
|
+
expect(result).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
363
363
|
end
|
|
364
364
|
|
|
365
365
|
it "accepts language option" do
|
|
366
|
-
result =
|
|
366
|
+
result = YoutubeRb::Transcript.fetch(video_id, languages: ["en"])
|
|
367
367
|
expect(result.language_code).to eq("en")
|
|
368
368
|
end
|
|
369
369
|
|
|
370
370
|
it "accepts preserve_formatting option" do
|
|
371
|
-
result =
|
|
372
|
-
expect(result).to be_a(
|
|
371
|
+
result = YoutubeRb::Transcript.fetch(video_id, preserve_formatting: false)
|
|
372
|
+
expect(result).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
373
373
|
end
|
|
374
374
|
end
|
|
375
375
|
|
|
376
|
-
describe "
|
|
376
|
+
describe "YoutubeRb::Transcript.list" do
|
|
377
377
|
it "lists available transcripts" do
|
|
378
|
-
result =
|
|
379
|
-
expect(result).to be_a(
|
|
378
|
+
result = YoutubeRb::Transcript.list(video_id)
|
|
379
|
+
expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
|
|
380
380
|
end
|
|
381
381
|
end
|
|
382
382
|
end
|