youtube-transcript-rb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/.serena/.gitignore +1 -0
- data/.serena/memories/code_style_and_conventions.md +35 -0
- data/.serena/memories/project_overview.md +40 -0
- data/.serena/memories/suggested_commands.md +50 -0
- data/.serena/memories/task_completion_checklist.md +25 -0
- data/.serena/memories/tech_stack.md +20 -0
- data/.serena/project.yml +84 -0
- data/LICENSE +21 -0
- data/PLAN.md +422 -0
- data/README.md +496 -0
- data/Rakefile +4 -0
- data/lib/youtube/transcript/rb/api.rb +150 -0
- data/lib/youtube/transcript/rb/errors.rb +217 -0
- data/lib/youtube/transcript/rb/formatters.rb +269 -0
- data/lib/youtube/transcript/rb/settings.rb +28 -0
- data/lib/youtube/transcript/rb/transcript.rb +239 -0
- data/lib/youtube/transcript/rb/transcript_list.rb +170 -0
- data/lib/youtube/transcript/rb/transcript_list_fetcher.rb +225 -0
- data/lib/youtube/transcript/rb/transcript_parser.rb +83 -0
- data/lib/youtube/transcript/rb/version.rb +9 -0
- data/lib/youtube/transcript/rb.rb +37 -0
- data/sig/youtube/transcript/rb.rbs +8 -0
- data/spec/api_spec.rb +397 -0
- data/spec/errors_spec.rb +240 -0
- data/spec/formatters_spec.rb +436 -0
- data/spec/integration_spec.rb +363 -0
- data/spec/settings_spec.rb +67 -0
- data/spec/spec_helper.rb +109 -0
- data/spec/transcript_list_fetcher_spec.rb +520 -0
- data/spec/transcript_list_spec.rb +380 -0
- data/spec/transcript_parser_spec.rb +355 -0
- data/spec/transcript_spec.rb +435 -0
- metadata +118 -0
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
require "webmock/rspec"
|
|
5
|
+
|
|
6
|
+
RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
7
|
+
# Identifiers interpolated into the stubbed URLs and fixture bodies of this spec.
let(:video_id) { "dQw4w9WgXcQ" }
let(:api_key) { "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" }

# Endpoints that the examples stub through WebMock.
let(:watch_url) { "https://www.youtube.com/watch?v=#{video_id}" }
let(:innertube_url) { "https://www.youtube.com/youtubei/v1/player?key=#{api_key}" }

# Object under test, wired to a default Faraday connection.
let(:http_client) { Faraday.new }
let(:fetcher) { described_class.new(http_client: http_client) }
|
|
14
|
+
|
|
15
|
+
# Minimal watch-page markup: an inline script assigns a ytcfg object whose
# INNERTUBE_API_KEY entry is the spec's api_key.
let(:sample_html) do
  ytcfg = %({"INNERTUBE_API_KEY": "#{api_key}", "OTHER_KEY": "value"})

  <<~HTML
    <!DOCTYPE html>
    <html>
    <head><title>Test Video</title></head>
    <body>
    <script>
    var ytcfg = #{ytcfg};
    </script>
    </body>
    </html>
  HTML
end
|
|
29
|
+
|
|
30
|
+
# Innertube player response (as a Ruby hash) with playability "OK", one
# translatable English caption track and Spanish as a translation language.
let(:sample_innertube_response) do
  english_track = {
    "baseUrl" => "https://www.youtube.com/api/timedtext?v=#{video_id}&lang=en",
    "name" => { "runs" => [{ "text" => "English" }] },
    "languageCode" => "en",
    "isTranslatable" => true
  }
  spanish_translation = {
    "languageCode" => "es",
    "languageName" => { "runs" => [{ "text" => "Spanish" }] }
  }
  tracklist = {
    "captionTracks" => [english_track],
    "translationLanguages" => [spanish_translation]
  }

  {
    "playabilityStatus" => { "status" => "OK" },
    "captions" => { "playerCaptionsTracklistRenderer" => tracklist }
  }
end
|
|
51
|
+
|
|
52
|
+
describe "#initialize" do
  # Each example inspects the constructed fetcher's instance variables directly.
  it "stores the http_client" do
    expect(fetcher.instance_variable_get(:@http_client)).to eq(http_client)
  end

  it "stores the proxy_config when provided" do
    proxy = double("proxy_config")
    proxied_fetcher = described_class.new(http_client: http_client, proxy_config: proxy)

    expect(proxied_fetcher.instance_variable_get(:@proxy_config)).to eq(proxy)
  end

  it "defaults proxy_config to nil" do
    expect(fetcher.instance_variable_get(:@proxy_config)).to be_nil
  end
end
|
|
69
|
+
|
|
70
|
+
describe "#fetch" do
  # Every example runs against a watch page that exposes the API key and an
  # innertube endpoint that answers with the caption fixture as JSON.
  before do
    stub_request(:get, watch_url).to_return(status: 200, body: sample_html)

    innertube_reply = {
      status: 200,
      body: sample_innertube_response.to_json,
      headers: { "Content-Type" => "application/json" }
    }
    stub_request(:post, innertube_url).to_return(innertube_reply)
  end

  it "returns a TranscriptList" do
    expect(fetcher.fetch(video_id)).to be_a(Youtube::Transcript::Rb::TranscriptList)
  end

  it "returns a TranscriptList with the correct video_id" do
    transcript_list = fetcher.fetch(video_id)

    expect(transcript_list.video_id).to eq(video_id)
  end

  it "makes a GET request to the watch URL" do
    fetcher.fetch(video_id)

    expect(WebMock).to have_requested(:get, watch_url)
  end

  it "makes a POST request to the innertube API" do
    fetcher.fetch(video_id)

    expect(WebMock).to have_requested(:post, innertube_url)
  end

  it "includes Accept-Language header in watch request" do
    fetcher.fetch(video_id)

    expected_headers = { "Accept-Language" => "en-US" }
    expect(WebMock).to have_requested(:get, watch_url).with(headers: expected_headers)
  end

  it "includes proper body in innertube request" do
    fetcher.fetch(video_id)

    # Braces (not do/end) keep the block bound to #with rather than to #to.
    expect(WebMock).to have_requested(:post, innertube_url).with { |request|
      payload = JSON.parse(request.body)
      client_name = payload["context"]["client"]["clientName"]
      payload["videoId"] == video_id && client_name == "ANDROID"
    }
  end
end
|
|
114
|
+
|
|
115
|
+
describe "error handling" do
  # Innertube payload reporting the generic "This video is unavailable" failure.
  let(:unavailable_payload) do
    { "playabilityStatus" => { "status" => "ERROR", "reason" => "This video is unavailable" } }
  end

  # Stubs the watch-page GET. Called without arguments it serves the API-key
  # fixture with a 200 on the regular watch URL.
  def stub_watch_page(url: watch_url, status: 200, body: sample_html)
    stub_request(:get, url).to_return(status: status, body: body)
  end

  # Stubs the innertube POST to answer 200 with +payload+ serialized to JSON.
  def stub_innertube(payload)
    stub_request(:post, innertube_url).to_return(status: 200, body: payload.to_json)
  end

  context "when IP is blocked (429 response)" do
    before { stub_watch_page(status: 429, body: "Too Many Requests") }

    it "raises IpBlocked error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::IpBlocked)
    end
  end

  context "when CAPTCHA is detected" do
    before do
      stub_watch_page(body: '<html><body><div class="g-recaptcha" data-sitekey="abc"></div></body></html>')
    end

    it "raises IpBlocked error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::IpBlocked)
    end
  end

  context "when API key cannot be found" do
    before { stub_watch_page(body: "<html><body>No API key here</body></html>") }

    it "raises YouTubeDataUnparsable error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::YouTubeDataUnparsable)
    end
  end

  context "when video is unavailable" do
    before do
      stub_watch_page
      stub_innertube(unavailable_payload)
    end

    it "raises VideoUnavailable error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::VideoUnavailable)
    end
  end

  context "when video ID looks like a URL" do
    let(:url_video_id) { "https://www.youtube.com/watch?v=abc123" }

    before do
      # The full URL is interpolated into the watch URL as if it were an id.
      stub_watch_page(url: "https://www.youtube.com/watch?v=#{url_video_id}")
      stub_innertube(unavailable_payload)
    end

    it "raises InvalidVideoId error" do
      expect { fetcher.fetch(url_video_id) }.to raise_error(Youtube::Transcript::Rb::InvalidVideoId)
    end
  end

  context "when video is age restricted" do
    before do
      stub_watch_page
      stub_innertube(
        {
          "playabilityStatus" => {
            "status" => "LOGIN_REQUIRED",
            "reason" => "This video may be inappropriate for some users."
          }
        }
      )
    end

    it "raises AgeRestricted error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::AgeRestricted)
    end
  end

  context "when bot is detected" do
    before do
      stub_watch_page
      stub_innertube(
        {
          "playabilityStatus" => {
            "status" => "LOGIN_REQUIRED",
            "reason" => "Sign in to confirm you're not a bot"
          }
        }
      )
    end

    it "raises RequestBlocked error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::RequestBlocked)
    end
  end

  context "when video is unplayable with subreasons" do
    before do
      subreason_runs = [
        { "text" => "This video is private" },
        { "text" => "Please contact the owner" }
      ]

      stub_watch_page
      stub_innertube(
        {
          "playabilityStatus" => {
            "status" => "ERROR",
            "reason" => "Video unavailable",
            "errorScreen" => {
              "playerErrorMessageRenderer" => { "subreason" => { "runs" => subreason_runs } }
            }
          }
        }
      )
    end

    it "raises VideoUnplayable error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::VideoUnplayable)
    end
  end

  context "when transcripts are disabled" do
    before do
      stub_watch_page
      stub_innertube({ "playabilityStatus" => { "status" => "OK" }, "captions" => {} })
    end

    it "raises TranscriptsDisabled error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::TranscriptsDisabled)
    end
  end

  context "when captions is nil" do
    before do
      stub_watch_page
      # No "captions" key at all in the reply.
      stub_innertube({ "playabilityStatus" => { "status" => "OK" } })
    end

    it "raises TranscriptsDisabled error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::TranscriptsDisabled)
    end
  end

  context "when captionTracks is missing" do
    before do
      stub_watch_page
      stub_innertube(
        {
          "playabilityStatus" => { "status" => "OK" },
          "captions" => {
            "playerCaptionsTracklistRenderer" => { "translationLanguages" => [] }
          }
        }
      )
    end

    it "raises TranscriptsDisabled error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::TranscriptsDisabled)
    end
  end

  context "when HTTP request fails" do
    before { stub_watch_page(status: 500, body: "Internal Server Error") }

    it "raises YouTubeRequestFailed error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::YouTubeRequestFailed)
    end
  end

  context "when innertube API returns error" do
    before do
      stub_watch_page
      # Non-JSON 403 body, so this stub is written out instead of using stub_innertube.
      stub_request(:post, innertube_url).to_return(status: 403, body: "Forbidden")
    end

    it "raises YouTubeRequestFailed error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::YouTubeRequestFailed)
    end
  end
end
|
|
343
|
+
|
|
344
|
+
describe "consent cookie handling" do
  # Consent interstitial whose form carries the "v" input.
  let(:consent_html) { consent_page('<input name="v" value="cb.20231201-01-p1.en+FX+999">') }

  # Renders a consent interstitial: a form posting to consent.youtube.com that
  # contains exactly the given +input_tag+ markup.
  def consent_page(input_tag)
    <<~HTML
      <!DOCTYPE html>
      <html>
      <body>
      <form action="https://consent.youtube.com/s">
      #{input_tag}
      </form>
      </body>
      </html>
    HTML
  end

  context "when consent is required and resolved" do
    before do
      # First GET serves the consent page, every later GET the real watch page.
      stub_request(:get, watch_url)
        .to_return(status: 200, body: consent_html)
        .then
        .to_return(status: 200, body: sample_html)

      stub_request(:post, innertube_url)
        .to_return(status: 200, body: sample_innertube_response.to_json)
    end

    it "retries after setting consent cookie" do
      transcript_list = fetcher.fetch(video_id)

      expect(transcript_list).to be_a(Youtube::Transcript::Rb::TranscriptList)
      expect(WebMock).to have_requested(:get, watch_url).times(2)
    end

    it "includes consent cookie in second request" do
      fetcher.fetch(video_id)

      cookie_header = { "Cookie" => /CONSENT=YES\+/ }
      expect(WebMock).to have_requested(:get, watch_url).with(headers: cookie_header)
    end
  end

  context "when consent cannot be resolved" do
    # Same interstitial, but the form has no input named "v".
    let(:no_value_consent_html) { consent_page('<input name="other" value="something">') }

    before { stub_request(:get, watch_url).to_return(status: 200, body: no_value_consent_html) }

    it "raises FailedToCreateConsentCookie error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::FailedToCreateConsentCookie)
    end
  end

  context "when consent page persists after cookie" do
    # Every GET keeps returning the consent page.
    before { stub_request(:get, watch_url).to_return(status: 200, body: consent_html) }

    it "raises FailedToCreateConsentCookie error" do
      expect { fetcher.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::FailedToCreateConsentCookie)
    end
  end
end
|
|
418
|
+
|
|
419
|
+
describe "HTML unescaping" do
  # Watch page whose <title> contains an HTML entity (&amp;) alongside the
  # API-key script, so the fetched page actually has something to unescape.
  # A bare "&" would leave this group without any entity in its fixture.
  let(:escaped_html) do
    <<~HTML
      <!DOCTYPE html>
      <html>
      <head><title>Test &amp; Video</title></head>
      <body>
      <script>
      var ytcfg = {"INNERTUBE_API_KEY": "#{api_key}"};
      </script>
      </body>
      </html>
    HTML
  end

  before do
    stub_request(:get, watch_url)
      .to_return(status: 200, body: escaped_html)

    stub_request(:post, innertube_url)
      .to_return(status: 200, body: sample_innertube_response.to_json)
  end

  # Only asserts that the fetch still succeeds on a page containing an entity.
  it "properly unescapes HTML entities" do
    result = fetcher.fetch(video_id)
    expect(result).to be_a(Youtube::Transcript::Rb::TranscriptList)
  end
end
|
|
447
|
+
|
|
448
|
+
describe "with proxy config" do
  let(:proxy_config) { double("proxy_config", retries_when_blocked: 3) }
  let(:fetcher_with_proxy) { described_class.new(http_client: http_client, proxy_config: proxy_config) }

  # Stubbed innertube reply carrying the bot-detection reason.
  let(:blocked_reply) do
    payload = {
      "playabilityStatus" => { "status" => "LOGIN_REQUIRED", "reason" => "Sign in to confirm you're not a bot" }
    }
    { status: 200, body: payload.to_json }
  end

  # Both contexts serve the same healthy watch page; only the innertube replies differ.
  before { stub_request(:get, watch_url).to_return(status: 200, body: sample_html) }

  context "when request is blocked and retries configured" do
    before do
      success_reply = { status: 200, body: sample_innertube_response.to_json }

      # Two blocked replies, then the caption fixture.
      stub_request(:post, innertube_url).to_return(blocked_reply, blocked_reply, success_reply)
    end

    it "retries the request" do
      transcript_list = fetcher_with_proxy.fetch(video_id)

      expect(transcript_list).to be_a(Youtube::Transcript::Rb::TranscriptList)
      expect(WebMock).to have_requested(:post, innertube_url).times(3)
    end
  end

  context "when all retries fail" do
    before { stub_request(:post, innertube_url).to_return(blocked_reply) }

    it "raises RequestBlocked after exhausting retries" do
      expect { fetcher_with_proxy.fetch(video_id) }.to raise_error(Youtube::Transcript::Rb::RequestBlocked)
      expect(WebMock).to have_requested(:post, innertube_url).times(3)
    end
  end
end
|
|
492
|
+
|
|
493
|
+
describe "PlayabilityStatus module" do
  # Each status constant is expected to equal its own name as a string.
  %w[OK ERROR LOGIN_REQUIRED].each do |status|
    it "defines #{status} status" do
      expect(Youtube::Transcript::Rb::PlayabilityStatus.const_get(status)).to eq(status)
    end
  end
end
|
|
506
|
+
|
|
507
|
+
describe "PlayabilityFailedReason module" do
  # Constant name => exact reason text the constant is expected to hold.
  {
    BOT_DETECTED: "Sign in to confirm you're not a bot",
    AGE_RESTRICTED: "This video may be inappropriate for some users.",
    VIDEO_UNAVAILABLE: "This video is unavailable"
  }.each do |reason, message|
    it "defines #{reason} reason" do
      expect(Youtube::Transcript::Rb::PlayabilityFailedReason.const_get(reason)).to eq(message)
    end
  end
end
|
|
520
|
+
end
|