youtube-transcript-rb 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of youtube-transcript-rb might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +42 -42
- data/lib/youtube-transcript-rb.rb +3 -0
- data/lib/youtube_rb/transcript/api.rb +148 -0
- data/lib/youtube_rb/transcript/errors.rb +215 -0
- data/lib/youtube_rb/transcript/formatters.rb +267 -0
- data/lib/youtube_rb/transcript/settings.rb +26 -0
- data/lib/youtube_rb/transcript/transcript.rb +237 -0
- data/lib/youtube_rb/transcript/transcript_list.rb +168 -0
- data/lib/youtube_rb/transcript/transcript_list_fetcher.rb +223 -0
- data/lib/youtube_rb/transcript/transcript_parser.rb +81 -0
- data/lib/{youtube/transcript/rb → youtube_rb/transcript}/version.rb +2 -4
- data/lib/youtube_rb/transcript.rb +35 -0
- data/sig/youtube_rb/transcript.rbs +6 -0
- data/spec/api_spec.rb +20 -20
- data/spec/errors_spec.rb +39 -39
- data/spec/formatters_spec.rb +36 -36
- data/spec/integration_spec.rb +32 -32
- data/spec/settings_spec.rb +16 -16
- data/spec/spec_helper.rb +1 -1
- data/spec/transcript_list_fetcher_spec.rb +27 -27
- data/spec/transcript_list_spec.rb +6 -6
- data/spec/transcript_parser_spec.rb +3 -3
- data/spec/transcript_spec.rb +16 -16
- metadata +13 -12
- data/lib/youtube/transcript/rb/api.rb +0 -150
- data/lib/youtube/transcript/rb/errors.rb +0 -217
- data/lib/youtube/transcript/rb/formatters.rb +0 -269
- data/lib/youtube/transcript/rb/settings.rb +0 -28
- data/lib/youtube/transcript/rb/transcript.rb +0 -239
- data/lib/youtube/transcript/rb/transcript_list.rb +0 -170
- data/lib/youtube/transcript/rb/transcript_list_fetcher.rb +0 -225
- data/lib/youtube/transcript/rb/transcript_parser.rb +0 -83
- data/lib/youtube/transcript/rb.rb +0 -37
- data/sig/youtube/transcript/rb.rbs +0 -8
data/spec/integration_spec.rb
CHANGED
|
@@ -27,14 +27,14 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
27
27
|
let(:ted_talk_video_id) { "8jPQjjsBbIc" } # TED Talk - usually has good transcripts
|
|
28
28
|
let(:google_video_id) { "dQw4w9WgXcQ" } # Rick Astley - Never Gonna Give You Up (very stable)
|
|
29
29
|
|
|
30
|
-
describe
|
|
30
|
+
describe YoutubeRb::Transcript::YouTubeTranscriptApi do
|
|
31
31
|
let(:api) { described_class.new }
|
|
32
32
|
|
|
33
33
|
describe "#list" do
|
|
34
34
|
it "fetches available transcripts for a video" do
|
|
35
35
|
transcript_list = api.list(ted_talk_video_id)
|
|
36
36
|
|
|
37
|
-
expect(transcript_list).to be_a(
|
|
37
|
+
expect(transcript_list).to be_a(YoutubeRb::Transcript::TranscriptMetadataList)
|
|
38
38
|
expect(transcript_list.video_id).to eq(ted_talk_video_id)
|
|
39
39
|
expect(transcript_list.count).to be > 0
|
|
40
40
|
|
|
@@ -47,7 +47,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
47
47
|
transcript_list = api.list(ted_talk_video_id)
|
|
48
48
|
|
|
49
49
|
transcript_list.each do |transcript|
|
|
50
|
-
expect(transcript).to be_a(
|
|
50
|
+
expect(transcript).to be_a(YoutubeRb::Transcript::TranscriptMetadata)
|
|
51
51
|
expect(transcript.language_code).to be_a(String)
|
|
52
52
|
expect(transcript.language).to be_a(String)
|
|
53
53
|
end
|
|
@@ -58,12 +58,12 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
58
58
|
it "fetches English transcript by default" do
|
|
59
59
|
transcript = api.fetch(ted_talk_video_id)
|
|
60
60
|
|
|
61
|
-
expect(transcript).to be_a(
|
|
61
|
+
expect(transcript).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
62
62
|
expect(transcript.video_id).to eq(ted_talk_video_id)
|
|
63
63
|
expect(transcript.snippets).not_to be_empty
|
|
64
64
|
|
|
65
65
|
first_snippet = transcript.first
|
|
66
|
-
expect(first_snippet).to be_a(
|
|
66
|
+
expect(first_snippet).to be_a(YoutubeRb::Transcript::TranscriptMetadataSnippet)
|
|
67
67
|
expect(first_snippet.text).to be_a(String)
|
|
68
68
|
expect(first_snippet.start).to be_a(Float)
|
|
69
69
|
expect(first_snippet.duration).to be_a(Float)
|
|
@@ -91,7 +91,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
91
91
|
it "preserves HTML formatting when requested" do
|
|
92
92
|
transcript = api.fetch(ted_talk_video_id, preserve_formatting: true)
|
|
93
93
|
|
|
94
|
-
expect(transcript).to be_a(
|
|
94
|
+
expect(transcript).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
95
95
|
# Note: Not all videos have HTML formatting, so we just verify it doesn't break
|
|
96
96
|
end
|
|
97
97
|
end
|
|
@@ -103,7 +103,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
103
103
|
|
|
104
104
|
expect(results).to be_a(Hash)
|
|
105
105
|
expect(results.keys).to include(ted_talk_video_id)
|
|
106
|
-
expect(results[ted_talk_video_id]).to be_a(
|
|
106
|
+
expect(results[ted_talk_video_id]).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
107
107
|
end
|
|
108
108
|
|
|
109
109
|
it "continues on error when option is set" do
|
|
@@ -122,12 +122,12 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
122
122
|
end
|
|
123
123
|
end
|
|
124
124
|
|
|
125
|
-
describe
|
|
125
|
+
describe YoutubeRb::Transcript do
|
|
126
126
|
describe ".fetch" do
|
|
127
127
|
it "provides convenience method for fetching transcripts" do
|
|
128
128
|
transcript = described_class.fetch(ted_talk_video_id)
|
|
129
129
|
|
|
130
|
-
expect(transcript).to be_a(
|
|
130
|
+
expect(transcript).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
131
131
|
expect(transcript.snippets).not_to be_empty
|
|
132
132
|
end
|
|
133
133
|
end
|
|
@@ -136,7 +136,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
136
136
|
it "provides convenience method for listing transcripts" do
|
|
137
137
|
transcript_list = described_class.list(ted_talk_video_id)
|
|
138
138
|
|
|
139
|
-
expect(transcript_list).to be_a(
|
|
139
|
+
expect(transcript_list).to be_a(YoutubeRb::Transcript::TranscriptMetadataList)
|
|
140
140
|
expect(transcript_list.count).to be > 0
|
|
141
141
|
end
|
|
142
142
|
end
|
|
@@ -144,13 +144,13 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
144
144
|
|
|
145
145
|
describe "Transcript Translation" do
|
|
146
146
|
it "translates a transcript to another language" do
|
|
147
|
-
api =
|
|
147
|
+
api = YoutubeRb::Transcript::YouTubeTranscriptApi.new
|
|
148
148
|
transcript_list = api.list(ted_talk_video_id)
|
|
149
149
|
|
|
150
150
|
# Find an English transcript
|
|
151
151
|
begin
|
|
152
152
|
transcript = transcript_list.find_transcript(["en"])
|
|
153
|
-
rescue
|
|
153
|
+
rescue YoutubeRb::Transcript::NoTranscriptFound
|
|
154
154
|
skip "No English transcript available for this video"
|
|
155
155
|
end
|
|
156
156
|
|
|
@@ -160,14 +160,14 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
160
160
|
translated = transcript.translate("es")
|
|
161
161
|
fetched = translated.fetch
|
|
162
162
|
|
|
163
|
-
expect(fetched).to be_a(
|
|
163
|
+
expect(fetched).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
164
164
|
expect(fetched.language_code).to eq("es")
|
|
165
165
|
expect(fetched.snippets).not_to be_empty
|
|
166
166
|
|
|
167
167
|
puts "\nTranslated to Spanish: #{fetched.first.text[0..50]}..."
|
|
168
|
-
rescue
|
|
168
|
+
rescue YoutubeRb::Transcript::TranslationLanguageNotAvailable
|
|
169
169
|
skip "Spanish translation not available for this video"
|
|
170
|
-
rescue
|
|
170
|
+
rescue YoutubeRb::Transcript::IpBlocked
|
|
171
171
|
skip "IP blocked by YouTube - try again later or use a proxy"
|
|
172
172
|
end
|
|
173
173
|
else
|
|
@@ -177,10 +177,10 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
177
177
|
end
|
|
178
178
|
|
|
179
179
|
describe "Formatters with Real Data" do
|
|
180
|
-
let(:api) {
|
|
180
|
+
let(:api) { YoutubeRb::Transcript::YouTubeTranscriptApi.new }
|
|
181
181
|
let(:transcript) { api.fetch(ted_talk_video_id) }
|
|
182
182
|
|
|
183
|
-
describe
|
|
183
|
+
describe YoutubeRb::Transcript::Formatters::JSONFormatter do
|
|
184
184
|
it "formats real transcript as JSON" do
|
|
185
185
|
formatter = described_class.new
|
|
186
186
|
output = formatter.format_transcript(transcript)
|
|
@@ -192,7 +192,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
192
192
|
end
|
|
193
193
|
end
|
|
194
194
|
|
|
195
|
-
describe
|
|
195
|
+
describe YoutubeRb::Transcript::Formatters::TextFormatter do
|
|
196
196
|
it "formats real transcript as plain text" do
|
|
197
197
|
formatter = described_class.new
|
|
198
198
|
output = formatter.format_transcript(transcript)
|
|
@@ -205,7 +205,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
205
205
|
end
|
|
206
206
|
end
|
|
207
207
|
|
|
208
|
-
describe
|
|
208
|
+
describe YoutubeRb::Transcript::Formatters::SRTFormatter do
|
|
209
209
|
it "formats real transcript as SRT" do
|
|
210
210
|
formatter = described_class.new
|
|
211
211
|
output = formatter.format_transcript(transcript)
|
|
@@ -218,7 +218,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
218
218
|
end
|
|
219
219
|
end
|
|
220
220
|
|
|
221
|
-
describe
|
|
221
|
+
describe YoutubeRb::Transcript::Formatters::WebVTTFormatter do
|
|
222
222
|
it "formats real transcript as WebVTT" do
|
|
223
223
|
formatter = described_class.new
|
|
224
224
|
output = formatter.format_transcript(transcript)
|
|
@@ -231,7 +231,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
231
231
|
end
|
|
232
232
|
end
|
|
233
233
|
|
|
234
|
-
describe
|
|
234
|
+
describe YoutubeRb::Transcript::Formatters::PrettyPrintFormatter do
|
|
235
235
|
it "formats real transcript as pretty-printed output" do
|
|
236
236
|
formatter = described_class.new
|
|
237
237
|
output = formatter.format_transcript(transcript)
|
|
@@ -245,18 +245,18 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
245
245
|
end
|
|
246
246
|
|
|
247
247
|
describe "Error Handling" do
|
|
248
|
-
let(:api) {
|
|
248
|
+
let(:api) { YoutubeRb::Transcript::YouTubeTranscriptApi.new }
|
|
249
249
|
|
|
250
250
|
it "raises NoTranscriptFound for unavailable language" do
|
|
251
251
|
expect {
|
|
252
252
|
api.fetch(ted_talk_video_id, languages: ["xx"]) # Invalid language code
|
|
253
|
-
}.to raise_error(
|
|
253
|
+
}.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
|
|
254
254
|
end
|
|
255
255
|
|
|
256
256
|
it "raises appropriate error for invalid video ID" do
|
|
257
257
|
expect {
|
|
258
258
|
api.fetch("this_is_not_a_valid_video_id_12345")
|
|
259
|
-
}.to raise_error(
|
|
259
|
+
}.to raise_error(YoutubeRb::Transcript::CouldNotRetrieveTranscript)
|
|
260
260
|
end
|
|
261
261
|
|
|
262
262
|
it "raises TranscriptsDisabled for video without transcripts" do
|
|
@@ -267,7 +267,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
267
267
|
end
|
|
268
268
|
|
|
269
269
|
describe "FetchedTranscript Interface" do
|
|
270
|
-
let(:api) {
|
|
270
|
+
let(:api) { YoutubeRb::Transcript::YouTubeTranscriptApi.new }
|
|
271
271
|
let(:transcript) { api.fetch(ted_talk_video_id) }
|
|
272
272
|
|
|
273
273
|
it "is enumerable" do
|
|
@@ -276,12 +276,12 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
276
276
|
expect(transcript).to respond_to(:select)
|
|
277
277
|
expect(transcript).to respond_to(:first)
|
|
278
278
|
# Note: Enumerable doesn't provide #last by default, but we can use to_a.last
|
|
279
|
-
expect(transcript.to_a.last).to be_a(
|
|
279
|
+
expect(transcript.to_a.last).to be_a(YoutubeRb::Transcript::TranscriptMetadataSnippet)
|
|
280
280
|
end
|
|
281
281
|
|
|
282
282
|
it "is indexable" do
|
|
283
|
-
expect(transcript[0]).to be_a(
|
|
284
|
-
expect(transcript[-1]).to be_a(
|
|
283
|
+
expect(transcript[0]).to be_a(YoutubeRb::Transcript::TranscriptMetadataSnippet)
|
|
284
|
+
expect(transcript[-1]).to be_a(YoutubeRb::Transcript::TranscriptMetadataSnippet)
|
|
285
285
|
end
|
|
286
286
|
|
|
287
287
|
it "has length" do
|
|
@@ -306,7 +306,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
306
306
|
end
|
|
307
307
|
|
|
308
308
|
describe "TranscriptList Interface" do
|
|
309
|
-
let(:api) {
|
|
309
|
+
let(:api) { YoutubeRb::Transcript::YouTubeTranscriptApi.new }
|
|
310
310
|
let(:transcript_list) { api.list(ted_talk_video_id) }
|
|
311
311
|
|
|
312
312
|
it "is enumerable" do
|
|
@@ -317,7 +317,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
317
317
|
|
|
318
318
|
it "finds transcripts by language" do
|
|
319
319
|
transcript = transcript_list.find_transcript(["en"])
|
|
320
|
-
expect(transcript).to be_a(
|
|
320
|
+
expect(transcript).to be_a(YoutubeRb::Transcript::TranscriptMetadata)
|
|
321
321
|
end
|
|
322
322
|
|
|
323
323
|
it "provides string representation" do
|
|
@@ -330,7 +330,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
330
330
|
end
|
|
331
331
|
|
|
332
332
|
describe "Transcript Object" do
|
|
333
|
-
let(:api) {
|
|
333
|
+
let(:api) { YoutubeRb::Transcript::YouTubeTranscriptApi.new }
|
|
334
334
|
let(:transcript_list) { api.list(ted_talk_video_id) }
|
|
335
335
|
let(:transcript) { transcript_list.find_transcript(["en"]) }
|
|
336
336
|
|
|
@@ -349,7 +349,7 @@ RSpec.describe "Integration Tests", :integration do
|
|
|
349
349
|
it "fetches transcript data" do
|
|
350
350
|
fetched = transcript.fetch
|
|
351
351
|
|
|
352
|
-
expect(fetched).to be_a(
|
|
352
|
+
expect(fetched).to be_a(YoutubeRb::Transcript::FetchedTranscript)
|
|
353
353
|
expect(fetched.snippets).not_to be_empty
|
|
354
354
|
end
|
|
355
355
|
|
data/spec/settings_spec.rb
CHANGED
|
@@ -1,67 +1,67 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "spec_helper"
|
|
4
|
-
require "
|
|
4
|
+
require "youtube_rb/transcript"
|
|
5
5
|
|
|
6
|
-
RSpec.describe "
|
|
6
|
+
RSpec.describe "YoutubeRb::Transcript Settings" do
|
|
7
7
|
describe "WATCH_URL" do
|
|
8
8
|
it "is defined" do
|
|
9
|
-
expect(
|
|
9
|
+
expect(YoutubeRb::Transcript::WATCH_URL).not_to be_nil
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
it "is a YouTube watch URL template" do
|
|
13
|
-
expect(
|
|
13
|
+
expect(YoutubeRb::Transcript::WATCH_URL).to include("youtube.com/watch")
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
it "contains video_id placeholder" do
|
|
17
|
-
expect(
|
|
17
|
+
expect(YoutubeRb::Transcript::WATCH_URL).to include("%<video_id>s")
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
it "can be formatted with a video_id" do
|
|
21
|
-
url = format(
|
|
21
|
+
url = format(YoutubeRb::Transcript::WATCH_URL, video_id: "abc123")
|
|
22
22
|
expect(url).to eq("https://www.youtube.com/watch?v=abc123")
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
describe "INNERTUBE_API_URL" do
|
|
27
27
|
it "is defined" do
|
|
28
|
-
expect(
|
|
28
|
+
expect(YoutubeRb::Transcript::INNERTUBE_API_URL).not_to be_nil
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
it "is a YouTube API URL" do
|
|
32
|
-
expect(
|
|
32
|
+
expect(YoutubeRb::Transcript::INNERTUBE_API_URL).to include("youtube.com/youtubei")
|
|
33
33
|
end
|
|
34
34
|
|
|
35
35
|
it "contains api_key placeholder" do
|
|
36
|
-
expect(
|
|
36
|
+
expect(YoutubeRb::Transcript::INNERTUBE_API_URL).to include("%<api_key>s")
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
it "can be formatted with an api_key" do
|
|
40
|
-
url = format(
|
|
40
|
+
url = format(YoutubeRb::Transcript::INNERTUBE_API_URL, api_key: "my_api_key")
|
|
41
41
|
expect(url).to eq("https://www.youtube.com/youtubei/v1/player?key=my_api_key")
|
|
42
42
|
end
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
describe "INNERTUBE_CONTEXT" do
|
|
46
46
|
it "is defined" do
|
|
47
|
-
expect(
|
|
47
|
+
expect(YoutubeRb::Transcript::INNERTUBE_CONTEXT).not_to be_nil
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
it "is a frozen hash" do
|
|
51
|
-
expect(
|
|
51
|
+
expect(YoutubeRb::Transcript::INNERTUBE_CONTEXT).to be_frozen
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
it "contains client configuration" do
|
|
55
|
-
expect(
|
|
55
|
+
expect(YoutubeRb::Transcript::INNERTUBE_CONTEXT).to have_key("client")
|
|
56
56
|
end
|
|
57
57
|
|
|
58
58
|
it "specifies clientName as ANDROID" do
|
|
59
|
-
expect(
|
|
59
|
+
expect(YoutubeRb::Transcript::INNERTUBE_CONTEXT["client"]["clientName"]).to eq("ANDROID")
|
|
60
60
|
end
|
|
61
61
|
|
|
62
62
|
it "specifies a clientVersion" do
|
|
63
|
-
expect(
|
|
64
|
-
expect(
|
|
63
|
+
expect(YoutubeRb::Transcript::INNERTUBE_CONTEXT["client"]["clientVersion"]).not_to be_nil
|
|
64
|
+
expect(YoutubeRb::Transcript::INNERTUBE_CONTEXT["client"]["clientVersion"]).to be_a(String)
|
|
65
65
|
end
|
|
66
66
|
end
|
|
67
67
|
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require "spec_helper"
|
|
4
4
|
require "webmock/rspec"
|
|
5
5
|
|
|
6
|
-
RSpec.describe
|
|
6
|
+
RSpec.describe YoutubeRb::Transcript::TranscriptListFetcher do
|
|
7
7
|
let(:http_client) { Faraday.new }
|
|
8
8
|
let(:fetcher) { described_class.new(http_client: http_client) }
|
|
9
9
|
let(:video_id) { "dQw4w9WgXcQ" }
|
|
@@ -78,7 +78,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
78
78
|
|
|
79
79
|
it "returns a TranscriptList" do
|
|
80
80
|
result = fetcher.fetch(video_id)
|
|
81
|
-
expect(result).to be_a(
|
|
81
|
+
expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
|
|
82
82
|
end
|
|
83
83
|
|
|
84
84
|
it "returns a TranscriptList with the correct video_id" do
|
|
@@ -120,7 +120,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
120
120
|
end
|
|
121
121
|
|
|
122
122
|
it "raises IpBlocked error" do
|
|
123
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
123
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::IpBlocked)
|
|
124
124
|
end
|
|
125
125
|
end
|
|
126
126
|
|
|
@@ -135,7 +135,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
135
135
|
end
|
|
136
136
|
|
|
137
137
|
it "raises IpBlocked error" do
|
|
138
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
138
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::IpBlocked)
|
|
139
139
|
end
|
|
140
140
|
end
|
|
141
141
|
|
|
@@ -150,7 +150,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
150
150
|
end
|
|
151
151
|
|
|
152
152
|
it "raises YouTubeDataUnparsable error" do
|
|
153
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
153
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::YouTubeDataUnparsable)
|
|
154
154
|
end
|
|
155
155
|
end
|
|
156
156
|
|
|
@@ -169,7 +169,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
169
169
|
end
|
|
170
170
|
|
|
171
171
|
it "raises VideoUnavailable error" do
|
|
172
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
172
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::VideoUnavailable)
|
|
173
173
|
end
|
|
174
174
|
end
|
|
175
175
|
|
|
@@ -190,7 +190,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
190
190
|
end
|
|
191
191
|
|
|
192
192
|
it "raises InvalidVideoId error" do
|
|
193
|
-
expect { fetcher.fetch(url_video_id) }.to raise_error(
|
|
193
|
+
expect { fetcher.fetch(url_video_id) }.to raise_error(YoutubeRb::Transcript::InvalidVideoId)
|
|
194
194
|
end
|
|
195
195
|
end
|
|
196
196
|
|
|
@@ -209,7 +209,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
209
209
|
end
|
|
210
210
|
|
|
211
211
|
it "raises AgeRestricted error" do
|
|
212
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
212
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::AgeRestricted)
|
|
213
213
|
end
|
|
214
214
|
end
|
|
215
215
|
|
|
@@ -228,7 +228,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
228
228
|
end
|
|
229
229
|
|
|
230
230
|
it "raises RequestBlocked error" do
|
|
231
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
231
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::RequestBlocked)
|
|
232
232
|
end
|
|
233
233
|
end
|
|
234
234
|
|
|
@@ -257,7 +257,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
257
257
|
end
|
|
258
258
|
|
|
259
259
|
it "raises VideoUnplayable error" do
|
|
260
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
260
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::VideoUnplayable)
|
|
261
261
|
end
|
|
262
262
|
end
|
|
263
263
|
|
|
@@ -274,7 +274,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
274
274
|
end
|
|
275
275
|
|
|
276
276
|
it "raises TranscriptsDisabled error" do
|
|
277
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
277
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::TranscriptsDisabled)
|
|
278
278
|
end
|
|
279
279
|
end
|
|
280
280
|
|
|
@@ -290,7 +290,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
290
290
|
end
|
|
291
291
|
|
|
292
292
|
it "raises TranscriptsDisabled error" do
|
|
293
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
293
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::TranscriptsDisabled)
|
|
294
294
|
end
|
|
295
295
|
end
|
|
296
296
|
|
|
@@ -311,7 +311,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
311
311
|
end
|
|
312
312
|
|
|
313
313
|
it "raises TranscriptsDisabled error" do
|
|
314
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
314
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::TranscriptsDisabled)
|
|
315
315
|
end
|
|
316
316
|
end
|
|
317
317
|
|
|
@@ -322,7 +322,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
322
322
|
end
|
|
323
323
|
|
|
324
324
|
it "raises YouTubeRequestFailed error" do
|
|
325
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
325
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::YouTubeRequestFailed)
|
|
326
326
|
end
|
|
327
327
|
end
|
|
328
328
|
|
|
@@ -336,7 +336,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
336
336
|
end
|
|
337
337
|
|
|
338
338
|
it "raises YouTubeRequestFailed error" do
|
|
339
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
339
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::YouTubeRequestFailed)
|
|
340
340
|
end
|
|
341
341
|
end
|
|
342
342
|
end
|
|
@@ -369,7 +369,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
369
369
|
|
|
370
370
|
it "retries after setting consent cookie" do
|
|
371
371
|
result = fetcher.fetch(video_id)
|
|
372
|
-
expect(result).to be_a(
|
|
372
|
+
expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
|
|
373
373
|
expect(WebMock).to have_requested(:get, watch_url).times(2)
|
|
374
374
|
end
|
|
375
375
|
|
|
@@ -400,7 +400,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
400
400
|
end
|
|
401
401
|
|
|
402
402
|
it "raises FailedToCreateConsentCookie error" do
|
|
403
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
403
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::FailedToCreateConsentCookie)
|
|
404
404
|
end
|
|
405
405
|
end
|
|
406
406
|
|
|
@@ -411,7 +411,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
411
411
|
end
|
|
412
412
|
|
|
413
413
|
it "raises FailedToCreateConsentCookie error" do
|
|
414
|
-
expect { fetcher.fetch(video_id) }.to raise_error(
|
|
414
|
+
expect { fetcher.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::FailedToCreateConsentCookie)
|
|
415
415
|
end
|
|
416
416
|
end
|
|
417
417
|
end
|
|
@@ -441,7 +441,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
441
441
|
|
|
442
442
|
it "properly unescapes HTML entities" do
|
|
443
443
|
result = fetcher.fetch(video_id)
|
|
444
|
-
expect(result).to be_a(
|
|
444
|
+
expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
|
|
445
445
|
end
|
|
446
446
|
end
|
|
447
447
|
|
|
@@ -469,7 +469,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
469
469
|
|
|
470
470
|
it "retries the request" do
|
|
471
471
|
result = fetcher_with_proxy.fetch(video_id)
|
|
472
|
-
expect(result).to be_a(
|
|
472
|
+
expect(result).to be_a(YoutubeRb::Transcript::TranscriptList)
|
|
473
473
|
expect(WebMock).to have_requested(:post, innertube_url).times(3)
|
|
474
474
|
end
|
|
475
475
|
end
|
|
@@ -484,7 +484,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
484
484
|
end
|
|
485
485
|
|
|
486
486
|
it "raises RequestBlocked after exhausting retries" do
|
|
487
|
-
expect { fetcher_with_proxy.fetch(video_id) }.to raise_error(
|
|
487
|
+
expect { fetcher_with_proxy.fetch(video_id) }.to raise_error(YoutubeRb::Transcript::RequestBlocked)
|
|
488
488
|
expect(WebMock).to have_requested(:post, innertube_url).times(3)
|
|
489
489
|
end
|
|
490
490
|
end
|
|
@@ -492,29 +492,29 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptListFetcher do
|
|
|
492
492
|
|
|
493
493
|
describe "PlayabilityStatus module" do
|
|
494
494
|
it "defines OK status" do
|
|
495
|
-
expect(
|
|
495
|
+
expect(YoutubeRb::Transcript::PlayabilityStatus::OK).to eq("OK")
|
|
496
496
|
end
|
|
497
497
|
|
|
498
498
|
it "defines ERROR status" do
|
|
499
|
-
expect(
|
|
499
|
+
expect(YoutubeRb::Transcript::PlayabilityStatus::ERROR).to eq("ERROR")
|
|
500
500
|
end
|
|
501
501
|
|
|
502
502
|
it "defines LOGIN_REQUIRED status" do
|
|
503
|
-
expect(
|
|
503
|
+
expect(YoutubeRb::Transcript::PlayabilityStatus::LOGIN_REQUIRED).to eq("LOGIN_REQUIRED")
|
|
504
504
|
end
|
|
505
505
|
end
|
|
506
506
|
|
|
507
507
|
describe "PlayabilityFailedReason module" do
|
|
508
508
|
it "defines BOT_DETECTED reason" do
|
|
509
|
-
expect(
|
|
509
|
+
expect(YoutubeRb::Transcript::PlayabilityFailedReason::BOT_DETECTED).to eq("Sign in to confirm you're not a bot")
|
|
510
510
|
end
|
|
511
511
|
|
|
512
512
|
it "defines AGE_RESTRICTED reason" do
|
|
513
|
-
expect(
|
|
513
|
+
expect(YoutubeRb::Transcript::PlayabilityFailedReason::AGE_RESTRICTED).to eq("This video may be inappropriate for some users.")
|
|
514
514
|
end
|
|
515
515
|
|
|
516
516
|
it "defines VIDEO_UNAVAILABLE reason" do
|
|
517
|
-
expect(
|
|
517
|
+
expect(YoutubeRb::Transcript::PlayabilityFailedReason::VIDEO_UNAVAILABLE).to eq("This video is unavailable")
|
|
518
518
|
end
|
|
519
519
|
end
|
|
520
520
|
end
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "spec_helper"
|
|
4
4
|
|
|
5
|
-
RSpec.describe
|
|
5
|
+
RSpec.describe YoutubeRb::Transcript::TranscriptList do
|
|
6
6
|
let(:http_client) { instance_double(Faraday::Connection) }
|
|
7
7
|
let(:video_id) { "test_video_123" }
|
|
8
8
|
|
|
@@ -203,13 +203,13 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptList do
|
|
|
203
203
|
it "raises NoTranscriptFound when no match" do
|
|
204
204
|
expect {
|
|
205
205
|
list.find_transcript(["ja", "ko", "zh"])
|
|
206
|
-
}.to raise_error(
|
|
206
|
+
}.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
|
|
207
207
|
end
|
|
208
208
|
|
|
209
209
|
it "includes requested languages in error" do
|
|
210
210
|
begin
|
|
211
211
|
list.find_transcript(["ja", "ko"])
|
|
212
|
-
rescue
|
|
212
|
+
rescue YoutubeRb::Transcript::NoTranscriptFound => e
|
|
213
213
|
expect(e.requested_language_codes).to eq(["ja", "ko"])
|
|
214
214
|
end
|
|
215
215
|
end
|
|
@@ -233,13 +233,13 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptList do
|
|
|
233
233
|
it "does not return manually created transcripts" do
|
|
234
234
|
expect {
|
|
235
235
|
list.find_generated_transcript(["en"])
|
|
236
|
-
}.to raise_error(
|
|
236
|
+
}.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
|
|
237
237
|
end
|
|
238
238
|
|
|
239
239
|
it "raises NoTranscriptFound when no match" do
|
|
240
240
|
expect {
|
|
241
241
|
list.find_generated_transcript(["ja"])
|
|
242
|
-
}.to raise_error(
|
|
242
|
+
}.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
|
|
243
243
|
end
|
|
244
244
|
end
|
|
245
245
|
|
|
@@ -261,7 +261,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptList do
|
|
|
261
261
|
it "does not return generated transcripts" do
|
|
262
262
|
expect {
|
|
263
263
|
list.find_manually_created_transcript(["en-auto"])
|
|
264
|
-
}.to raise_error(
|
|
264
|
+
}.to raise_error(YoutubeRb::Transcript::NoTranscriptFound)
|
|
265
265
|
end
|
|
266
266
|
|
|
267
267
|
it "tries language codes in order" do
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "spec_helper"
|
|
4
|
-
require "
|
|
4
|
+
require "youtube_rb/transcript"
|
|
5
5
|
|
|
6
|
-
RSpec.describe
|
|
6
|
+
RSpec.describe YoutubeRb::Transcript::TranscriptParser do
|
|
7
7
|
describe "#initialize" do
|
|
8
8
|
it "creates a parser with preserve_formatting false by default" do
|
|
9
9
|
parser = described_class.new
|
|
@@ -34,7 +34,7 @@ RSpec.describe Youtube::Transcript::Rb::TranscriptParser do
|
|
|
34
34
|
result = parser.parse(xml)
|
|
35
35
|
expect(result).to be_an(Array)
|
|
36
36
|
expect(result.length).to eq(2)
|
|
37
|
-
expect(result.first).to be_a(
|
|
37
|
+
expect(result.first).to be_a(YoutubeRb::Transcript::TranscriptSnippet)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
it "parses text content correctly" do
|