youtube-transcript-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,380 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ RSpec.describe Youtube::Transcript::Rb::TranscriptList do
6
+ let(:http_client) { instance_double(Faraday::Connection) } # verifying double; no example in this file stubs a request on it
7
+ let(:video_id) { "test_video_123" } # arbitrary ID; the examples only check that it is echoed back
8
+
9
+ # Sample captions JSON similar to what YouTube returns: tracks en and es (kind ""), en-auto (kind "asr"), plus translation languages fr and de
10
+ let(:sample_captions_json) do
11
+ {
12
+ "captionTracks" => [
13
+ {
14
+ "baseUrl" => "https://www.youtube.com/api/timedtext?v=test&lang=en&fmt=srv3",
15
+ "name" => { "runs" => [{ "text" => "English" }] },
16
+ "languageCode" => "en",
17
+ "kind" => "", # examples below expect the en and es tracks to be reported as not generated
18
+ "isTranslatable" => true
19
+ },
20
+ {
21
+ "baseUrl" => "https://www.youtube.com/api/timedtext?v=test&lang=es&fmt=srv3",
22
+ "name" => { "runs" => [{ "text" => "Spanish" }] },
23
+ "languageCode" => "es",
24
+ "kind" => "",
25
+ "isTranslatable" => false # the only non-translatable track; its translation_languages are expected to be empty
26
+ },
27
+ {
28
+ "baseUrl" => "https://www.youtube.com/api/timedtext?v=test&lang=en&fmt=srv3", # NOTE(review): lang=en although languageCode is "en-auto" — looks like a copy-paste; no example in this file fetches the URL
29
+ "name" => { "runs" => [{ "text" => "English (auto-generated)" }] },
30
+ "languageCode" => "en-auto",
31
+ "kind" => "asr", # examples below expect this track to be reported as generated
32
+ "isTranslatable" => true
33
+ }
34
+ ],
35
+ "translationLanguages" => [
36
+ { "languageCode" => "fr", "languageName" => { "runs" => [{ "text" => "French" }] } },
37
+ { "languageCode" => "de", "languageName" => { "runs" => [{ "text" => "German" }] } }
38
+ ]
39
+ }
40
+ end
41
+
42
+ describe ".build" do
43
+ it "creates a TranscriptList from captions JSON" do
44
+ list = described_class.build(
45
+ http_client: http_client,
46
+ video_id: video_id,
47
+ captions_json: sample_captions_json
48
+ )
49
+
50
+ expect(list).to be_a(described_class)
51
+ expect(list.video_id).to eq(video_id)
52
+ end
53
+
54
+ it "separates manually created and generated transcripts" do
55
+ list = described_class.build(
56
+ http_client: http_client,
57
+ video_id: video_id,
58
+ captions_json: sample_captions_json
59
+ )
60
+
61
+ # Should have 3 total transcripts (2 manual + 1 generated)
62
+ expect(list.count).to eq(3)
63
+ end
64
+
65
+ it "removes &fmt=srv3 from base URLs" do
66
+ list = described_class.build(
67
+ http_client: http_client,
68
+ video_id: video_id,
69
+ captions_json: sample_captions_json
70
+ )
71
+
72
+ list.each do |transcript|
73
+ # The URL should not contain &fmt=srv3
74
+ expect(transcript.instance_variable_get(:@url)).not_to include("&fmt=srv3")
75
+ end
76
+ end
77
+
78
+ it "handles empty captions JSON" do
79
+ empty_json = { "captionTracks" => [], "translationLanguages" => [] }
80
+ list = described_class.build(
81
+ http_client: http_client,
82
+ video_id: video_id,
83
+ captions_json: empty_json
84
+ )
85
+
86
+ expect(list.count).to eq(0)
87
+ end
88
+
89
+ it "handles missing translationLanguages" do
90
+ json = { "captionTracks" => sample_captions_json["captionTracks"] }
91
+ list = described_class.build(
92
+ http_client: http_client,
93
+ video_id: video_id,
94
+ captions_json: json
95
+ )
96
+
97
+ expect(list.count).to eq(3)
98
+ end
99
+
100
+ it "assigns translation languages only to translatable transcripts" do
101
+ list = described_class.build(
102
+ http_client: http_client,
103
+ video_id: video_id,
104
+ captions_json: sample_captions_json
105
+ )
106
+
107
+ transcripts = list.to_a
108
+ en_transcript = transcripts.find { |t| t.language_code == "en" }
109
+ es_transcript = transcripts.find { |t| t.language_code == "es" }
110
+
111
+ expect(en_transcript.translation_languages).not_to be_empty
112
+ expect(es_transcript.translation_languages).to be_empty
113
+ end
114
+ end
115
+
116
+ describe "#initialize" do # direct construction with explicit (here: empty) collections, bypassing .build
117
+ it "stores the video ID" do
118
+ list = described_class.new(
119
+ video_id: video_id,
120
+ manually_created_transcripts: {},
121
+ generated_transcripts: {},
122
+ translation_languages: []
123
+ )
124
+
125
+ expect(list.video_id).to eq(video_id)
126
+ end
127
+ end
128
+
129
+ describe "Enumerable" do # iteration behaviour of a list built from the three-track fixture
130
+ let(:list) do
131
+ described_class.build(
132
+ http_client: http_client,
133
+ video_id: video_id,
134
+ captions_json: sample_captions_json
135
+ )
136
+ end
137
+
138
+ it "includes Enumerable" do
139
+ expect(described_class).to include(Enumerable)
140
+ end
141
+
142
+ describe "#each" do
143
+ it "yields each transcript" do
144
+ transcripts = []
145
+ list.each { |t| transcripts << t } # deliberately calls #each with a block rather than #to_a
146
+ expect(transcripts.length).to eq(3)
147
+ end
148
+
149
+ it "returns an enumerator when no block given" do
150
+ expect(list.each).to be_a(Enumerator)
151
+ end
152
+
153
+ it "yields manually created transcripts first" do
154
+ transcripts = list.to_a
155
+ # First two should be manually created (en and es)
156
+ expect(transcripts[0].is_generated).to be false
157
+ expect(transcripts[1].is_generated).to be false
158
+ # Last one should be generated (en-auto)
159
+ expect(transcripts[2].is_generated).to be true
160
+ end
161
+ end
162
+
163
+ it "supports #map" do
164
+ codes = list.map(&:language_code)
165
+ expect(codes).to contain_exactly("en", "es", "en-auto") # order-insensitive; ordering is covered by the #each examples
166
+ end
167
+
168
+ it "supports #count" do
169
+ expect(list.count).to eq(3)
170
+ end
171
+
172
+ it "supports #select" do
173
+ generated = list.select(&:is_generated)
174
+ expect(generated.length).to eq(1) # only the en-auto fixture track is expected to be generated
175
+ end
176
+ end
177
+
178
+ describe "#find_transcript" do
179
+ let(:list) do
180
+ described_class.build(
181
+ http_client: http_client,
182
+ video_id: video_id,
183
+ captions_json: sample_captions_json
184
+ )
185
+ end
186
+
187
+ it "finds a transcript by language code" do
188
+ transcript = list.find_transcript(["en"])
189
+ expect(transcript.language_code).to eq("en")
190
+ expect(transcript.language).to eq("English")
191
+ end
192
+
193
+ it "prefers manually created over generated transcripts" do
194
+ transcript = list.find_transcript(["en"])
195
+ expect(transcript.is_generated).to be false
196
+ end
197
+
198
+ it "tries language codes in order of priority" do
199
+ transcript = list.find_transcript(["ja", "es", "en"])
200
+ expect(transcript.language_code).to eq("es")
201
+ end
202
+
203
+ it "raises NoTranscriptFound when no match" do
204
+ expect {
205
+ list.find_transcript(["ja", "ko", "zh"])
206
+ }.to raise_error(Youtube::Transcript::Rb::NoTranscriptFound)
207
+ end
208
+
209
+ it "includes requested languages in error" do
210
+ begin
211
+ list.find_transcript(["ja", "ko"])
212
+ rescue Youtube::Transcript::Rb::NoTranscriptFound => e
213
+ expect(e.requested_language_codes).to eq(["ja", "ko"])
214
+ end
215
+ end
216
+ end
217
+
218
+ describe "#find_generated_transcript" do # lookup restricted to generated transcripts
219
+ let(:list) do
220
+ described_class.build(
221
+ http_client: http_client,
222
+ video_id: video_id,
223
+ captions_json: sample_captions_json
224
+ )
225
+ end
226
+
227
+ it "finds only generated transcripts" do
228
+ transcript = list.find_generated_transcript(["en-auto"])
229
+ expect(transcript.language_code).to eq("en-auto")
230
+ expect(transcript.is_generated).to be true
231
+ end
232
+
233
+ it "does not return manually created transcripts" do
234
+ expect {
235
+ list.find_generated_transcript(["en"]) # "en" exists in the fixture, but only as a kind "" track
236
+ }.to raise_error(Youtube::Transcript::Rb::NoTranscriptFound)
237
+ end
238
+
239
+ it "raises NoTranscriptFound when no match" do
240
+ expect {
241
+ list.find_generated_transcript(["ja"]) # "ja" is absent from the fixture altogether
242
+ }.to raise_error(Youtube::Transcript::Rb::NoTranscriptFound)
243
+ end
244
+ end
245
+
246
+ describe "#find_manually_created_transcript" do # lookup restricted to manually created transcripts
247
+ let(:list) do
248
+ described_class.build(
249
+ http_client: http_client,
250
+ video_id: video_id,
251
+ captions_json: sample_captions_json
252
+ )
253
+ end
254
+
255
+ it "finds only manually created transcripts" do
256
+ transcript = list.find_manually_created_transcript(["en"])
257
+ expect(transcript.language_code).to eq("en")
258
+ expect(transcript.is_generated).to be false
259
+ end
260
+
261
+ it "does not return generated transcripts" do
262
+ expect {
263
+ list.find_manually_created_transcript(["en-auto"]) # "en-auto" exists in the fixture, but only as the kind "asr" track
264
+ }.to raise_error(Youtube::Transcript::Rb::NoTranscriptFound)
265
+ end
266
+
267
+ it "tries language codes in order" do
268
+ transcript = list.find_manually_created_transcript(["ja", "es"]) # "ja" is absent, so the second code is expected to match
269
+ expect(transcript.language_code).to eq("es")
270
+ end
271
+ end
272
+
273
+ describe "#to_s" do # substring checks on the human-readable summary; exact layout is not pinned by these examples
274
+ let(:list) do
275
+ described_class.build(
276
+ http_client: http_client,
277
+ video_id: video_id,
278
+ captions_json: sample_captions_json
279
+ )
280
+ end
281
+
282
+ it "includes the video ID" do
283
+ expect(list.to_s).to include(video_id)
284
+ end
285
+
286
+ it "includes MANUALLY CREATED section" do
287
+ expect(list.to_s).to include("(MANUALLY CREATED)")
288
+ end
289
+
290
+ it "includes GENERATED section" do
291
+ expect(list.to_s).to include("(GENERATED)")
292
+ end
293
+
294
+ it "includes TRANSLATION LANGUAGES section" do
295
+ expect(list.to_s).to include("(TRANSLATION LANGUAGES)")
296
+ end
297
+
298
+ it "lists manually created transcripts" do
299
+ str = list.to_s
300
+ expect(str).to include("en") # NOTE(review): weak — "en" is also a substring of "French", which another example expects in this output
301
+ expect(str).to include("English")
302
+ end
303
+
304
+ it "lists translation languages" do
305
+ str = list.to_s
306
+ expect(str).to include("fr") # NOTE(review): case-sensitive, so "French" does not satisfy this; still only a two-letter substring check
307
+ expect(str).to include("French")
308
+ end
309
+
310
+ context "with empty transcript list" do
311
+ let(:empty_list) do # constructed directly with empty collections rather than via .build
312
+ described_class.new(
313
+ video_id: video_id,
314
+ manually_created_transcripts: {},
315
+ generated_transcripts: {},
316
+ translation_languages: []
317
+ )
318
+ end
319
+
320
+ it "shows None for empty sections" do
321
+ str = empty_list.to_s
322
+ expect(str).to include("None") # NOTE(review): passes if any one section prints "None"; does not check all three
323
+ end
324
+ end
325
+ end
326
+
327
+ describe "edge cases" do # .build with incomplete track data; each example only checks the resulting count
328
+ it "handles missing name in caption tracks" do
329
+ json = {
330
+ "captionTracks" => [
331
+ {
332
+ "baseUrl" => "https://example.com",
333
+ "name" => {}, # empty hash: the "name" key is present but has no "runs"
334
+ "languageCode" => "en" # "kind" and "isTranslatable" are omitted from this track as well
335
+ }
336
+ ]
337
+ }
338
+
339
+ list = described_class.build(
340
+ http_client: http_client,
341
+ video_id: video_id,
342
+ captions_json: json
343
+ )
344
+
345
+ expect(list.count).to eq(1)
346
+ end
347
+
348
+ it "handles nil baseUrl" do
349
+ json = {
350
+ "captionTracks" => [
351
+ {
352
+ "baseUrl" => nil, # nil instead of a URL string; the example only requires that build still yields the track
353
+ "name" => { "runs" => [{ "text" => "English" }] },
354
+ "languageCode" => "en" # "kind" and "isTranslatable" are omitted from this track as well
355
+ }
356
+ ]
357
+ }
358
+
359
+ list = described_class.build(
360
+ http_client: http_client,
361
+ video_id: video_id,
362
+ captions_json: json
363
+ )
364
+
365
+ expect(list.count).to eq(1)
366
+ end
367
+
368
+ it "handles missing captionTracks" do
369
+ json = { "translationLanguages" => [] } # no "captionTracks" key at all
370
+
371
+ list = described_class.build(
372
+ http_client: http_client,
373
+ video_id: video_id,
374
+ captions_json: json
375
+ )
376
+
377
+ expect(list.count).to eq(0)
378
+ end
379
+ end
380
+ end