youtube-transcript-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "youtube/transcript/rb"
5
+
6
+ RSpec.describe Youtube::Transcript::Rb do
7
# Checks that every public error class sits at the expected place in the
# exception inheritance chain.
describe "Error hierarchy" do
  namespace = Youtube::Transcript::Rb

  it "has Error as the base class" do
    expect(namespace::Error).to be < StandardError
  end

  it "has CouldNotRetrieveTranscript inheriting from Error" do
    expect(namespace::CouldNotRetrieveTranscript).to be < namespace::Error
  end

  describe "error classes inherit from CouldNotRetrieveTranscript" do
    # One example is generated per class; the fully qualified class name
    # becomes part of the example description.
    %i[
      YouTubeDataUnparsable
      YouTubeRequestFailed
      VideoUnplayable
      VideoUnavailable
      InvalidVideoId
      RequestBlocked
      IpBlocked
      TooManyRequests
      TranscriptsDisabled
      AgeRestricted
      NotTranslatable
      TranslationLanguageNotAvailable
      FailedToCreateConsentCookie
      NoTranscriptFound
      NoTranscriptAvailable
      PoTokenRequired
    ].each do |class_name|
      error_class = namespace.const_get(class_name)

      it "#{error_class} inherits from CouldNotRetrieveTranscript" do
        expect(error_class).to be < namespace::CouldNotRetrieveTranscript
      end
    end
  end
end
41
+
42
# Behaviour common to retrieval errors, exercised through VideoUnavailable.
describe Youtube::Transcript::Rb::CouldNotRetrieveTranscript do
  let(:video_id) { "test_video_123" }
  # A subclass is instantiated because CouldNotRetrieveTranscript needs CAUSE_MESSAGE.
  let(:error) { Youtube::Transcript::Rb::VideoUnavailable.new(video_id) }

  it "stores the video_id" do
    expect(error.video_id).to eq(video_id)
  end

  it "includes video URL in error message" do
    expected_url = "https://www.youtube.com/watch?v=#{video_id}"
    expect(error.message).to include(expected_url)
  end

  it "includes cause message in error message" do
    expect(error.message).to include("The video is no longer available")
  end
end
61
+
62
# Pins the exact cause text reported for a video that no longer exists.
describe Youtube::Transcript::Rb::VideoUnavailable do
  subject(:error) { described_class.new("unavailable_video") }

  it "has the correct cause message" do
    expect(error.cause_message).to eq("The video is no longer available")
  end
end
70
+
71
# Pins the exact cause text reported when subtitles are switched off.
describe Youtube::Transcript::Rb::TranscriptsDisabled do
  let(:error) { described_class.new("disabled_video") }
  let(:expected_cause) { "Subtitles are disabled for this video" }

  it "has the correct cause message" do
    expect(error.cause_message).to eq(expected_cause)
  end
end
79
+
80
# The cause text must mention rate limiting.
describe Youtube::Transcript::Rb::TooManyRequests do
  let(:cause_message) { described_class.new("rate_limited").cause_message }

  it "has the correct cause message" do
    expect(cause_message).to include("rate limiting")
  end
end
88
+
89
# The cause text must name the PO Token requirement.
describe Youtube::Transcript::Rb::PoTokenRequired do
  let(:cause_message) { described_class.new("po_token_video").cause_message }

  it "has the correct cause message" do
    expect(cause_message).to include("PO Token")
  end
end
97
+
98
# A full URL is passed as the video id; the error is expected to explain
# both the wrong and the right way to invoke the tool.
describe Youtube::Transcript::Rb::InvalidVideoId do
  subject(:error) { described_class.new("https://www.youtube.com/watch?v=1234") }

  it "includes usage instructions in cause message" do
    expect(error.cause_message).to include("Do NOT run").and include("Instead run")
  end
end
107
+
108
# The wrapped HTTP error's message must be exposed as #reason and echoed in
# the cause message.
describe Youtube::Transcript::Rb::YouTubeRequestFailed do
  let(:failure_text) { "Connection refused" }
  let(:http_error) { StandardError.new("Connection refused") }
  let(:error) { described_class.new("failed_request", http_error) }

  it "stores the reason" do
    expect(error.reason).to eq(failure_text)
  end

  it "includes the reason in cause message" do
    expect(error.cause_message).to include(failure_text)
  end
end
121
+
122
# Covers the three construction modes: reason only, no reason, and reason
# plus a list of sub-reasons.
describe Youtube::Transcript::Rb::VideoUnplayable do
  let(:video_id) { "unplayable_video" }

  context "with reason only" do
    let(:reason) { "Video is private" }
    let(:error) { described_class.new(video_id, "Video is private") }

    it "stores the reason" do
      expect(error.reason).to eq(reason)
    end

    it "includes reason in cause message" do
      expect(error.cause_message).to include(reason)
    end
  end

  context "with no reason" do
    let(:error) { described_class.new(video_id) }

    it "uses default reason text" do
      expect(error.cause_message).to include("No reason specified!")
    end
  end

  context "with sub_reasons" do
    let(:error) do
      described_class.new(video_id, "Video is restricted", ["Region blocked", "Age restricted"])
    end

    it "stores sub_reasons" do
      expect(error.sub_reasons).to eq(["Region blocked", "Age restricted"])
    end

    it "includes sub_reasons in cause message" do
      expect(error.cause_message).to include("Region blocked", "Age restricted", "Additional Details")
    end
  end
end
159
+
160
# The error keeps the requested language codes and the transcript listing,
# and reflects both in its cause message.
describe Youtube::Transcript::Rb::NoTranscriptFound do
  # Stand-in for a transcript list; only #to_s is stubbed.
  let(:transcript_data) { double("TranscriptList", to_s: "Available: en, es") }
  let(:error) { described_class.new("no_transcript", ["ko", "ja"], transcript_data) }

  it "stores requested_language_codes" do
    expect(error.requested_language_codes).to eq(["ko", "ja"])
  end

  it "stores transcript_data" do
    expect(error.transcript_data).to eq(transcript_data)
  end

  it "includes requested languages in cause message" do
    expect(error.cause_message).to include("ko", "ja")
  end

  it "includes transcript data in cause message" do
    expect(error.cause_message).to include("Available: en, es")
  end
end
183
+
184
# The cause message must explain the IP block and mention cloud providers.
describe Youtube::Transcript::Rb::RequestBlocked do
  let(:cause_message) { described_class.new("blocked_video").cause_message }

  it "mentions IP blocking" do
    expect(cause_message).to include("YouTube is blocking requests from your IP")
  end

  it "mentions cloud providers" do
    expect(cause_message).to include("cloud provider")
  end
end
196
+
197
# IpBlocked is a specialisation of RequestBlocked.
describe Youtube::Transcript::Rb::IpBlocked do
  let(:error) { described_class.new("ip_blocked") }

  it "inherits from RequestBlocked" do
    parent = Youtube::Transcript::Rb::RequestBlocked
    expect(described_class).to be < parent
  end

  it "mentions IP or proxies as workaround" do
    message = error.cause_message
    expect(message).to include("IP").or include("proxy")
  end
end
209
+
210
# The cause message must name the age restriction and the current lack of
# cookie authentication support.
describe Youtube::Transcript::Rb::AgeRestricted do
  let(:cause_message) { described_class.new("age_restricted").cause_message }

  it "mentions age restriction" do
    expect(cause_message).to include("age-restricted")
  end

  it "mentions authentication limitation" do
    expect(cause_message).to include("Cookie Authentication is temporarily unsupported")
  end
end
222
+
223
# The cause text must state that the transcript is not translatable.
describe Youtube::Transcript::Rb::NotTranslatable do
  subject(:error) { described_class.new("not_translatable") }

  it "has the correct cause message" do
    expect(error.cause_message).to include("not translatable")
  end
end
231
+
232
# The cause text must state that the translation language is unavailable.
describe Youtube::Transcript::Rb::TranslationLanguageNotAvailable do
  subject(:error) { described_class.new("translation_unavailable") }

  it "has the correct cause message" do
    expect(error.cause_message).to include("translation language is not available")
  end
end
240
+ end
@@ -0,0 +1,436 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ RSpec.describe Youtube::Transcript::Rb::Formatters do
6
# Builds a FetchedTranscript for use in the examples.
#
# Every attribute has a default, so callers override only what they need.
# When +snippets+ is not supplied, three default snippets are created
# ("Hello world", "This is a test", "Thank you").
def create_transcript(video_id: "test123", language: "English", language_code: "en", is_generated: false, snippets: nil)
  snippets ||= [
    ["Hello world", 0.0, 2.5],
    ["This is a test", 2.5, 3.0],
    ["Thank you", 5.5, 2.0]
  ].map do |text, start, duration|
    Youtube::Transcript::Rb::TranscriptSnippet.new(text: text, start: start, duration: duration)
  end

  attributes = {
    video_id: video_id,
    language: language,
    language_code: language_code,
    is_generated: is_generated,
    snippets: snippets
  }
  Youtube::Transcript::Rb::FetchedTranscript.new(**attributes)
end

# Shared fixtures: a default English transcript, a Spanish one, and both together.
let(:transcript) { create_transcript }
let(:transcript2) { create_transcript(video_id: "video2", language: "Spanish", language_code: "es") }
let(:transcripts) { [transcript, transcript2] }
26
+
27
# The base Formatter is abstract: both entry points must raise.
describe Youtube::Transcript::Rb::Formatters::Formatter do
  let(:formatter) { described_class.new }

  describe "#format_transcript" do
    it "raises NotImplementedError" do
      call = -> { formatter.format_transcript(transcript) }
      expect(&call).to raise_error(NotImplementedError)
    end
  end

  describe "#format_transcripts" do
    it "raises NotImplementedError" do
      call = -> { formatter.format_transcripts(transcripts) }
      expect(&call).to raise_error(NotImplementedError)
    end
  end
end
42
+
43
# JSON output is checked by parsing it back and inspecting the structure.
describe Youtube::Transcript::Rb::Formatters::JSONFormatter do
  let(:formatter) { described_class.new }

  describe "#format_transcript" do
    let(:parsed) { JSON.parse(formatter.format_transcript(transcript)) }

    it "returns valid JSON" do
      output = formatter.format_transcript(transcript)
      expect { JSON.parse(output) }.not_to raise_error
    end

    it "contains all snippets" do
      expect(parsed.length).to eq(3)
    end

    it "includes text, start, and duration for each snippet" do
      first_snippet = parsed[0]

      expect(first_snippet["text"]).to eq("Hello world")
      expect(first_snippet["start"]).to eq(0.0)
      expect(first_snippet["duration"]).to eq(2.5)
    end

    it "supports JSON options" do
      # JSON.generate with indent requires array_nl and object_nl for newlines
      generator_options = { indent: " ", array_nl: "\n", object_nl: "\n" }
      output = formatter.format_transcript(transcript, **generator_options)
      expect(output).to include("\n")
    end
  end

  describe "#format_transcripts" do
    let(:parsed) { JSON.parse(formatter.format_transcripts(transcripts)) }

    it "returns valid JSON array" do
      expect(parsed).to be_an(Array)
      expect(parsed.length).to eq(2)
    end

    it "contains all transcripts" do
      snippet_counts = parsed.values_at(0, 1).map(&:length)
      expect(snippet_counts).to eq([3, 3])
    end
  end
end
90
+
91
# Plain-text output: snippet texts only, one per line.
describe Youtube::Transcript::Rb::Formatters::TextFormatter do
  let(:formatter) { described_class.new }

  describe "#format_transcript" do
    let(:output) { formatter.format_transcript(transcript) }

    it "returns plain text with newlines" do
      expect(output).to eq("Hello world\nThis is a test\nThank you")
    end

    it "contains only text, no timestamps" do
      ["0.0", "-->"].each do |timestamp_marker|
        expect(output).not_to include(timestamp_marker)
      end
    end
  end

  describe "#format_transcripts" do
    let(:output) { formatter.format_transcripts(transcripts) }

    it "separates transcripts with triple newlines" do
      expect(output).to include("\n\n\n")
    end

    it "contains all transcript texts" do
      expect(output).to include("Hello world")
      expect(output).to include("Thank you")
    end
  end
end
120
+
121
# Pretty-printed output is only checked loosely: it must be a String that
# contains the snippet data and spans multiple lines.
describe Youtube::Transcript::Rb::Formatters::PrettyPrintFormatter do
  let(:formatter) { described_class.new }

  describe "#format_transcript" do
    it "returns a string" do
      result = formatter.format_transcript(transcript)
      expect(result).to be_a(String)
    end

    it "contains transcript data" do
      result = formatter.format_transcript(transcript)
      expect(result).to include("Hello world")
      expect(result).to include("text")
      expect(result).to include("start")
      expect(result).to include("duration")
    end

    it "is formatted with indentation" do
      result = formatter.format_transcript(transcript)
      # The shared fixture always holds three snippets, so the expectation is
      # asserted unconditionally; a conditional expectation could silently
      # turn this example into a no-op.
      expect(result).to include("\n")
    end

    it "accepts width option" do
      result = formatter.format_transcript(transcript, width: 40)
      expect(result).to be_a(String)
    end
  end

  describe "#format_transcripts" do
    it "returns a string containing all transcripts" do
      result = formatter.format_transcripts(transcripts)
      expect(result).to be_a(String)
      expect(result).to include("Hello world")
    end
  end
end
158
+
159
# SRT output: numbered cues, comma-separated milliseconds, blank line between
# cues, trailing newline.
describe Youtube::Transcript::Rb::Formatters::SRTFormatter do
  let(:formatter) { described_class.new }

  describe "#format_transcript" do
    let(:result) { formatter.format_transcript(transcript) }

    it "includes sequence numbers starting from 1" do
      # Compare whole lines so that digits at the end of a timestamp or text
      # line cannot satisfy the check by accident.
      expect(result.lines.map(&:chomp)).to include("1", "2", "3")
    end

    it "uses comma as millisecond separator" do
      # Match the full timestamp shape instead of any stray comma.
      expect(result).to match(/\d{2}:\d{2}:\d{2},\d{3}/)
      expect(result).not_to match(/\d{2}:\d{2}:\d{2}\.\d{3}/)
    end

    it "formats timestamps correctly" do
      expect(result).to include("00:00:00,000 --> 00:00:02,500")
      expect(result).to include("00:00:02,500 --> 00:00:05,500")
    end

    it "includes the text content" do
      expect(result).to include("Hello world")
      expect(result).to include("This is a test")
      expect(result).to include("Thank you")
    end

    it "separates entries with blank lines" do
      expect(result).to include("\n\n")
    end

    it "ends with a newline" do
      expect(result).to end_with("\n")
    end

    it "follows SRT format structure" do
      entries = result.split("\n\n")
      first_entry = entries[0].split("\n")

      expect(first_entry[0]).to eq("1")
      expect(first_entry[1]).to match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/)
      expect(first_entry[2]).to eq("Hello world")
    end
  end

  describe "timestamp edge cases" do
    it "handles hours correctly" do
      snippets = [
        Youtube::Transcript::Rb::TranscriptSnippet.new(text: "Long video", start: 3661.5, duration: 2.0)
      ]
      long_transcript = create_transcript(snippets: snippets)
      result = formatter.format_transcript(long_transcript)

      expect(result).to include("01:01:01,500")
    end

    it "handles overlapping timestamps" do
      snippets = [
        Youtube::Transcript::Rb::TranscriptSnippet.new(text: "First", start: 0.0, duration: 5.0),
        Youtube::Transcript::Rb::TranscriptSnippet.new(text: "Second", start: 2.0, duration: 3.0)
      ]
      overlapping_transcript = create_transcript(snippets: snippets)
      result = formatter.format_transcript(overlapping_transcript)

      # First snippet should end at second snippet's start
      expect(result).to include("00:00:00,000 --> 00:00:02,000")
    end
  end
end
229
+
230
# WebVTT output: "WEBVTT" header, period-separated milliseconds, no cue
# sequence numbers, trailing newline.
describe Youtube::Transcript::Rb::Formatters::WebVTTFormatter do
  let(:formatter) { described_class.new }

  describe "#format_transcript" do
    let(:result) { formatter.format_transcript(transcript) }

    it "starts with WEBVTT header" do
      expect(result).to start_with("WEBVTT\n\n")
    end

    it "uses period as millisecond separator" do
      expect(result).to match(/\d{2}:\d{2}:\d{2}\.\d{3}/)
      expect(result).not_to match(/\d{2}:\d{2}:\d{2},\d{3}/)
    end

    it "formats timestamps correctly" do
      expect(result).to include("00:00:00.000 --> 00:00:02.500")
      expect(result).to include("00:00:02.500 --> 00:00:05.500")
    end

    it "does not include sequence numbers" do
      lines = result.split("\n")
      # Pair each line with the one directly above it. Looking the timing line
      # up with Array#index would always return its first occurrence, and an
      # index of 0 would wrap around to the last line.
      lines_before_cues = lines.each_cons(2)
                               .select { |_previous, line| line.include?("-->") }
                               .map(&:first)

      # Guard against a vacuous pass when no timing line is found at all.
      expect(lines_before_cues).not_to be_empty
      # The line above a cue should be empty or WEBVTT, not a number.
      expect(lines_before_cues.grep(/\A\d+\z/)).to be_empty
    end

    it "includes the text content" do
      expect(result).to include("Hello world")
      expect(result).to include("This is a test")
      expect(result).to include("Thank you")
    end

    it "ends with a newline" do
      expect(result).to end_with("\n")
    end
  end

  describe "timestamp edge cases" do
    it "handles hours correctly" do
      snippets = [
        Youtube::Transcript::Rb::TranscriptSnippet.new(text: "Long video", start: 3661.5, duration: 2.0)
      ]
      long_transcript = create_transcript(snippets: snippets)
      result = formatter.format_transcript(long_transcript)

      expect(result).to include("01:01:01.500")
    end
  end
end
284
+
285
# FormatterLoader maps a format name (String or Symbol) to a formatter
# instance and raises UnknownFormatterType for anything else.
describe Youtube::Transcript::Rb::Formatters::FormatterLoader do
  let(:loader) { described_class.new }

  describe "#load" do
    it "loads JSONFormatter for 'json'" do
      formatter = loader.load("json")
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::JSONFormatter)
    end

    it "loads TextFormatter for 'text'" do
      formatter = loader.load("text")
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::TextFormatter)
    end

    it "loads PrettyPrintFormatter for 'pretty'" do
      formatter = loader.load("pretty")
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::PrettyPrintFormatter)
    end

    it "loads SRTFormatter for 'srt'" do
      formatter = loader.load("srt")
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::SRTFormatter)
    end

    it "loads WebVTTFormatter for 'webvtt'" do
      formatter = loader.load("webvtt")
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::WebVTTFormatter)
    end

    it "defaults to PrettyPrintFormatter" do
      formatter = loader.load
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::PrettyPrintFormatter)
    end

    it "accepts symbol as formatter type" do
      formatter = loader.load(:json)
      expect(formatter).to be_a(Youtube::Transcript::Rb::Formatters::JSONFormatter)
    end

    it "raises UnknownFormatterType for invalid type" do
      expect { loader.load("invalid") }.to raise_error(
        Youtube::Transcript::Rb::Formatters::FormatterLoader::UnknownFormatterType
      )
    end

    it "includes available formats in error message" do
      # Going through raise_error makes the example fail when nothing is
      # raised; a bare begin/rescue would pass without running any expectation.
      expect { loader.load("invalid") }.to raise_error(
        Youtube::Transcript::Rb::Formatters::FormatterLoader::UnknownFormatterType
      ) { |e| expect(e.message).to include("json", "text", "srt", "webvtt", "pretty") }
    end
  end

  describe "TYPES constant" do
    it "contains all expected formatter types" do
      expect(described_class::TYPES.keys).to contain_exactly("json", "pretty", "text", "webvtt", "srt")
    end

    it "is frozen" do
      expect(described_class::TYPES).to be_frozen
    end
  end
end
353
+
354
# Smoke tests: load formatters by name and check they produce non-empty Strings.
describe "integration tests" do
  let(:loader) { Youtube::Transcript::Rb::Formatters::FormatterLoader.new }

  it "can format transcript with each formatter type" do
    %w[json text pretty srt webvtt].each do |type|
      output = loader.load(type).format_transcript(transcript)

      expect(output).to be_a(String)
      expect(output.length).to be > 0
    end
  end

  it "can format multiple transcripts with each formatter type" do
    %w[json text pretty].each do |type|
      output = loader.load(type).format_transcripts(transcripts)

      expect(output).to be_a(String)
      expect(output.length).to be > 0
    end
  end
end
375
+
376
# Expected output of each formatter for a transcript with no snippets.
describe "empty transcript handling" do
  let(:empty_transcript) { create_transcript(snippets: []) }
  let(:formatters) { Youtube::Transcript::Rb::Formatters }

  it "JSONFormatter handles empty transcript" do
    output = formatters::JSONFormatter.new.format_transcript(empty_transcript)
    expect(JSON.parse(output)).to eq([])
  end

  it "TextFormatter handles empty transcript" do
    output = formatters::TextFormatter.new.format_transcript(empty_transcript)
    expect(output).to eq("")
  end

  it "SRTFormatter handles empty transcript" do
    output = formatters::SRTFormatter.new.format_transcript(empty_transcript)
    expect(output).to eq("\n")
  end

  it "WebVTTFormatter handles empty transcript" do
    output = formatters::WebVTTFormatter.new.format_transcript(empty_transcript)
    expect(output).to eq("WEBVTT\n\n\n")
  end
end
404
+
405
# Snippet texts containing markup, double quotes and an embedded newline
# must survive formatting unchanged.
describe "special character handling" do
  let(:special_snippets) do
    [
      Youtube::Transcript::Rb::TranscriptSnippet.new(text: "Hello <b>world</b>", start: 0.0, duration: 2.0),
      Youtube::Transcript::Rb::TranscriptSnippet.new(text: 'Quote: "test"', start: 2.0, duration: 2.0),
      Youtube::Transcript::Rb::TranscriptSnippet.new(text: "Line1\nLine2", start: 4.0, duration: 2.0)
    ]
  end
  let(:special_transcript) { create_transcript(snippets: special_snippets) }

  it "JSONFormatter escapes special characters" do
    formatter = Youtube::Transcript::Rb::Formatters::JSONFormatter.new
    result = formatter.format_transcript(special_transcript)
    parsed = JSON.parse(result)
    expect(parsed[0]["text"]).to eq("Hello <b>world</b>")
    expect(parsed[1]["text"]).to eq('Quote: "test"')
    # The embedded newline is the third fixture snippet; it must round-trip
    # through JSON like the other two.
    expect(parsed[2]["text"]).to eq("Line1\nLine2")
  end

  it "TextFormatter preserves special characters" do
    formatter = Youtube::Transcript::Rb::Formatters::TextFormatter.new
    result = formatter.format_transcript(special_transcript)
    expect(result).to include("<b>world</b>")
    expect(result).to include('"test"')
  end

  it "SRTFormatter preserves HTML tags in text" do
    formatter = Youtube::Transcript::Rb::Formatters::SRTFormatter.new
    result = formatter.format_transcript(special_transcript)
    expect(result).to include("<b>world</b>")
  end
end
436
+ end