wp2txt 1.1.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. checksums.yaml +4 -4
  2. data/.dockerignore +12 -0
  3. data/.github/workflows/ci.yml +13 -13
  4. data/.gitignore +14 -0
  5. data/CHANGELOG.md +284 -0
  6. data/DEVELOPMENT.md +415 -0
  7. data/DEVELOPMENT_ja.md +415 -0
  8. data/Dockerfile +19 -10
  9. data/Gemfile +2 -8
  10. data/README.md +259 -123
  11. data/README_ja.md +375 -0
  12. data/Rakefile +4 -0
  13. data/bin/wp2txt +863 -161
  14. data/lib/wp2txt/article.rb +98 -13
  15. data/lib/wp2txt/bz2_validator.rb +239 -0
  16. data/lib/wp2txt/category_cache.rb +313 -0
  17. data/lib/wp2txt/cli.rb +319 -0
  18. data/lib/wp2txt/cli_ui.rb +428 -0
  19. data/lib/wp2txt/config.rb +158 -0
  20. data/lib/wp2txt/constants.rb +134 -0
  21. data/lib/wp2txt/data/html_entities.json +2135 -0
  22. data/lib/wp2txt/data/language_metadata.json +4769 -0
  23. data/lib/wp2txt/data/language_tiers.json +59 -0
  24. data/lib/wp2txt/data/mediawiki_aliases.json +12366 -0
  25. data/lib/wp2txt/data/template_aliases.json +193 -0
  26. data/lib/wp2txt/data/wikipedia_entities.json +12 -0
  27. data/lib/wp2txt/extractor.rb +545 -0
  28. data/lib/wp2txt/file_utils.rb +91 -0
  29. data/lib/wp2txt/formatter.rb +352 -0
  30. data/lib/wp2txt/global_data_cache.rb +353 -0
  31. data/lib/wp2txt/index_cache.rb +258 -0
  32. data/lib/wp2txt/magic_words.rb +353 -0
  33. data/lib/wp2txt/memory_monitor.rb +236 -0
  34. data/lib/wp2txt/multistream.rb +1383 -0
  35. data/lib/wp2txt/output_writer.rb +182 -0
  36. data/lib/wp2txt/parser_functions.rb +606 -0
  37. data/lib/wp2txt/ractor_worker.rb +215 -0
  38. data/lib/wp2txt/regex.rb +396 -12
  39. data/lib/wp2txt/section_extractor.rb +354 -0
  40. data/lib/wp2txt/stream_processor.rb +271 -0
  41. data/lib/wp2txt/template_expander.rb +830 -0
  42. data/lib/wp2txt/text_processing.rb +337 -0
  43. data/lib/wp2txt/utils.rb +629 -270
  44. data/lib/wp2txt/version.rb +1 -1
  45. data/lib/wp2txt.rb +53 -26
  46. data/scripts/benchmark_regex.rb +161 -0
  47. data/scripts/fetch_html_entities.rb +94 -0
  48. data/scripts/fetch_language_metadata.rb +180 -0
  49. data/scripts/fetch_mediawiki_data.rb +334 -0
  50. data/scripts/fetch_template_data.rb +186 -0
  51. data/scripts/profile_memory.rb +139 -0
  52. data/spec/article_spec.rb +402 -0
  53. data/spec/auto_download_spec.rb +314 -0
  54. data/spec/bz2_validator_spec.rb +193 -0
  55. data/spec/category_cache_spec.rb +226 -0
  56. data/spec/category_fetcher_spec.rb +504 -0
  57. data/spec/cleanup_spec.rb +197 -0
  58. data/spec/cli_options_spec.rb +678 -0
  59. data/spec/cli_spec.rb +876 -0
  60. data/spec/config_spec.rb +194 -0
  61. data/spec/constants_spec.rb +138 -0
  62. data/spec/file_utils_spec.rb +170 -0
  63. data/spec/fixtures/samples.rb +181 -0
  64. data/spec/formatter_sections_spec.rb +382 -0
  65. data/spec/global_data_cache_spec.rb +186 -0
  66. data/spec/index_cache_spec.rb +210 -0
  67. data/spec/integration_spec.rb +543 -0
  68. data/spec/magic_words_spec.rb +261 -0
  69. data/spec/markers_spec.rb +476 -0
  70. data/spec/memory_monitor_spec.rb +192 -0
  71. data/spec/multistream_spec.rb +690 -0
  72. data/spec/output_writer_spec.rb +400 -0
  73. data/spec/parser_functions_spec.rb +455 -0
  74. data/spec/ractor_worker_spec.rb +197 -0
  75. data/spec/regex_spec.rb +281 -0
  76. data/spec/section_extractor_spec.rb +397 -0
  77. data/spec/spec_helper.rb +63 -0
  78. data/spec/stream_processor_spec.rb +579 -0
  79. data/spec/template_data_spec.rb +246 -0
  80. data/spec/template_expander_spec.rb +472 -0
  81. data/spec/template_processing_spec.rb +217 -0
  82. data/spec/text_processing_spec.rb +312 -0
  83. data/spec/utils_spec.rb +195 -16
  84. data/spec/wp2txt_spec.rb +510 -0
  85. data/wp2txt.gemspec +5 -3
  86. metadata +146 -18
  87. data/.rubocop.yml +0 -80
  88. data/data/output_samples/testdata_en.txt +0 -23002
  89. data/data/output_samples/testdata_en_category.txt +0 -132
  90. data/data/output_samples/testdata_en_summary.txt +0 -1376
  91. data/data/output_samples/testdata_ja.txt +0 -22774
  92. data/data/output_samples/testdata_ja_category.txt +0 -206
  93. data/data/output_samples/testdata_ja_summary.txt +0 -1560
  94. data/data/testdata_en.bz2 +0 -0
  95. data/data/testdata_ja.bz2 +0 -0
  96. data/image/screenshot.png +0 -0
@@ -0,0 +1,314 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "spec_helper"
4
+ require "tmpdir"
5
+ require_relative "../lib/wp2txt/multistream"
6
+ require_relative "../lib/wp2txt/cli"
7
+
8
# Specs for the automatic dump download support: the on-disk cache bookkeeping
# of Wp2txt::DumpManager and the command-line option handling of Wp2txt::CLI.
RSpec.describe "Wp2txt Auto Download" do
  include Wp2txt

  describe "DumpManager" do
    # Per-process scratch directory so the examples never touch the real cache.
    let(:cache_dir) { File.join(Dir.tmpdir, "wp2txt_test_cache_#{Process.pid}") }

    after { FileUtils.rm_rf(cache_dir) if File.exist?(cache_dir) }

    # Builds a :ja manager rooted at the scratch cache directory and stubs the
    # dump date to avoid a network call. Extra keyword options are forwarded
    # to the constructor unchanged.
    def offline_manager(**options)
      Wp2txt::DumpManager.new(:ja, cache_dir: cache_dir, **options).tap do |dump_manager|
        allow(dump_manager).to receive(:latest_dump_date).and_return("20260101")
      end
    end

    # Writes a placeholder file at the manager's cached index path, creating
    # parent directories as needed, and returns that path.
    def write_fake_index(dump_manager)
      dump_manager.cached_index_path.tap do |path|
        FileUtils.mkdir_p(File.dirname(path))
        File.write(path, "test")
      end
    end

    describe ".default_cache_dir" do
      it "returns ~/.wp2txt/cache by default" do
        expected_dir = File.expand_path("~/.wp2txt/cache")
        expect(Wp2txt::DumpManager.default_cache_dir).to eq(expected_dir)
      end
    end

    describe "#initialize" do
      it "accepts custom cache directory" do
        custom = Wp2txt::DumpManager.new(:ja, cache_dir: cache_dir)
        expect(custom.cache_dir).to eq(cache_dir)
      end

      it "uses default cache directory when not specified" do
        expect(Wp2txt::DumpManager.new(:ja).cache_dir).to eq(Wp2txt::DumpManager.default_cache_dir)
      end
    end

    describe "#cache_status" do
      let(:status) { offline_manager.cache_status }

      it "returns status hash with expected keys" do
        [:lang, :cache_dir, :index_exists, :multistream_exists, :age_days, :mtime, :expiry_days].each do |key|
          expect(status).to have_key(key)
        end
        expect(status[:lang]).to eq(:ja)
      end

      it "returns false for index_exists when cache is empty" do
        expect(status[:index_exists]).to be(false)
        expect(status[:multistream_exists]).to be(false)
      end
    end

    describe "#cache_age_days" do
      it "returns nil when cache does not exist" do
        expect(offline_manager.cache_age_days).to be_nil
      end

      it "returns age in days when cache exists" do
        dump_manager = offline_manager
        write_fake_index(dump_manager)

        age_in_days = dump_manager.cache_age_days
        expect(age_in_days).to be_a(Float)
        expect(age_in_days).to be >= 0
        expect(age_in_days).to be < 1 # the index was written moments ago
      end
    end

    describe "#cache_mtime" do
      it "returns nil when cache does not exist" do
        expect(offline_manager.cache_mtime).to be_nil
      end

      it "returns modification time when cache exists" do
        dump_manager = offline_manager
        write_fake_index(dump_manager)

        expect(dump_manager.cache_mtime).to be_a(Time)
      end
    end

    describe "#cache_stale?" do
      it "returns true when cache does not exist" do
        expect(offline_manager.cache_stale?).to be(true)
      end

      it "returns false when cache is fresh" do
        dump_manager = offline_manager(dump_expiry_days: 30)
        write_fake_index(dump_manager) # just created, so well inside the 30-day window

        expect(dump_manager.cache_stale?).to be(false)
      end

      it "returns true when cache is older than expiry days" do
        dump_manager = offline_manager(dump_expiry_days: 1)
        index_path = write_fake_index(dump_manager)

        # Backdate the index file by two days, past the one-day expiry.
        two_days_ago = Time.now - (2 * 24 * 60 * 60)
        File.utime(two_days_ago, two_days_ago, index_path)

        expect(dump_manager.cache_stale?).to be(true)
      end
    end

    describe "#clear_cache!" do
      it "removes language-specific cache directory" do
        dump_manager = Wp2txt::DumpManager.new(:ja, cache_dir: cache_dir)
        lang_dir = File.join(cache_dir, "jawiki")
        FileUtils.mkdir_p(lang_dir)
        File.write(File.join(lang_dir, "test.txt"), "test")

        # The directory must exist beforehand and be gone afterwards.
        expect { dump_manager.clear_cache! }
          .to change { File.exist?(lang_dir) }.from(true).to(false)
      end
    end

    describe ".clear_all_cache!" do
      it "removes entire cache directory" do
        ["jawiki", "enwiki"].each { |wiki| FileUtils.mkdir_p(File.join(cache_dir, wiki)) }
        File.write(File.join(cache_dir, "jawiki", "test.txt"), "test")

        Wp2txt::DumpManager.clear_all_cache!(cache_dir)
        expect(File.exist?(cache_dir)).to be(false)
      end
    end
  end

  describe "Wp2txt::CLI" do
    let(:cli) { Wp2txt::CLI }

    describe ".valid_language_code?" do
      it "accepts valid 2-letter codes" do
        ["ja", "en", "zh", "de"].each do |code|
          expect(cli.valid_language_code?(code)).to be(true)
        end
      end

      it "accepts valid longer codes" do
        expect(cli.valid_language_code?("simple")).to be(true)
      end

      it "accepts hyphenated codes" do
        expect(cli.valid_language_code?("zh-yue")).to be(true)
      end

      it "rejects invalid codes" do
        ["INVALID", "123", "", nil].each do |code|
          expect(cli.valid_language_code?(code)).to be(false)
        end
      end
    end

    describe ".default_cache_dir" do
      it "returns ~/.wp2txt/cache" do
        expected_dir = File.expand_path("~/.wp2txt/cache")
        expect(cli.default_cache_dir).to eq(expected_dir)
      end
    end

    describe ".parse_options" do
      let(:cache_dir) { File.join(Dir.tmpdir, "wp2txt_cli_test_#{Process.pid}") }
      let(:cache_dir_flag) { "--cache-dir=#{cache_dir}" }

      before { FileUtils.mkdir_p(cache_dir) }
      after { FileUtils.rm_rf(cache_dir) if File.exist?(cache_dir) }

      # Creates a dummy XML file inside the scratch directory and returns its
      # path, for use as an --input argument.
      def write_dummy_input
        File.join(cache_dir, "test.xml").tap { |path| File.write(path, "<test/>") }
      end

      # Expects parsing +argv+ to raise SystemExit. The call runs inside
      # suppress_stderr, a helper defined outside this file (presumably in
      # spec_helper).
      def expect_exit_for(argv)
        suppress_stderr do
          expect { cli.parse_options(argv) }.to raise_error(SystemExit)
        end
      end

      it "accepts --lang option" do
        parsed = cli.parse_options(["--lang=ja", cache_dir_flag])
        expect(parsed[:lang]).to eq("ja")
      end

      it "accepts --cache-dir option" do
        parsed = cli.parse_options(["--lang=ja", cache_dir_flag])
        expect(parsed[:cache_dir]).to eq(cache_dir)
      end

      it "accepts --cache-status option" do
        parsed = cli.parse_options(["--cache-status", cache_dir_flag])
        expect(parsed[:cache_status]).to be(true)
      end

      it "accepts --cache-clear option" do
        parsed = cli.parse_options(["--cache-clear", cache_dir_flag])
        expect(parsed[:cache_clear]).to be(true)
      end

      it "allows --cache-status without --input or --lang" do
        parsed = cli.parse_options(["--cache-status", cache_dir_flag])
        expect(parsed[:cache_status]).to be(true)
        expect(parsed[:input]).to be_nil
        expect(parsed[:lang]).to be_nil
      end

      it "allows --cache-clear without --input or --lang" do
        parsed = cli.parse_options(["--cache-clear", cache_dir_flag])
        expect(parsed[:cache_clear]).to be(true)
      end

      context "input source validation" do
        it "requires either --input or --lang for normal operation" do
          expect_exit_for(["--output-dir=#{cache_dir}"])
        end

        it "rejects both --input and --lang together" do
          expect_exit_for(["--input=#{write_dummy_input}", "--lang=ja"])
        end
      end

      context "--articles option" do
        it "accepts --articles with --lang" do
          parsed = cli.parse_options(["--lang=ja", "--articles=認知言語学"])
          expect(parsed[:articles]).to eq("認知言語学")
        end

        it "accepts multiple articles separated by comma" do
          parsed = cli.parse_options(["--lang=ja", "--articles=認知言語学,生成文法,言語学"])
          expect(parsed[:articles]).to eq("認知言語学,生成文法,言語学")
        end

        it "requires --lang when --articles is specified" do
          expect_exit_for(["--articles=Test"])
        end

        it "rejects --articles with --input" do
          expect_exit_for(["--input=#{write_dummy_input}", "--articles=Test"])
        end
      end
    end
  end

  describe "Article extraction" do
    describe "Wp2txt::CLI.parse_article_list" do
      it "parses single article" do
        expect(Wp2txt::CLI.parse_article_list("認知言語学")).to eq(["認知言語学"])
      end

      it "parses multiple articles" do
        titles = Wp2txt::CLI.parse_article_list("認知言語学,生成文法,言語学")
        expect(titles).to eq(["認知言語学", "生成文法", "言語学"])
      end

      it "trims whitespace" do
        titles = Wp2txt::CLI.parse_article_list(" 認知言語学 , 生成文法 ")
        expect(titles).to eq(["認知言語学", "生成文法"])
      end

      it "returns empty array for nil" do
        expect(Wp2txt::CLI.parse_article_list(nil)).to eq([])
      end
    end
  end
end
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "tempfile"
5
+ require "fileutils"
6
+ require "wp2txt/bz2_validator"
7
+
8
# Specs for Wp2txt::Bz2Validator. The fixtures are tiny hand-written files: a
# four-byte bz2-style header ("BZ", a version byte, a block-size digit)
# followed by filler bytes, so no real compressed data is involved.
RSpec.describe Wp2txt::Bz2Validator do
  let(:temp_dir) { Dir.mktmpdir }

  # temp_dir is a lazy let, so examples that never wrote a fixture get a
  # directory created here and removed straight away.
  after do
    FileUtils.rm_rf(temp_dir)
  end

  # Writes +bytes+ to a file called +name+ inside the temp directory and
  # returns the file's path.
  def fixture(name, bytes)
    File.join(temp_dir, name).tap { |path| File.write(path, bytes) }
  end

  # Appends 100 filler bytes to +header+.
  def padded(header)
    header + ("x" * 100)
  end

  describe ".validate" do
    context "with non-existent file" do
      it "returns not_found error" do
        outcome = described_class.validate("/nonexistent/file.bz2")
        expect(outcome.valid?).to be(false)
        expect(outcome.error_type).to eq(:not_found)
      end
    end

    context "with too small file" do
      it "returns too_small error" do
        # Header only, with nothing after it.
        outcome = described_class.validate(fixture("small.bz2", "BZh9"))
        expect(outcome.valid?).to be(false)
        expect(outcome.error_type).to eq(:too_small)
      end
    end

    context "with invalid magic bytes" do
      it "returns invalid_magic error" do
        outcome = described_class.validate(fixture("invalid.bz2", padded("XX")))
        expect(outcome.valid?).to be(false)
        expect(outcome.error_type).to eq(:invalid_magic)
      end
    end

    context "with invalid version byte" do
      it "returns invalid_version error" do
        outcome = described_class.validate(fixture("bad_version.bz2", padded("BZx9")))
        expect(outcome.valid?).to be(false)
        expect(outcome.error_type).to eq(:invalid_version)
      end
    end

    context "with invalid block size" do
      it "returns invalid_block_size error" do
        outcome = described_class.validate(fixture("bad_block.bz2", padded("BZh0")))
        expect(outcome.valid?).to be(false)
        expect(outcome.error_type).to eq(:invalid_block_size)
      end
    end
  end

  describe ".validate_quick" do
    context "with valid header" do
      it "returns valid for correct header" do
        outcome = described_class.validate_quick(fixture("valid_header.bz2", padded("BZh9")))
        expect(outcome.valid?).to be(true)
      end
    end

    context "with invalid header" do
      it "returns invalid" do
        outcome = described_class.validate_quick(fixture("invalid.bz2", padded("XXXX")))
        expect(outcome.valid?).to be(false)
      end
    end
  end

  describe ".validate_magic_bytes" do
    context "with valid bz2 header" do
      it "returns valid result" do
        outcome = described_class.validate_magic_bytes(fixture("valid.bz2", padded("BZh9")))
        expect(outcome.valid?).to be(true)
        expect(outcome.details[:version]).to eq("h")
        expect(outcome.details[:block_size]).to eq(9)
      end
    end

    context "with different block sizes" do
      # One example is generated per block-size digit, 1 through 9.
      1.upto(9) do |size|
        it "accepts block size #{size}" do
          path = fixture("block#{size}.bz2", padded("BZh#{size}"))
          expect(described_class.validate_magic_bytes(path).valid?).to be(true)
        end
      end
    end
  end

  describe ".find_bzip2_command" do
    it "returns a string path or nil" do
      expect(described_class.find_bzip2_command).to be_nil.or be_a(String)
    end
  end

  describe ".file_info" do
    context "with valid bz2 header" do
      it "returns file information hash" do
        path = fixture("info_test.bz2", "BZh5" + ("data" * 100))
        info = described_class.file_info(path)

        expect(info).to be_a(Hash)
        expect(info[:path]).to eq(path)
        expect(info[:size]).to be > 0
        expect(info[:valid_header]).to be(true)
        expect(info[:version]).to eq("h")
        expect(info[:block_size]).to eq(5)
        expect(info[:mtime]).to be_a(Time)
      end
    end

    context "with non-existent file" do
      it "returns nil" do
        expect(described_class.file_info("/nonexistent/file.bz2")).to be_nil
      end
    end
  end

  describe "ValidationResult" do
    let(:result_class) { described_class::ValidationResult }

    describe "#valid?" do
      it "returns true when valid is true" do
        expect(result_class.new(valid: true).valid?).to be(true)
      end

      it "returns false when valid is false" do
        expect(result_class.new(valid: false).valid?).to be(false)
      end
    end

    describe "#to_s" do
      it "returns success message for valid result" do
        expect(result_class.new(valid: true).to_s).to eq("Valid bz2 file")
      end

      it "returns error message for invalid result" do
        failure = result_class.new(valid: false, message: "Test error")
        expect(failure.to_s).to eq("Invalid: Test error")
      end
    end
  end

  describe "constants" do
    it "has correct BZ2 magic bytes" do
      expect(described_class::BZ2_MAGIC).to eq("BZ")
    end

    it "has correct BZ2 version" do
      expect(described_class::BZ2_VERSION).to eq("h")
    end

    it "has valid block size range" do
      expect(described_class::BZ2_BLOCK_SIZES).to eq(["1", "2", "3", "4", "5", "6", "7", "8", "9"])
    end
  end
end