kreuzberg 4.3.5-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yaml +1 -0
- data/.rubocop.yml +543 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +260 -0
- data/README.md +399 -0
- data/Rakefile +34 -0
- data/Steepfile +51 -0
- data/examples/async_patterns.rb +283 -0
- data/extconf.rb +60 -0
- data/kreuzberg.gemspec +253 -0
- data/lib/kreuzberg/api_proxy.rb +125 -0
- data/lib/kreuzberg/cache_api.rb +67 -0
- data/lib/kreuzberg/cli.rb +57 -0
- data/lib/kreuzberg/cli_proxy.rb +118 -0
- data/lib/kreuzberg/config.rb +1241 -0
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/document_structure.rb +204 -0
- data/lib/kreuzberg/error_context.rb +136 -0
- data/lib/kreuzberg/errors.rb +116 -0
- data/lib/kreuzberg/extraction_api.rb +329 -0
- data/lib/kreuzberg/mcp_proxy.rb +176 -0
- data/lib/kreuzberg/ocr_backend_protocol.rb +40 -0
- data/lib/kreuzberg/post_processor_protocol.rb +15 -0
- data/lib/kreuzberg/result.rb +712 -0
- data/lib/kreuzberg/setup_lib_path.rb +99 -0
- data/lib/kreuzberg/types.rb +414 -0
- data/lib/kreuzberg/validator_protocol.rb +16 -0
- data/lib/kreuzberg/version.rb +5 -0
- data/lib/kreuzberg.rb +102 -0
- data/lib/kreuzberg_rb.so +0 -0
- data/lib/libpdfium.so +0 -0
- data/sig/kreuzberg/internal.rbs +184 -0
- data/sig/kreuzberg.rbs +1337 -0
- data/spec/binding/async_operations_spec.rb +473 -0
- data/spec/binding/batch_operations_spec.rb +677 -0
- data/spec/binding/batch_spec.rb +360 -0
- data/spec/binding/cache_spec.rb +227 -0
- data/spec/binding/cli_proxy_spec.rb +85 -0
- data/spec/binding/cli_spec.rb +55 -0
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_spec.rb +419 -0
- data/spec/binding/config_validation_spec.rb +377 -0
- data/spec/binding/embeddings_spec.rb +816 -0
- data/spec/binding/error_handling_spec.rb +399 -0
- data/spec/binding/error_recovery_spec.rb +488 -0
- data/spec/binding/errors_spec.rb +66 -0
- data/spec/binding/font_config_spec.rb +220 -0
- data/spec/binding/images_spec.rb +732 -0
- data/spec/binding/keywords_extraction_spec.rb +600 -0
- data/spec/binding/metadata_types_spec.rb +1253 -0
- data/spec/binding/pages_extraction_spec.rb +550 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
- data/spec/binding/plugins/postprocessor_spec.rb +269 -0
- data/spec/binding/plugins/validator_spec.rb +273 -0
- data/spec/binding/tables_spec.rb +650 -0
- data/spec/fixtures/config.toml +38 -0
- data/spec/fixtures/config.yaml +41 -0
- data/spec/fixtures/invalid_config.toml +3 -0
- data/spec/serialization_spec.rb +134 -0
- data/spec/smoke/package_spec.rb +177 -0
- data/spec/spec_helper.rb +40 -0
- data/spec/unit/config/chunking_config_spec.rb +213 -0
- data/spec/unit/config/embedding_config_spec.rb +343 -0
- data/spec/unit/config/extraction_config_spec.rb +434 -0
- data/spec/unit/config/font_config_spec.rb +285 -0
- data/spec/unit/config/hierarchy_config_spec.rb +314 -0
- data/spec/unit/config/image_extraction_config_spec.rb +209 -0
- data/spec/unit/config/image_preprocessing_config_spec.rb +230 -0
- data/spec/unit/config/keyword_config_spec.rb +229 -0
- data/spec/unit/config/language_detection_config_spec.rb +258 -0
- data/spec/unit/config/ocr_config_spec.rb +171 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/spec/unit/config/page_config_spec.rb +221 -0
- data/spec/unit/config/pdf_config_spec.rb +267 -0
- data/spec/unit/config/postprocessor_config_spec.rb +290 -0
- data/spec/unit/config/tesseract_config_spec.rb +181 -0
- data/spec/unit/config/token_reduction_config_spec.rb +251 -0
- data/test/metadata_types_test.rb +959 -0
- metadata +292 -0
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Unit spec for Kreuzberg::Config::Hierarchy.
#
# Exercises the constructor defaults and coercions, hash (de)serialization via
# #to_h / .from_h, and nesting inside Kreuzberg::Config::PDF and
# Kreuzberg::Config::Extraction. Every expectation below goes through the
# public readers: enabled, k_clusters, include_bbox, ocr_coverage_threshold.
RSpec.describe Kreuzberg::Config::Hierarchy do
  # Constructor: default values and type coercion of each keyword.
  describe '#initialize' do
    it 'creates config with default values' do
      config = described_class.new

      expect(config.enabled).to be true
      expect(config.k_clusters).to eq 6
      expect(config.include_bbox).to be true
      expect(config.ocr_coverage_threshold).to be_nil
    end

    it 'creates config with custom values' do
      config = described_class.new(
        enabled: false,
        k_clusters: 10,
        include_bbox: false,
        ocr_coverage_threshold: 0.95
      )

      expect(config.enabled).to be false
      expect(config.k_clusters).to eq 10
      expect(config.include_bbox).to be false
      expect(config.ocr_coverage_threshold).to eq 0.95
    end

    it 'converts k_clusters to integer' do
      config = described_class.new(k_clusters: '8')

      expect(config.k_clusters).to eq 8
      expect(config.k_clusters).to be_a Integer
    end

    it 'converts enabled to boolean' do
      # A truthy non-boolean (1) must come back as the literal `true`.
      config = described_class.new(enabled: 1)

      expect(config.enabled).to be true
    end

    it 'converts include_bbox to boolean' do
      # NOTE(review): the input is already `false`, so this only shows that
      # false round-trips; the truthy coercion is covered under
      # 'boolean conversion' below.
      config = described_class.new(include_bbox: false)

      expect(config.include_bbox).to be false
    end

    it 'converts ocr_coverage_threshold to float' do
      config = described_class.new(ocr_coverage_threshold: '0.85')

      expect(config.ocr_coverage_threshold).to eq 0.85
      expect(config.ocr_coverage_threshold).to be_a Float
    end
  end

  # Serialization: #to_h returns a symbol-keyed Hash and omits nil entries.
  describe '#to_h' do
    it 'serializes to hash with all values' do
      config = described_class.new(
        enabled: true,
        k_clusters: 8,
        include_bbox: true
      )
      hash = config.to_h

      expect(hash).to be_a Hash
      expect(hash[:enabled]).to be true
      expect(hash[:k_clusters]).to eq 8
      expect(hash[:include_bbox]).to be true
    end

    it 'includes ocr_coverage_threshold when present' do
      config = described_class.new(ocr_coverage_threshold: 0.9)
      hash = config.to_h

      expect(hash[:ocr_coverage_threshold]).to eq 0.9
    end

    it 'compacts nil values from hash' do
      # ocr_coverage_threshold is left unset, so its key must be absent.
      config = described_class.new(enabled: true)
      hash = config.to_h

      expect(hash.key?(:ocr_coverage_threshold)).to be false
    end
  end

  # Deserialization: .from_h accepts a Hash, nil, or an existing instance.
  describe '.from_h' do
    it 'creates from hash' do
      hash = { enabled: true, k_clusters: 8 }
      config = described_class.from_h(hash)

      expect(config).to be_a described_class
      expect(config.enabled).to be true
      expect(config.k_clusters).to eq 8
    end

    it 'returns nil for nil input' do
      config = described_class.from_h(nil)

      expect(config).to be_nil
    end

    it 'returns instance as-is' do
      original = described_class.new(k_clusters: 10)
      config = described_class.from_h(original)

      # `be` asserts object identity: the very same instance is handed back.
      expect(config).to be original
    end

    # The input hash uses String keys (not Symbols); the description was
    # corrected to say so.
    it 'accepts string keys in hash' do
      hash = { 'enabled' => true, 'k_clusters' => 8 }
      config = described_class.from_h(hash)

      expect(config.enabled).to be true
      expect(config.k_clusters).to eq 8
    end
  end

  # Smoke checks: construction with ordinary values must not raise.
  describe 'validation' do
    it 'accepts valid k_clusters' do
      expect do
        described_class.new(k_clusters: 5)
      end.not_to raise_error
    end

    it 'accepts valid ocr_coverage_threshold' do
      expect do
        described_class.new(ocr_coverage_threshold: 0.8)
      end.not_to raise_error
    end

    it 'accepts enabled true' do
      expect do
        described_class.new(enabled: true)
      end.not_to raise_error
    end
  end

  describe 'keyword arguments' do
    it 'accepts all keyword arguments' do
      config = described_class.new(
        enabled: false,
        k_clusters: 12,
        include_bbox: false,
        ocr_coverage_threshold: 0.75
      )

      expect(config.enabled).to be false
      expect(config.k_clusters).to eq 12
      expect(config.include_bbox).to be false
      expect(config.ocr_coverage_threshold).to eq 0.75
    end
  end

  # NOTE(review): these examples compare individual attributes of two
  # instances; none of them calls `==` on the config objects themselves.
  describe 'equality' do
    it 'compares configs by value' do
      config1 = described_class.new(
        enabled: true,
        k_clusters: 8
      )
      config2 = described_class.new(
        enabled: true,
        k_clusters: 8
      )

      expect(config1.enabled).to eq config2.enabled
      expect(config1.k_clusters).to eq config2.k_clusters
    end

    it 'detects differences in enabled' do
      config1 = described_class.new(enabled: true)
      config2 = described_class.new(enabled: false)

      expect(config1.enabled).not_to eq config2.enabled
    end

    it 'detects differences in k_clusters' do
      config1 = described_class.new(k_clusters: 6)
      config2 = described_class.new(k_clusters: 10)

      expect(config1.k_clusters).not_to eq config2.k_clusters
    end

    it 'detects differences in ocr_coverage_threshold' do
      config1 = described_class.new(ocr_coverage_threshold: 0.8)
      config2 = described_class.new(ocr_coverage_threshold: 0.9)

      expect(config1.ocr_coverage_threshold).not_to eq config2.ocr_coverage_threshold
    end
  end

  # Nesting: parent configs accept either an instance or a plain Hash and
  # expose a Hierarchy instance through their reader.
  describe 'nested config integration' do
    it 'can be nested in PDF config' do
      hierarchy = described_class.new(k_clusters: 8, enabled: true)
      pdf = Kreuzberg::Config::PDF.new(hierarchy: hierarchy)

      expect(pdf.hierarchy).to be_a described_class
      expect(pdf.hierarchy.k_clusters).to eq 8
      expect(pdf.hierarchy.enabled).to be true
    end

    it 'accepts hash in PDF config' do
      pdf = Kreuzberg::Config::PDF.new(
        hierarchy: { enabled: true, k_clusters: 10 }
      )

      expect(pdf.hierarchy).to be_a described_class
      expect(pdf.hierarchy.enabled).to be true
      expect(pdf.hierarchy.k_clusters).to eq 10
    end

    it 'can be nested in Extraction config via PDF' do
      extraction = Kreuzberg::Config::Extraction.new(
        pdf_options: { hierarchy: { k_clusters: 8 } }
      )

      expect(extraction.pdf_options.hierarchy).to be_a described_class
      expect(extraction.pdf_options.hierarchy.k_clusters).to eq 8
    end
  end

  describe 'symbol vs string key handling' do
    # NOTE(review): the value passed is the boolean `true`, not a Symbol;
    # confirm what "symbol enabled" was meant to cover.
    it 'converts symbol enabled to boolean' do
      config = described_class.new(enabled: true)

      expect(config.enabled).to be true
    end

    it 'converts k_clusters string to integer' do
      config = described_class.new(k_clusters: '12')

      expect(config.k_clusters).to eq 12
      expect(config.k_clusters).to be_a Integer
    end
  end

  # Truthy non-boolean inputs (1, 'yes') must come back as `true`; `false`
  # must stay `false`.
  describe 'boolean conversion' do
    it 'converts truthy enabled to true' do
      config = described_class.new(enabled: 1)

      expect(config.enabled).to be true
    end

    it 'converts false enabled to false' do
      config = described_class.new(enabled: false)

      expect(config.enabled).to be false
    end

    it 'converts truthy include_bbox to true' do
      config = described_class.new(include_bbox: 'yes')

      expect(config.include_bbox).to be true
    end

    it 'converts false include_bbox to false' do
      config = described_class.new(include_bbox: false)

      expect(config.include_bbox).to be false
    end
  end

  describe 'k_clusters parameter' do
    it 'accepts small k_clusters' do
      config = described_class.new(k_clusters: 3)

      expect(config.k_clusters).to eq 3
    end

    it 'accepts large k_clusters' do
      config = described_class.new(k_clusters: 20)

      expect(config.k_clusters).to eq 20
    end

    it 'defaults to 6 clusters' do
      config = described_class.new

      expect(config.k_clusters).to eq 6
    end

    it 'converts string k_clusters to integer' do
      config = described_class.new(k_clusters: '15')

      expect(config.k_clusters).to eq 15
      expect(config.k_clusters).to be_a Integer
    end
  end

  describe 'ocr_coverage_threshold' do
    it 'accepts high threshold values' do
      config = described_class.new(ocr_coverage_threshold: 0.95)

      expect(config.ocr_coverage_threshold).to eq 0.95
    end

    it 'accepts low threshold values' do
      config = described_class.new(ocr_coverage_threshold: 0.1)

      expect(config.ocr_coverage_threshold).to eq 0.1
    end

    it 'accepts nil for threshold' do
      config = described_class.new(ocr_coverage_threshold: nil)

      expect(config.ocr_coverage_threshold).to be_nil
    end

    it 'converts string threshold to float' do
      config = described_class.new(ocr_coverage_threshold: '0.85')

      expect(config.ocr_coverage_threshold).to eq 0.85
      expect(config.ocr_coverage_threshold).to be_a Float
    end
  end
end
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Unit spec for Kreuzberg::Config::ImageExtraction.
#
# Covers constructor defaults and coercions, #to_h output, and nesting inside
# Kreuzberg::Config::Extraction. Readers exercised: extract_images,
# target_dpi, max_image_dimension, auto_adjust_dpi, min_dpi, max_dpi.
RSpec.describe Kreuzberg::Config::ImageExtraction do
  describe '#initialize' do
    it 'creates config with default values' do
      defaults = described_class.new

      expect(defaults.extract_images).to be(true)
      expect(defaults.target_dpi).to eq(300)
      expect(defaults.max_image_dimension).to eq(2000)
      expect(defaults.auto_adjust_dpi).to be(true)
      expect(defaults.min_dpi).to eq(150)
      expect(defaults.max_dpi).to eq(600)
    end

    it 'creates config with custom values' do
      overrides = {
        extract_images: false,
        target_dpi: 600,
        max_image_dimension: 4000,
        auto_adjust_dpi: false,
        min_dpi: 100,
        max_dpi: 1200
      }
      custom = described_class.new(**overrides)

      expect(custom.extract_images).to be(false)
      expect(custom.target_dpi).to eq(600)
      expect(custom.max_image_dimension).to eq(4000)
      expect(custom.auto_adjust_dpi).to be(false)
      expect(custom.min_dpi).to eq(100)
      expect(custom.max_dpi).to eq(1200)
    end

    it 'converts values to integers' do
      # Every numeric option is supplied as a String here.
      string_inputs = {
        target_dpi: '300',
        max_image_dimension: '2000',
        min_dpi: '150',
        max_dpi: '600'
      }
      coerced = described_class.new(**string_inputs)

      expect(coerced.target_dpi).to eq(300)
      expect(coerced.max_image_dimension).to eq(2000)
      expect(coerced.min_dpi).to eq(150)
      expect(coerced.max_dpi).to eq(600)
      expect(coerced.target_dpi).to be_an(Integer)
    end

    it 'converts boolean values correctly' do
      flags = described_class.new(extract_images: true, auto_adjust_dpi: false)

      expect(flags.extract_images).to be(true)
      expect(flags.auto_adjust_dpi).to be(false)
    end
  end

  describe '#to_h' do
    it 'serializes to hash with all values' do
      serialized = described_class.new(target_dpi: 300, max_image_dimension: 2000).to_h

      expect(serialized).to be_a(Hash)
      expect(serialized[:extract_images]).to be(true)
      expect(serialized[:target_dpi]).to eq(300)
      expect(serialized[:max_image_dimension]).to eq(2000)
      expect(serialized[:auto_adjust_dpi]).to be(true)
      expect(serialized[:min_dpi]).to eq(150)
      expect(serialized[:max_dpi]).to eq(600)
    end

    it 'always includes all keys in hash' do
      serialized = described_class.new.to_h
      expected_keys = %i[
        extract_images
        target_dpi
        max_image_dimension
        auto_adjust_dpi
        min_dpi
        max_dpi
      ]

      # Order-insensitive comparison: exactly these six keys, nothing else.
      expect(serialized.keys).to match_array(expected_keys)
    end
  end

  describe 'validation' do
    it 'accepts valid DPI values' do
      expect { described_class.new(target_dpi: 300, min_dpi: 150, max_dpi: 600) }
        .not_to raise_error
    end

    it 'accepts valid image dimensions' do
      expect { described_class.new(max_image_dimension: 4000) }
        .not_to raise_error
    end

    it 'converts float DPI to integer' do
      # 300.5 is expected to come back as the Integer 300.
      from_float = described_class.new(target_dpi: 300.5)

      expect(from_float.target_dpi).to eq(300)
      expect(from_float.target_dpi).to be_an(Integer)
    end
  end

  describe 'keyword arguments' do
    it 'accepts all keyword arguments' do
      every_option = {
        extract_images: true,
        target_dpi: 600,
        max_image_dimension: 3000,
        auto_adjust_dpi: true,
        min_dpi: 200,
        max_dpi: 800
      }
      full = described_class.new(**every_option)

      expect(full.extract_images).to be(true)
      expect(full.target_dpi).to eq(600)
      expect(full.max_image_dimension).to eq(3000)
      expect(full.auto_adjust_dpi).to be(true)
      expect(full.min_dpi).to eq(200)
      expect(full.max_dpi).to eq(800)
    end
  end

  # Attribute-level comparisons between two separately built instances.
  describe 'equality' do
    it 'compares configs by value' do
      left = described_class.new(target_dpi: 300, max_image_dimension: 2000)
      right = described_class.new(target_dpi: 300, max_image_dimension: 2000)

      expect(left.target_dpi).to eq(right.target_dpi)
      expect(left.max_image_dimension).to eq(right.max_image_dimension)
    end

    it 'detects differences in DPI' do
      left = described_class.new(target_dpi: 300)
      right = described_class.new(target_dpi: 600)

      expect(left.target_dpi).not_to eq(right.target_dpi)
    end

    it 'detects differences in extract_images' do
      left = described_class.new(extract_images: true)
      right = described_class.new(extract_images: false)

      expect(left.extract_images).not_to eq(right.extract_images)
    end
  end

  # The parent Extraction config takes either an instance or a plain Hash.
  describe 'nested config integration' do
    it 'can be nested in Extraction config' do
      images = described_class.new(target_dpi: 600)
      parent = Kreuzberg::Config::Extraction.new(image_extraction: images)

      expect(parent.image_extraction).to be_a(described_class)
      expect(parent.image_extraction.target_dpi).to eq(600)
    end

    it 'accepts hash in Extraction config' do
      parent = Kreuzberg::Config::Extraction.new(
        image_extraction: { target_dpi: 600, extract_images: true }
      )

      expect(parent.image_extraction).to be_a(described_class)
      expect(parent.image_extraction.target_dpi).to eq(600)
    end
  end

  describe 'DPI range' do
    it 'allows realistic DPI values' do
      bounds = described_class.new(min_dpi: 150, max_dpi: 1200)

      expect(bounds.min_dpi).to eq(150)
      expect(bounds.max_dpi).to eq(1200)
    end

    it 'maintains DPI relationships' do
      ordered = described_class.new(target_dpi: 300, min_dpi: 100, max_dpi: 600)

      # min_dpi <= target_dpi <= max_dpi for the values supplied above.
      expect(ordered.min_dpi).to be <= ordered.target_dpi
      expect(ordered.target_dpi).to be <= ordered.max_dpi
    end
  end

  describe 'image dimension constraints' do
    it 'accepts large image dimensions' do
      large = described_class.new(max_image_dimension: 10_000)

      expect(large.max_image_dimension).to eq(10_000)
    end

    it 'accepts small image dimensions' do
      small = described_class.new(max_image_dimension: 100)

      expect(small.max_image_dimension).to eq(100)
    end
  end
end
|