kreuzberg 4.3.5-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yaml +1 -0
- data/.rubocop.yml +543 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +260 -0
- data/README.md +399 -0
- data/Rakefile +34 -0
- data/Steepfile +51 -0
- data/examples/async_patterns.rb +283 -0
- data/extconf.rb +60 -0
- data/kreuzberg.gemspec +253 -0
- data/lib/kreuzberg/api_proxy.rb +125 -0
- data/lib/kreuzberg/cache_api.rb +67 -0
- data/lib/kreuzberg/cli.rb +57 -0
- data/lib/kreuzberg/cli_proxy.rb +118 -0
- data/lib/kreuzberg/config.rb +1241 -0
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/document_structure.rb +204 -0
- data/lib/kreuzberg/error_context.rb +136 -0
- data/lib/kreuzberg/errors.rb +116 -0
- data/lib/kreuzberg/extraction_api.rb +329 -0
- data/lib/kreuzberg/mcp_proxy.rb +176 -0
- data/lib/kreuzberg/ocr_backend_protocol.rb +40 -0
- data/lib/kreuzberg/post_processor_protocol.rb +15 -0
- data/lib/kreuzberg/result.rb +712 -0
- data/lib/kreuzberg/setup_lib_path.rb +99 -0
- data/lib/kreuzberg/types.rb +414 -0
- data/lib/kreuzberg/validator_protocol.rb +16 -0
- data/lib/kreuzberg/version.rb +5 -0
- data/lib/kreuzberg.rb +102 -0
- data/lib/kreuzberg_rb.so +0 -0
- data/lib/libpdfium.so +0 -0
- data/sig/kreuzberg/internal.rbs +184 -0
- data/sig/kreuzberg.rbs +1337 -0
- data/spec/binding/async_operations_spec.rb +473 -0
- data/spec/binding/batch_operations_spec.rb +677 -0
- data/spec/binding/batch_spec.rb +360 -0
- data/spec/binding/cache_spec.rb +227 -0
- data/spec/binding/cli_proxy_spec.rb +85 -0
- data/spec/binding/cli_spec.rb +55 -0
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_spec.rb +419 -0
- data/spec/binding/config_validation_spec.rb +377 -0
- data/spec/binding/embeddings_spec.rb +816 -0
- data/spec/binding/error_handling_spec.rb +399 -0
- data/spec/binding/error_recovery_spec.rb +488 -0
- data/spec/binding/errors_spec.rb +66 -0
- data/spec/binding/font_config_spec.rb +220 -0
- data/spec/binding/images_spec.rb +732 -0
- data/spec/binding/keywords_extraction_spec.rb +600 -0
- data/spec/binding/metadata_types_spec.rb +1253 -0
- data/spec/binding/pages_extraction_spec.rb +550 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
- data/spec/binding/plugins/postprocessor_spec.rb +269 -0
- data/spec/binding/plugins/validator_spec.rb +273 -0
- data/spec/binding/tables_spec.rb +650 -0
- data/spec/fixtures/config.toml +38 -0
- data/spec/fixtures/config.yaml +41 -0
- data/spec/fixtures/invalid_config.toml +3 -0
- data/spec/serialization_spec.rb +134 -0
- data/spec/smoke/package_spec.rb +177 -0
- data/spec/spec_helper.rb +40 -0
- data/spec/unit/config/chunking_config_spec.rb +213 -0
- data/spec/unit/config/embedding_config_spec.rb +343 -0
- data/spec/unit/config/extraction_config_spec.rb +434 -0
- data/spec/unit/config/font_config_spec.rb +285 -0
- data/spec/unit/config/hierarchy_config_spec.rb +314 -0
- data/spec/unit/config/image_extraction_config_spec.rb +209 -0
- data/spec/unit/config/image_preprocessing_config_spec.rb +230 -0
- data/spec/unit/config/keyword_config_spec.rb +229 -0
- data/spec/unit/config/language_detection_config_spec.rb +258 -0
- data/spec/unit/config/ocr_config_spec.rb +171 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/spec/unit/config/page_config_spec.rb +221 -0
- data/spec/unit/config/pdf_config_spec.rb +267 -0
- data/spec/unit/config/postprocessor_config_spec.rb +290 -0
- data/spec/unit/config/tesseract_config_spec.rb +181 -0
- data/spec/unit/config/token_reduction_config_spec.rb +251 -0
- data/test/metadata_types_test.rb +959 -0
- metadata +292 -0
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Kreuzberg::Config::PostProcessor do
|
|
4
|
+
describe '#initialize' do
|
|
5
|
+
it 'creates config with default values' do
|
|
6
|
+
config = described_class.new
|
|
7
|
+
|
|
8
|
+
expect(config.enabled).to be true
|
|
9
|
+
expect(config.enabled_processors).to be_nil
|
|
10
|
+
expect(config.disabled_processors).to be_nil
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it 'creates config with enabled true' do
|
|
14
|
+
config = described_class.new(enabled: true)
|
|
15
|
+
|
|
16
|
+
expect(config.enabled).to be true
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'creates config with enabled false' do
|
|
20
|
+
config = described_class.new(enabled: false)
|
|
21
|
+
|
|
22
|
+
expect(config.enabled).to be false
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it 'creates config with enabled_processors list' do
|
|
26
|
+
config = described_class.new(
|
|
27
|
+
enabled: true,
|
|
28
|
+
enabled_processors: %w[quality formatting]
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
expect(config.enabled_processors).to eq %w[quality formatting]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it 'creates config with disabled_processors list' do
|
|
35
|
+
config = described_class.new(
|
|
36
|
+
enabled: true,
|
|
37
|
+
disabled_processors: %w[token_reduction]
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
expect(config.disabled_processors).to eq %w[token_reduction]
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it 'converts enabled_processors to strings' do
|
|
44
|
+
config = described_class.new(enabled_processors: %i[quality formatting])
|
|
45
|
+
|
|
46
|
+
expect(config.enabled_processors).to eq %w[quality formatting]
|
|
47
|
+
expect(config.enabled_processors.all?(String)).to be true
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it 'converts disabled_processors to strings' do
|
|
51
|
+
config = described_class.new(disabled_processors: [:quality])
|
|
52
|
+
|
|
53
|
+
expect(config.disabled_processors).to eq %w[quality]
|
|
54
|
+
expect(config.disabled_processors.all?(String)).to be true
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it 'converts enabled to boolean' do
|
|
58
|
+
config = described_class.new(enabled: 1)
|
|
59
|
+
|
|
60
|
+
expect(config.enabled).to be true
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
describe '#to_h' do
|
|
65
|
+
it 'serializes to hash with default values' do
|
|
66
|
+
config = described_class.new
|
|
67
|
+
hash = config.to_h
|
|
68
|
+
|
|
69
|
+
expect(hash).to be_a Hash
|
|
70
|
+
expect(hash[:enabled]).to be true
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it 'includes enabled_processors in hash when present' do
|
|
74
|
+
config = described_class.new(enabled_processors: %w[quality])
|
|
75
|
+
hash = config.to_h
|
|
76
|
+
|
|
77
|
+
expect(hash[:enabled_processors]).to eq %w[quality]
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it 'includes disabled_processors in hash when present' do
|
|
81
|
+
config = described_class.new(disabled_processors: %w[token_reduction])
|
|
82
|
+
hash = config.to_h
|
|
83
|
+
|
|
84
|
+
expect(hash[:disabled_processors]).to eq %w[token_reduction]
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it 'compacts nil values from hash' do
|
|
88
|
+
config = described_class.new(enabled: true)
|
|
89
|
+
hash = config.to_h
|
|
90
|
+
|
|
91
|
+
expect(hash.key?(:enabled_processors)).to be false
|
|
92
|
+
expect(hash.key?(:disabled_processors)).to be false
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
describe 'validation' do
|
|
97
|
+
it 'accepts enabled true' do
|
|
98
|
+
expect do
|
|
99
|
+
described_class.new(enabled: true)
|
|
100
|
+
end.not_to raise_error
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
it 'accepts enabled false' do
|
|
104
|
+
expect do
|
|
105
|
+
described_class.new(enabled: false)
|
|
106
|
+
end.not_to raise_error
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it 'accepts enabled_processors list' do
|
|
110
|
+
expect do
|
|
111
|
+
described_class.new(enabled_processors: %w[quality formatting])
|
|
112
|
+
end.not_to raise_error
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
it 'accepts disabled_processors list' do
|
|
116
|
+
expect do
|
|
117
|
+
described_class.new(disabled_processors: %w[token_reduction])
|
|
118
|
+
end.not_to raise_error
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
it 'accepts both enabled and disabled processors' do
|
|
122
|
+
expect do
|
|
123
|
+
described_class.new(
|
|
124
|
+
enabled_processors: %w[quality],
|
|
125
|
+
disabled_processors: %w[formatting]
|
|
126
|
+
)
|
|
127
|
+
end.not_to raise_error
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
describe 'keyword arguments' do
|
|
132
|
+
it 'accepts all keyword arguments' do
|
|
133
|
+
config = described_class.new(
|
|
134
|
+
enabled: true,
|
|
135
|
+
enabled_processors: %w[quality],
|
|
136
|
+
disabled_processors: %w[token_reduction]
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
expect(config.enabled).to be true
|
|
140
|
+
expect(config.enabled_processors).to eq %w[quality]
|
|
141
|
+
expect(config.disabled_processors).to eq %w[token_reduction]
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
describe 'equality' do
|
|
146
|
+
it 'compares configs by value' do
|
|
147
|
+
config1 = described_class.new(
|
|
148
|
+
enabled: true,
|
|
149
|
+
enabled_processors: %w[quality]
|
|
150
|
+
)
|
|
151
|
+
config2 = described_class.new(
|
|
152
|
+
enabled: true,
|
|
153
|
+
enabled_processors: %w[quality]
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
expect(config1.enabled).to eq config2.enabled
|
|
157
|
+
expect(config1.enabled_processors).to eq config2.enabled_processors
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it 'detects differences in enabled' do
|
|
161
|
+
config1 = described_class.new(enabled: true)
|
|
162
|
+
config2 = described_class.new(enabled: false)
|
|
163
|
+
|
|
164
|
+
expect(config1.enabled).not_to eq config2.enabled
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
it 'detects differences in enabled_processors' do
|
|
168
|
+
config1 = described_class.new(enabled_processors: %w[quality])
|
|
169
|
+
config2 = described_class.new(enabled_processors: %w[formatting])
|
|
170
|
+
|
|
171
|
+
expect(config1.enabled_processors).not_to eq config2.enabled_processors
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
describe 'nested config integration' do
|
|
176
|
+
it 'can be nested in Extraction config' do
|
|
177
|
+
postprocessor = described_class.new(
|
|
178
|
+
enabled: true,
|
|
179
|
+
enabled_processors: %w[quality]
|
|
180
|
+
)
|
|
181
|
+
extraction = Kreuzberg::Config::Extraction.new(postprocessor: postprocessor)
|
|
182
|
+
|
|
183
|
+
expect(extraction.postprocessor).to be_a described_class
|
|
184
|
+
expect(extraction.postprocessor.enabled).to be true
|
|
185
|
+
expect(extraction.postprocessor.enabled_processors).to eq %w[quality]
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
it 'accepts hash in Extraction config' do
|
|
189
|
+
extraction = Kreuzberg::Config::Extraction.new(
|
|
190
|
+
postprocessor: {
|
|
191
|
+
enabled: true,
|
|
192
|
+
enabled_processors: %w[quality formatting]
|
|
193
|
+
}
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
expect(extraction.postprocessor).to be_a described_class
|
|
197
|
+
expect(extraction.postprocessor.enabled).to be true
|
|
198
|
+
expect(extraction.postprocessor.enabled_processors).to eq %w[quality formatting]
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
describe 'symbol vs string key handling' do
|
|
203
|
+
it 'converts symbol enabled_processors to strings' do
|
|
204
|
+
config = described_class.new(enabled_processors: %i[quality formatting])
|
|
205
|
+
|
|
206
|
+
expect(config.enabled_processors).to eq %w[quality formatting]
|
|
207
|
+
expect(config.enabled_processors.all?(String)).to be true
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it 'converts symbol disabled_processors to strings' do
|
|
211
|
+
config = described_class.new(disabled_processors: [:token_reduction])
|
|
212
|
+
|
|
213
|
+
expect(config.disabled_processors).to eq %w[token_reduction]
|
|
214
|
+
expect(config.disabled_processors.all?(String)).to be true
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
describe 'processor lists' do
|
|
219
|
+
it 'stores empty enabled_processors list' do
|
|
220
|
+
config = described_class.new(enabled_processors: [])
|
|
221
|
+
|
|
222
|
+
expect(config.enabled_processors).to eq []
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it 'stores single enabled_processor' do
|
|
226
|
+
config = described_class.new(enabled_processors: %w[quality])
|
|
227
|
+
|
|
228
|
+
expect(config.enabled_processors).to eq %w[quality]
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
it 'stores multiple enabled_processors' do
|
|
232
|
+
processors = %w[quality formatting cleanup]
|
|
233
|
+
config = described_class.new(enabled_processors: processors)
|
|
234
|
+
|
|
235
|
+
expect(config.enabled_processors).to eq processors
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
it 'stores multiple disabled_processors' do
|
|
239
|
+
processors = %w[token_reduction duplicate_removal]
|
|
240
|
+
config = described_class.new(disabled_processors: processors)
|
|
241
|
+
|
|
242
|
+
expect(config.disabled_processors).to eq processors
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
describe 'boolean conversion' do
|
|
247
|
+
it 'converts truthy enabled to true' do
|
|
248
|
+
config = described_class.new(enabled: 1)
|
|
249
|
+
|
|
250
|
+
expect(config.enabled).to be true
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
it 'converts false enabled to false' do
|
|
254
|
+
config = described_class.new(enabled: false)
|
|
255
|
+
|
|
256
|
+
expect(config.enabled).to be false
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
it 'converts string true to true' do
|
|
260
|
+
config = described_class.new(enabled: 'yes')
|
|
261
|
+
|
|
262
|
+
expect(config.enabled).to be true
|
|
263
|
+
end
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
describe 'default behavior' do
|
|
267
|
+
it 'defaults to enabled' do
|
|
268
|
+
config = described_class.new
|
|
269
|
+
|
|
270
|
+
expect(config.enabled).to be true
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it 'defaults to no specific processors' do
|
|
274
|
+
config = described_class.new
|
|
275
|
+
|
|
276
|
+
expect(config.enabled_processors).to be_nil
|
|
277
|
+
expect(config.disabled_processors).to be_nil
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
it 'allows disabling while specifying processors' do
|
|
281
|
+
config = described_class.new(
|
|
282
|
+
enabled: false,
|
|
283
|
+
enabled_processors: %w[quality]
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
expect(config.enabled).to be false
|
|
287
|
+
expect(config.enabled_processors).to eq %w[quality]
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
end
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe Kreuzberg::Config::Tesseract do
|
|
4
|
+
describe '#initialize' do
|
|
5
|
+
it 'creates config with no arguments' do
|
|
6
|
+
config = described_class.new
|
|
7
|
+
|
|
8
|
+
expect(config).to be_a described_class
|
|
9
|
+
expect(config.options).to eq({})
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
it 'creates config with custom options' do
|
|
13
|
+
config = described_class.new(dpi: 300, psm: 3)
|
|
14
|
+
|
|
15
|
+
expect(config.options[:dpi]).to eq 300
|
|
16
|
+
expect(config.options[:psm]).to eq 3
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'converts string keys to symbols' do
|
|
20
|
+
config = described_class.new('oem' => 1, 'lang' => 'eng')
|
|
21
|
+
|
|
22
|
+
expect(config.options[:oem]).to eq 1
|
|
23
|
+
expect(config.options[:lang]).to eq 'eng'
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it 'accepts preprocessing as hash' do
|
|
27
|
+
config = described_class.new(preprocessing: { target_dpi: 300 })
|
|
28
|
+
|
|
29
|
+
expect(config.options[:preprocessing]).to be_a Kreuzberg::Config::ImagePreprocessing
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it 'accepts preprocessing as instance' do
|
|
33
|
+
preprocessing = Kreuzberg::Config::ImagePreprocessing.new(target_dpi: 600)
|
|
34
|
+
config = described_class.new(preprocessing: preprocessing)
|
|
35
|
+
|
|
36
|
+
expect(config.options[:preprocessing]).to be_a Kreuzberg::Config::ImagePreprocessing
|
|
37
|
+
expect(config.options[:preprocessing].target_dpi).to eq 600
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
describe '#to_h' do
|
|
42
|
+
it 'returns options as hash' do
|
|
43
|
+
config = described_class.new(dpi: 300, psm: 3)
|
|
44
|
+
hash = config.to_h
|
|
45
|
+
|
|
46
|
+
expect(hash).to be_a Hash
|
|
47
|
+
expect(hash[:dpi]).to eq 300
|
|
48
|
+
expect(hash[:psm]).to eq 3
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
it 'includes nested preprocessing in hash' do
|
|
52
|
+
config = described_class.new(
|
|
53
|
+
preprocessing: { target_dpi: 300, denoise: true }
|
|
54
|
+
)
|
|
55
|
+
hash = config.to_h
|
|
56
|
+
|
|
57
|
+
expect(hash[:preprocessing]).to be_a Kreuzberg::Config::ImagePreprocessing
|
|
58
|
+
# Access the config object's attributes
|
|
59
|
+
expect(hash[:preprocessing].target_dpi).to eq 300
|
|
60
|
+
expect(hash[:preprocessing].denoise).to be true
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it 'returns duplicate hash not original' do
|
|
64
|
+
config = described_class.new(value: 'test')
|
|
65
|
+
hash1 = config.to_h
|
|
66
|
+
hash2 = config.to_h
|
|
67
|
+
|
|
68
|
+
expect(hash1).to eq hash2
|
|
69
|
+
expect(hash1).not_to be hash2
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
describe 'validation' do
|
|
74
|
+
it 'rejects invalid preprocessing type' do
|
|
75
|
+
expect do
|
|
76
|
+
described_class.new(preprocessing: 'invalid')
|
|
77
|
+
end.to raise_error ArgumentError, /preprocessing must be.*ImagePreprocessing.*Hash/
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it 'accepts valid preprocessing hash' do
|
|
81
|
+
expect do
|
|
82
|
+
described_class.new(preprocessing: { target_dpi: 300 })
|
|
83
|
+
end.not_to raise_error
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
it 'accepts valid preprocessing instance' do
|
|
87
|
+
preprocessing = Kreuzberg::Config::ImagePreprocessing.new
|
|
88
|
+
expect do
|
|
89
|
+
described_class.new(preprocessing: preprocessing)
|
|
90
|
+
end.not_to raise_error
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
describe 'keyword arguments' do
|
|
95
|
+
it 'accepts arbitrary keyword arguments' do
|
|
96
|
+
config = described_class.new(
|
|
97
|
+
dpi: 300,
|
|
98
|
+
psm: 3,
|
|
99
|
+
oem: 1,
|
|
100
|
+
custom_option: 'value'
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
expect(config.options[:dpi]).to eq 300
|
|
104
|
+
expect(config.options[:psm]).to eq 3
|
|
105
|
+
expect(config.options[:oem]).to eq 1
|
|
106
|
+
expect(config.options[:custom_option]).to eq 'value'
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it 'stores all options with symbol keys' do
|
|
110
|
+
config = described_class.new(foo: 'bar', baz: 42)
|
|
111
|
+
|
|
112
|
+
expect(config.options.keys).to all be_a Symbol
|
|
113
|
+
expect(config.options[:foo]).to eq 'bar'
|
|
114
|
+
expect(config.options[:baz]).to eq 42
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
describe 'equality' do
|
|
119
|
+
it 'compares configs by options value' do
|
|
120
|
+
config1 = described_class.new(dpi: 300, psm: 3)
|
|
121
|
+
config2 = described_class.new(dpi: 300, psm: 3)
|
|
122
|
+
|
|
123
|
+
expect(config1.options).to eq config2.options
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
it 'detects differences in options' do
|
|
127
|
+
config1 = described_class.new(dpi: 300)
|
|
128
|
+
config2 = described_class.new(dpi: 600)
|
|
129
|
+
|
|
130
|
+
expect(config1.options).not_to eq config2.options
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
describe 'nested config integration' do
|
|
135
|
+
it 'can be nested in OCR config' do
|
|
136
|
+
tesseract = described_class.new(dpi: 300, psm: 3)
|
|
137
|
+
ocr = Kreuzberg::Config::OCR.new(tesseract_config: tesseract)
|
|
138
|
+
|
|
139
|
+
expect(ocr.tesseract_config).to be_a described_class
|
|
140
|
+
expect(ocr.tesseract_config.options[:dpi]).to eq 300
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
it 'accepts preprocessing nested in tesseract' do
|
|
144
|
+
preprocessing_data = { target_dpi: 600, denoise: true }
|
|
145
|
+
tesseract = described_class.new(preprocessing: preprocessing_data)
|
|
146
|
+
|
|
147
|
+
expect(tesseract.options[:preprocessing]).to be_a Kreuzberg::Config::ImagePreprocessing
|
|
148
|
+
expect(tesseract.options[:preprocessing].denoise).to be true
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
describe 'symbol vs string key handling' do
|
|
153
|
+
it 'normalizes all keys to symbols' do
|
|
154
|
+
config = described_class.new(
|
|
155
|
+
'string_key' => 'value1',
|
|
156
|
+
symbol_key: 'value2'
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
expect(config.options.keys).to all be_a Symbol
|
|
160
|
+
expect(config.options[:string_key]).to eq 'value1'
|
|
161
|
+
expect(config.options[:symbol_key]).to eq 'value2'
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
it 'preserves string values while converting keys to symbols' do
|
|
165
|
+
config = described_class.new('test_key' => 'test_value')
|
|
166
|
+
|
|
167
|
+
expect(config.options[:test_key]).to eq 'test_value'
|
|
168
|
+
expect(config.options[:test_key]).to be_a String
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
describe 'immutability concerns' do
|
|
173
|
+
it 'stores options but does not freeze them by default' do
|
|
174
|
+
config = described_class.new(value: 'test')
|
|
175
|
+
|
|
176
|
+
# The config itself can be modified by re-assigning instance variables
|
|
177
|
+
# This is a design choice that allows for mutability
|
|
178
|
+
expect(config.options).to respond_to(:merge)
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
end
|