kreuzberg 4.3.5-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yaml +1 -0
  5. data/.rubocop.yml +543 -0
  6. data/Gemfile +8 -0
  7. data/Gemfile.lock +260 -0
  8. data/README.md +399 -0
  9. data/Rakefile +34 -0
  10. data/Steepfile +51 -0
  11. data/examples/async_patterns.rb +283 -0
  12. data/extconf.rb +60 -0
  13. data/kreuzberg.gemspec +253 -0
  14. data/lib/kreuzberg/api_proxy.rb +125 -0
  15. data/lib/kreuzberg/cache_api.rb +67 -0
  16. data/lib/kreuzberg/cli.rb +57 -0
  17. data/lib/kreuzberg/cli_proxy.rb +118 -0
  18. data/lib/kreuzberg/config.rb +1241 -0
  19. data/lib/kreuzberg/djot_content.rb +225 -0
  20. data/lib/kreuzberg/document_structure.rb +204 -0
  21. data/lib/kreuzberg/error_context.rb +136 -0
  22. data/lib/kreuzberg/errors.rb +116 -0
  23. data/lib/kreuzberg/extraction_api.rb +329 -0
  24. data/lib/kreuzberg/mcp_proxy.rb +176 -0
  25. data/lib/kreuzberg/ocr_backend_protocol.rb +40 -0
  26. data/lib/kreuzberg/post_processor_protocol.rb +15 -0
  27. data/lib/kreuzberg/result.rb +712 -0
  28. data/lib/kreuzberg/setup_lib_path.rb +99 -0
  29. data/lib/kreuzberg/types.rb +414 -0
  30. data/lib/kreuzberg/validator_protocol.rb +16 -0
  31. data/lib/kreuzberg/version.rb +5 -0
  32. data/lib/kreuzberg.rb +102 -0
  33. data/lib/kreuzberg_rb.so +0 -0
  34. data/lib/libpdfium.so +0 -0
  35. data/sig/kreuzberg/internal.rbs +184 -0
  36. data/sig/kreuzberg.rbs +1337 -0
  37. data/spec/binding/async_operations_spec.rb +473 -0
  38. data/spec/binding/batch_operations_spec.rb +677 -0
  39. data/spec/binding/batch_spec.rb +360 -0
  40. data/spec/binding/cache_spec.rb +227 -0
  41. data/spec/binding/cli_proxy_spec.rb +85 -0
  42. data/spec/binding/cli_spec.rb +55 -0
  43. data/spec/binding/config_result_spec.rb +377 -0
  44. data/spec/binding/config_spec.rb +419 -0
  45. data/spec/binding/config_validation_spec.rb +377 -0
  46. data/spec/binding/embeddings_spec.rb +816 -0
  47. data/spec/binding/error_handling_spec.rb +399 -0
  48. data/spec/binding/error_recovery_spec.rb +488 -0
  49. data/spec/binding/errors_spec.rb +66 -0
  50. data/spec/binding/font_config_spec.rb +220 -0
  51. data/spec/binding/images_spec.rb +732 -0
  52. data/spec/binding/keywords_extraction_spec.rb +600 -0
  53. data/spec/binding/metadata_types_spec.rb +1253 -0
  54. data/spec/binding/pages_extraction_spec.rb +550 -0
  55. data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
  56. data/spec/binding/plugins/postprocessor_spec.rb +269 -0
  57. data/spec/binding/plugins/validator_spec.rb +273 -0
  58. data/spec/binding/tables_spec.rb +650 -0
  59. data/spec/fixtures/config.toml +38 -0
  60. data/spec/fixtures/config.yaml +41 -0
  61. data/spec/fixtures/invalid_config.toml +3 -0
  62. data/spec/serialization_spec.rb +134 -0
  63. data/spec/smoke/package_spec.rb +177 -0
  64. data/spec/spec_helper.rb +40 -0
  65. data/spec/unit/config/chunking_config_spec.rb +213 -0
  66. data/spec/unit/config/embedding_config_spec.rb +343 -0
  67. data/spec/unit/config/extraction_config_spec.rb +434 -0
  68. data/spec/unit/config/font_config_spec.rb +285 -0
  69. data/spec/unit/config/hierarchy_config_spec.rb +314 -0
  70. data/spec/unit/config/image_extraction_config_spec.rb +209 -0
  71. data/spec/unit/config/image_preprocessing_config_spec.rb +230 -0
  72. data/spec/unit/config/keyword_config_spec.rb +229 -0
  73. data/spec/unit/config/language_detection_config_spec.rb +258 -0
  74. data/spec/unit/config/ocr_config_spec.rb +171 -0
  75. data/spec/unit/config/output_format_spec.rb +380 -0
  76. data/spec/unit/config/page_config_spec.rb +221 -0
  77. data/spec/unit/config/pdf_config_spec.rb +267 -0
  78. data/spec/unit/config/postprocessor_config_spec.rb +290 -0
  79. data/spec/unit/config/tesseract_config_spec.rb +181 -0
  80. data/spec/unit/config/token_reduction_config_spec.rb +251 -0
  81. data/test/metadata_types_test.rb +959 -0
  82. metadata +292 -0
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Kreuzberg::Config::ImagePreprocessing do
4
describe '#initialize' do
  # Checks the value every reader returns when no arguments are given.
  it 'creates config with default values' do
    expect(described_class.new).to have_attributes(
      target_dpi: 300,
      auto_rotate: true,
      deskew: true,
      denoise: false,
      contrast_enhance: true,
      binarization_method: 'otsu',
      invert_colors: false
    )
  end

  # Every option is overridden and each reader must echo its override.
  it 'creates config with custom values' do
    overrides = {
      target_dpi: 600,
      auto_rotate: false,
      deskew: false,
      denoise: true,
      contrast_enhance: false,
      binarization_method: 'sauvola',
      invert_colors: true
    }

    expect(described_class.new(**overrides)).to have_attributes(overrides)
  end

  # A numeric String passed as target_dpi must come back as an Integer.
  it 'converts target_dpi to integer' do
    dpi = described_class.new(target_dpi: '300').target_dpi

    expect(dpi).to eq(300)
    expect(dpi).to be_an(Integer)
  end

  # A Symbol passed as binarization_method must come back as a String.
  it 'converts binarization_method to string' do
    method_name = described_class.new(binarization_method: :niblack).binarization_method

    expect(method_name).to eq('niblack')
    expect(method_name).to be_a(String)
  end
end
51
+
52
describe '#to_h' do
  # Explicit arguments and untouched defaults must both show up in the hash.
  it 'serializes to hash with all values' do
    serialized = described_class.new(target_dpi: 300, denoise: true).to_h

    expect(serialized).to be_a(Hash)
    expect(serialized[:target_dpi]).to eq(300)
    expect(serialized[:denoise]).to be(true)
    expect(serialized[:auto_rotate]).to be(true)
    expect(serialized[:binarization_method]).to eq('otsu')
  end

  # The key set is fixed: exactly these seven symbols, in any order.
  it 'always includes all keys in hash' do
    expected_keys = %i[
      target_dpi auto_rotate deskew denoise
      contrast_enhance binarization_method invert_colors
    ]

    expect(described_class.new.to_h.keys).to match_array(expected_keys)
  end
end
79
+
80
describe 'validation' do
  # An unknown method name must be refused with an ArgumentError.
  it 'rejects invalid binarization method' do
    expect { described_class.new(binarization_method: 'invalid_method') }
      .to raise_error(ArgumentError, /Invalid binarization_method/)
  end

  # Each of the six listed method names must construct without raising.
  it 'accepts all valid binarization methods' do
    %w[otsu sauvola niblack wolf bradley adaptive].each do |name|
      expect { described_class.new(binarization_method: name) }.not_to raise_error
    end
  end

  # A Symbol is accepted in place of a String.
  it 'accepts binarization method as symbol' do
    expect { described_class.new(binarization_method: :sauvola) }.not_to raise_error
  end
end
103
+
104
describe 'keyword arguments' do
  # Passes every supported keyword at once and reads each one back.
  it 'accepts all keyword arguments' do
    settings = {
      target_dpi: 600,
      auto_rotate: true,
      deskew: false,
      denoise: true,
      contrast_enhance: false,
      binarization_method: 'bradley',
      invert_colors: true
    }

    expect(described_class.new(**settings)).to have_attributes(settings)
  end
end
125
+
126
describe 'equality' do
  # Two configs built from identical arguments must agree on every option.
  # The individual readers are compared first; the complete #to_h snapshots
  # are then compared so that a mismatch in any option, not only the three
  # passed explicitly, fails the example.
  it 'compares configs by value' do
    args = { target_dpi: 300, binarization_method: 'otsu', denoise: true }
    config1 = described_class.new(**args)
    config2 = described_class.new(**args)

    expect(config1.target_dpi).to eq config2.target_dpi
    expect(config1.binarization_method).to eq config2.binarization_method
    expect(config1.denoise).to eq config2.denoise
    expect(config1.to_h).to eq config2.to_h
  end

  # A different target_dpi must be visible on the reader and in the snapshot.
  it 'detects differences in target_dpi' do
    config1 = described_class.new(target_dpi: 300)
    config2 = described_class.new(target_dpi: 600)

    expect(config1.target_dpi).not_to eq config2.target_dpi
    expect(config1.to_h).not_to eq config2.to_h
  end

  # A different binarization_method must be visible on the reader and in the snapshot.
  it 'detects differences in binarization_method' do
    config1 = described_class.new(binarization_method: 'otsu')
    config2 = described_class.new(binarization_method: 'sauvola')

    expect(config1.binarization_method).not_to eq config2.binarization_method
    expect(config1.to_h).not_to eq config2.to_h
  end
end
158
+
159
describe 'nested config integration' do
  # A preprocessing config handed to Tesseract is reachable through
  # options[:preprocessing] and keeps its settings.
  it 'can be nested in Tesseract config' do
    tesseract = Kreuzberg::Config::Tesseract.new(
      preprocessing: described_class.new(denoise: true)
    )
    nested = tesseract.options[:preprocessing]

    expect(nested).to be_a(described_class)
    expect(nested.denoise).to be(true)
  end
end
168
+
169
describe 'symbol vs string key handling' do
  # Symbol input for the method name is read back as a String.
  it 'converts symbol binarization method to string' do
    method_name = described_class.new(binarization_method: :bradley).binarization_method

    expect(method_name).to eq('bradley')
    expect(method_name).to be_a(String)
  end

  # String input for the DPI is read back as an Integer.
  it 'converts string target_dpi to integer' do
    dpi = described_class.new(target_dpi: '600').target_dpi

    expect(dpi).to eq(600)
    expect(dpi).to be_an(Integer)
  end
end
184
+
185
describe 'boolean conversion' do
  # An Integer, a String and a literal true must all be read back as true.
  it 'converts truthy values to boolean' do
    config = described_class.new(auto_rotate: 1, deskew: 'yes', denoise: true)

    %i[auto_rotate deskew denoise].each do |flag|
      expect(config.public_send(flag)).to be(true)
    end
  end

  # Literal false must be read back as false for each flag.
  it 'converts false values to boolean' do
    config = described_class.new(auto_rotate: false, deskew: false, denoise: false)

    %i[auto_rotate deskew denoise].each do |flag|
      expect(config.public_send(flag)).to be(false)
    end
  end
end
210
+
211
describe 'DPI configuration' do
  # Each example round-trips one DPI value through the constructor.
  it 'accepts realistic DPI values' do
    expect(described_class.new(target_dpi: 300).target_dpi).to eq(300)
  end

  it 'accepts high DPI values' do
    expect(described_class.new(target_dpi: 1200).target_dpi).to eq(1200)
  end

  it 'accepts low DPI values' do
    expect(described_class.new(target_dpi: 72).target_dpi).to eq(72)
  end
end
230
+ end
@@ -0,0 +1,229 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Kreuzberg::Config::Keywords do
4
describe '#initialize' do
  # With no arguments every reader is expected to return nil.
  it 'creates config with default values' do
    config = described_class.new

    expect(config.algorithm).to be_nil
    expect(config.max_keywords).to be_nil
    expect(config.min_score).to be_nil
    expect(config.ngram_range).to be_nil
    expect(config.language).to be_nil
    expect(config.yake_params).to be_nil
    expect(config.rake_params).to be_nil
  end

  # Scalar options are read back exactly as supplied.
  it 'creates config with custom values' do
    config = described_class.new(
      algorithm: 'yake',
      max_keywords: 10,
      min_score: 0.5,
      ngram_range: [1, 3],
      language: 'en'
    )

    expect(config.algorithm).to eq 'yake'
    expect(config.max_keywords).to eq 10
    expect(config.min_score).to eq 0.5
    expect(config.ngram_range).to eq [1, 3]
    expect(config.language).to eq 'en'
  end

  # A ready-made KeywordYakeParams instance is kept with its settings.
  it 'accepts yake_params as instance' do
    yake_params = Kreuzberg::Config::KeywordYakeParams.new(window_size: 3)
    config = described_class.new(yake_params: yake_params)

    expect(config.yake_params).to be_a Kreuzberg::Config::KeywordYakeParams
    expect(config.yake_params.window_size).to eq 3
  end

  # A plain Hash is wrapped into a KeywordYakeParams instance.
  it 'converts yake_params hash to instance' do
    config = described_class.new(yake_params: { window_size: 2 })

    expect(config.yake_params).to be_a Kreuzberg::Config::KeywordYakeParams
    expect(config.yake_params.window_size).to eq 2
  end

  # A ready-made KeywordRakeParams instance is kept with its settings.
  # The stored value is asserted too, mirroring the yake_params example above.
  it 'accepts rake_params as instance' do
    rake_params = Kreuzberg::Config::KeywordRakeParams.new(min_word_length: 3)
    config = described_class.new(rake_params: rake_params)

    expect(config.rake_params).to be_a Kreuzberg::Config::KeywordRakeParams
    expect(config.rake_params.min_word_length).to eq 3
  end

  # A plain Hash is wrapped into a KeywordRakeParams instance.
  it 'converts rake_params hash to instance' do
    config = described_class.new(rake_params: { min_word_length: 2 })

    expect(config.rake_params).to be_a Kreuzberg::Config::KeywordRakeParams
    expect(config.rake_params.min_word_length).to eq 2
  end
end
62
+
63
describe '#to_h' do
  # Supplied options appear in the hash under symbol keys.
  it 'serializes to hash' do
    serialized = described_class.new(algorithm: 'yake', max_keywords: 10).to_h

    expect(serialized).to be_a(Hash)
    expect(serialized[:algorithm]).to eq('yake')
    expect(serialized[:max_keywords]).to eq(10)
  end

  # Nested parameter objects are serialized to hashes as well.
  it 'includes nested params in hash' do
    config = described_class.new(algorithm: 'yake', yake_params: { window_size: 3 })
    nested = config.to_h[:yake_params]

    expect(nested).to be_a(Hash)
    expect(nested[:window_size]).to eq(3)
  end

  # Options that were never set must not appear as keys at all.
  it 'compacts nil values from hash' do
    serialized = described_class.new(algorithm: 'rake').to_h

    %i[max_keywords yake_params].each do |absent_key|
      expect(serialized.key?(absent_key)).to be(false)
    end
  end
end
92
+
93
describe 'validation' do
  # Construction with a known algorithm name must not raise.
  it 'accepts valid algorithm names' do
    expect { described_class.new(algorithm: 'yake') }.not_to raise_error
  end

  # Construction with an integer keyword limit must not raise.
  it 'accepts valid max_keywords' do
    expect { described_class.new(max_keywords: 20) }.not_to raise_error
  end

  # Anything other than a params instance, Hash or nil is refused.
  it 'raises error for invalid yake_params type' do
    expect { described_class.new(yake_params: 'invalid') }
      .to raise_error(ArgumentError, /Expected.*KeywordYakeParams.*Hash.*nil/)
  end

  it 'raises error for invalid rake_params type' do
    expect { described_class.new(rake_params: 'invalid') }
      .to raise_error(ArgumentError, /Expected.*KeywordRakeParams.*Hash.*nil/)
  end
end
118
+
119
describe 'keyword arguments' do
  # Passes the scalar keywords plus a yake_params Hash in a single call.
  it 'accepts all keyword arguments' do
    scalars = {
      algorithm: 'yake',
      max_keywords: 15,
      min_score: 0.7,
      ngram_range: [1, 2],
      language: 'fr'
    }
    config = described_class.new(**scalars, yake_params: { window_size: 3 })

    expect(config).to have_attributes(scalars)
    expect(config.yake_params).to be_a(Kreuzberg::Config::KeywordYakeParams)
  end
end
138
+
139
describe 'equality' do
  # Two configs built from identical arguments must agree reader by reader
  # and in their complete #to_h snapshots, so the example really compares
  # the configs by value rather than two hand-picked fields.
  it 'compares configs by value' do
    config1 = described_class.new(algorithm: 'yake', max_keywords: 10)
    config2 = described_class.new(algorithm: 'yake', max_keywords: 10)

    expect(config1.algorithm).to eq config2.algorithm
    expect(config1.max_keywords).to eq config2.max_keywords
    expect(config1.to_h).to eq config2.to_h
  end

  # A different algorithm must be visible on the reader and in the snapshot.
  it 'detects differences in algorithm' do
    config1 = described_class.new(algorithm: 'yake')
    config2 = described_class.new(algorithm: 'rake')

    expect(config1.algorithm).not_to eq config2.algorithm
    expect(config1.to_h).not_to eq config2.to_h
  end

  # A different max_keywords must be visible on the reader and in the snapshot.
  it 'detects differences in max_keywords' do
    config1 = described_class.new(max_keywords: 10)
    config2 = described_class.new(max_keywords: 20)

    expect(config1.max_keywords).not_to eq config2.max_keywords
    expect(config1.to_h).not_to eq config2.to_h
  end
end
162
+
163
describe 'nested config integration' do
  # An instance handed to Extraction is exposed unchanged via #keywords.
  it 'can be nested in Extraction config' do
    extraction = Kreuzberg::Config::Extraction.new(
      keywords: described_class.new(algorithm: 'yake', max_keywords: 15)
    )
    nested = extraction.keywords

    expect(nested).to be_a(described_class)
    expect(nested.algorithm).to eq('yake')
    expect(nested.max_keywords).to eq(15)
  end

  # A plain Hash handed to Extraction is turned into a Keywords instance.
  it 'accepts hash in Extraction config' do
    nested = Kreuzberg::Config::Extraction.new(
      keywords: { algorithm: 'rake', max_keywords: 10 }
    ).keywords

    expect(nested).to be_a(described_class)
    expect(nested.algorithm).to eq('rake')
    expect(nested.max_keywords).to eq(10)
  end
end
183
+
184
describe 'symbol vs string key handling' do
  # Symbol input for the algorithm is read back as a String.
  it 'converts symbol algorithm to string' do
    algorithm = described_class.new(algorithm: :yake).algorithm

    expect(algorithm).to eq('yake')
    expect(algorithm).to be_a(String)
  end

  # Symbol input for the language is read back as a String.
  it 'converts symbol language to string' do
    language = described_class.new(language: :eng).language

    expect(language).to eq('eng')
    expect(language).to be_a(String)
  end

  # Numeric strings inside ngram_range are read back as Integers.
  it 'converts ngram_range values to integers' do
    range = described_class.new(ngram_range: %w[1 3]).ngram_range

    expect(range).to eq([1, 3])
    expect(range.all?(Integer)).to be(true)
  end
end
206
+
207
describe 'parameter conversions' do
  # A numeric String for max_keywords is read back as an Integer.
  it 'converts max_keywords to integer' do
    limit = described_class.new(max_keywords: '20').max_keywords

    expect(limit).to eq(20)
    expect(limit).to be_an(Integer)
  end

  # A numeric String for min_score is read back as a Float.
  it 'converts min_score to float' do
    score = described_class.new(min_score: '0.75').min_score

    expect(score).to eq(0.75)
    expect(score).to be_a(Float)
  end

  # An ngram_range given as Integers is read back as Integers.
  it 'converts ngram_range to array of integers' do
    range = described_class.new(ngram_range: [1, 2]).ngram_range

    expect(range).to eq([1, 2])
    expect(range.all?(Integer)).to be(true)
  end
end
229
+ end
@@ -0,0 +1,258 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Kreuzberg::Config::LanguageDetection do
4
describe '#initialize' do
  # Checks the value every reader returns when no arguments are given.
  it 'creates config with default values' do
    expect(described_class.new).to have_attributes(
      enabled: false,
      min_confidence: 0.5,
      detect_multiple: false
    )
  end

  # All three options are overridden and read back.
  it 'creates config with custom values' do
    overrides = { enabled: true, min_confidence: 0.9, detect_multiple: true }

    expect(described_class.new(**overrides)).to have_attributes(overrides)
  end

  # An Integer passed as enabled is read back as the literal true.
  it 'converts enabled to boolean' do
    flag = described_class.new(enabled: 1).enabled

    expect(flag).to be(true)
    expect(flag).to be_a(TrueClass)
  end

  # A numeric String passed as min_confidence is read back as a Float.
  it 'converts min_confidence to float' do
    threshold = described_class.new(min_confidence: '0.75').min_confidence

    expect(threshold).to eq(0.75)
    expect(threshold).to be_a(Float)
  end

  # A non-empty String passed as detect_multiple is read back as true.
  it 'converts detect_multiple to boolean' do
    expect(described_class.new(detect_multiple: 'yes').detect_multiple).to be(true)
  end
end
45
+
46
describe '#to_h' do
  # Every supplied option appears in the hash under its symbol key.
  it 'serializes to hash with all values' do
    serialized = described_class.new(
      enabled: true,
      min_confidence: 0.8,
      detect_multiple: true
    ).to_h

    expect(serialized).to be_a(Hash)
    expect(serialized[:enabled]).to be(true)
    expect(serialized[:min_confidence]).to eq(0.8)
    expect(serialized[:detect_multiple]).to be(true)
  end

  # The key set is fixed: exactly these three symbols, in any order.
  it 'always includes all keys in hash' do
    expect(described_class.new.to_h.keys)
      .to match_array(%i[enabled min_confidence detect_multiple])
  end
end
72
+
73
describe 'validation' do
  # Each example only checks that construction does not raise.
  it 'accepts confidence value of 0.5' do
    expect { described_class.new(min_confidence: 0.5) }.not_to raise_error
  end

  it 'accepts confidence value of 0.0' do
    expect { described_class.new(min_confidence: 0.0) }.not_to raise_error
  end

  it 'accepts confidence value of 1.0' do
    expect { described_class.new(min_confidence: 1.0) }.not_to raise_error
  end

  it 'accepts boolean enabled' do
    expect { described_class.new(enabled: true) }.not_to raise_error
  end
end
98
+
99
describe 'keyword arguments' do
  # Passes every supported keyword at once and reads each one back.
  it 'accepts all keyword arguments' do
    settings = { enabled: true, min_confidence: 0.85, detect_multiple: true }

    expect(described_class.new(**settings)).to have_attributes(settings)
  end
end
112
+
113
describe 'equality' do
  # Two configs built from identical arguments must agree reader by reader
  # and in their complete #to_h snapshots, so every option is covered and
  # not only the two that were passed explicitly.
  it 'compares configs by value' do
    args = { enabled: true, min_confidence: 0.8 }
    config1 = described_class.new(**args)
    config2 = described_class.new(**args)

    expect(config1.enabled).to eq config2.enabled
    expect(config1.min_confidence).to eq config2.min_confidence
    expect(config1.to_h).to eq config2.to_h
  end

  # A different enabled flag must be visible on the reader and in the snapshot.
  it 'detects differences in enabled' do
    config1 = described_class.new(enabled: true)
    config2 = described_class.new(enabled: false)

    expect(config1.enabled).not_to eq config2.enabled
    expect(config1.to_h).not_to eq config2.to_h
  end

  # A different threshold must be visible on the reader and in the snapshot.
  it 'detects differences in min_confidence' do
    config1 = described_class.new(min_confidence: 0.5)
    config2 = described_class.new(min_confidence: 0.9)

    expect(config1.min_confidence).not_to eq config2.min_confidence
    expect(config1.to_h).not_to eq config2.to_h
  end

  # A different detect_multiple flag must be visible on the reader and in the snapshot.
  it 'detects differences in detect_multiple' do
    config1 = described_class.new(detect_multiple: true)
    config2 = described_class.new(detect_multiple: false)

    expect(config1.detect_multiple).not_to eq config2.detect_multiple
    expect(config1.to_h).not_to eq config2.to_h
  end
end
149
+
150
describe 'nested config integration' do
  # An instance handed to Extraction is exposed via #language_detection.
  it 'can be nested in Extraction config' do
    extraction = Kreuzberg::Config::Extraction.new(
      language_detection: described_class.new(enabled: true, min_confidence: 0.9)
    )
    nested = extraction.language_detection

    expect(nested).to be_a(described_class)
    expect(nested.enabled).to be(true)
    expect(nested.min_confidence).to eq(0.9)
  end

  # A plain Hash handed to Extraction is turned into a LanguageDetection instance.
  it 'accepts hash in Extraction config' do
    nested = Kreuzberg::Config::Extraction.new(
      language_detection: { enabled: true, min_confidence: 0.75 }
    ).language_detection

    expect(nested).to be_a(described_class)
    expect(nested.enabled).to be(true)
    expect(nested.min_confidence).to eq(0.75)
  end
end
170
+
171
describe 'symbol vs string key handling' do
  # Exercises the flag with a boolean, a String and a Symbol so the example
  # covers the value kinds its description names.
  # NOTE(review): the String and Symbol cases assume the same truthy coercion
  # that other examples in this spec show for `1` and 'yes' — confirm against
  # the constructor.
  it 'accepts symbol and string enabled values' do
    [true, 'yes', :yes].each do |value|
      config = described_class.new(enabled: value)

      expect(config.enabled).to be true
    end
  end

  # A numeric String passed as min_confidence is read back as a Float.
  it 'converts min_confidence string to float' do
    config = described_class.new(min_confidence: '0.95')

    expect(config.min_confidence).to eq 0.95
    expect(config.min_confidence).to be_a Float
  end
end
185
+
186
describe 'boolean conversion' do
  # Each example feeds one flag value in and checks the boolean read back.
  it 'converts truthy enabled to true' do
    expect(described_class.new(enabled: 1).enabled).to be(true)
  end

  it 'converts false enabled to false' do
    expect(described_class.new(enabled: false).enabled).to be(false)
  end

  it 'converts truthy detect_multiple to true' do
    expect(described_class.new(detect_multiple: 'yes').detect_multiple).to be(true)
  end

  it 'converts false detect_multiple to false' do
    expect(described_class.new(detect_multiple: false).detect_multiple).to be(false)
  end
end
211
+
212
describe 'confidence range' do
  # Boundary and mid-range thresholds are read back unchanged.
  it 'accepts minimum confidence value' do
    expect(described_class.new(min_confidence: 0.0).min_confidence).to eq(0.0)
  end

  it 'accepts maximum confidence value' do
    expect(described_class.new(min_confidence: 1.0).min_confidence).to eq(1.0)
  end

  it 'accepts mid-range confidence value' do
    expect(described_class.new(min_confidence: 0.6).min_confidence).to eq(0.6)
  end

  # Compared with a tolerance rather than exact float equality.
  it 'preserves high precision confidence values' do
    threshold = described_class.new(min_confidence: 0.123456).min_confidence

    expect(threshold).to be_within(0.00001).of(0.123456)
  end
end
237
+
238
describe 'multiple language detection' do
  # detect_multiple can be switched on explicitly.
  it 'allows enabling multiple language detection' do
    expect(described_class.new(detect_multiple: true).detect_multiple).to be(true)
  end

  # Without arguments detect_multiple is read back as false.
  it 'defaults to single language detection' do
    expect(described_class.new.detect_multiple).to be(false)
  end

  # enabled and detect_multiple are independent of each other.
  it 'can be disabled when enabled is true' do
    expect(described_class.new(enabled: true, detect_multiple: false))
      .to have_attributes(enabled: true, detect_multiple: false)
  end
end
258
+ end