kreuzberg 4.3.5-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yaml +1 -0
- data/.rubocop.yml +543 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +260 -0
- data/README.md +399 -0
- data/Rakefile +34 -0
- data/Steepfile +51 -0
- data/examples/async_patterns.rb +283 -0
- data/extconf.rb +60 -0
- data/kreuzberg.gemspec +253 -0
- data/lib/kreuzberg/api_proxy.rb +125 -0
- data/lib/kreuzberg/cache_api.rb +67 -0
- data/lib/kreuzberg/cli.rb +57 -0
- data/lib/kreuzberg/cli_proxy.rb +118 -0
- data/lib/kreuzberg/config.rb +1241 -0
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/document_structure.rb +204 -0
- data/lib/kreuzberg/error_context.rb +136 -0
- data/lib/kreuzberg/errors.rb +116 -0
- data/lib/kreuzberg/extraction_api.rb +329 -0
- data/lib/kreuzberg/mcp_proxy.rb +176 -0
- data/lib/kreuzberg/ocr_backend_protocol.rb +40 -0
- data/lib/kreuzberg/post_processor_protocol.rb +15 -0
- data/lib/kreuzberg/result.rb +712 -0
- data/lib/kreuzberg/setup_lib_path.rb +99 -0
- data/lib/kreuzberg/types.rb +414 -0
- data/lib/kreuzberg/validator_protocol.rb +16 -0
- data/lib/kreuzberg/version.rb +5 -0
- data/lib/kreuzberg.rb +102 -0
- data/lib/kreuzberg_rb.so +0 -0
- data/lib/libpdfium.so +0 -0
- data/sig/kreuzberg/internal.rbs +184 -0
- data/sig/kreuzberg.rbs +1337 -0
- data/spec/binding/async_operations_spec.rb +473 -0
- data/spec/binding/batch_operations_spec.rb +677 -0
- data/spec/binding/batch_spec.rb +360 -0
- data/spec/binding/cache_spec.rb +227 -0
- data/spec/binding/cli_proxy_spec.rb +85 -0
- data/spec/binding/cli_spec.rb +55 -0
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_spec.rb +419 -0
- data/spec/binding/config_validation_spec.rb +377 -0
- data/spec/binding/embeddings_spec.rb +816 -0
- data/spec/binding/error_handling_spec.rb +399 -0
- data/spec/binding/error_recovery_spec.rb +488 -0
- data/spec/binding/errors_spec.rb +66 -0
- data/spec/binding/font_config_spec.rb +220 -0
- data/spec/binding/images_spec.rb +732 -0
- data/spec/binding/keywords_extraction_spec.rb +600 -0
- data/spec/binding/metadata_types_spec.rb +1253 -0
- data/spec/binding/pages_extraction_spec.rb +550 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
- data/spec/binding/plugins/postprocessor_spec.rb +269 -0
- data/spec/binding/plugins/validator_spec.rb +273 -0
- data/spec/binding/tables_spec.rb +650 -0
- data/spec/fixtures/config.toml +38 -0
- data/spec/fixtures/config.yaml +41 -0
- data/spec/fixtures/invalid_config.toml +3 -0
- data/spec/serialization_spec.rb +134 -0
- data/spec/smoke/package_spec.rb +177 -0
- data/spec/spec_helper.rb +40 -0
- data/spec/unit/config/chunking_config_spec.rb +213 -0
- data/spec/unit/config/embedding_config_spec.rb +343 -0
- data/spec/unit/config/extraction_config_spec.rb +434 -0
- data/spec/unit/config/font_config_spec.rb +285 -0
- data/spec/unit/config/hierarchy_config_spec.rb +314 -0
- data/spec/unit/config/image_extraction_config_spec.rb +209 -0
- data/spec/unit/config/image_preprocessing_config_spec.rb +230 -0
- data/spec/unit/config/keyword_config_spec.rb +229 -0
- data/spec/unit/config/language_detection_config_spec.rb +258 -0
- data/spec/unit/config/ocr_config_spec.rb +171 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/spec/unit/config/page_config_spec.rb +221 -0
- data/spec/unit/config/pdf_config_spec.rb +267 -0
- data/spec/unit/config/postprocessor_config_spec.rb +290 -0
- data/spec/unit/config/tesseract_config_spec.rb +181 -0
- data/spec/unit/config/token_reduction_config_spec.rb +251 -0
- data/test/metadata_types_test.rb +959 -0
- metadata +292 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Unit specs for Kreuzberg::Config::PageConfig. They cover construction with
# default and custom options, hash serialization, coercion of option values,
# and nesting inside Kreuzberg::Config::Extraction.
RSpec.describe Kreuzberg::Config::PageConfig do
  describe '#initialize' do
    it 'creates config with default values' do
      page_config = described_class.new

      # Both switches are off by default and the marker template carries the
      # {page_num} placeholder surrounded by blank lines.
      expect(page_config.extract_pages).to be(false)
      expect(page_config.insert_page_markers).to be(false)
      expect(page_config.marker_format).to eq("\n\n<!-- PAGE {page_num} -->\n\n")
    end

    it 'creates config with custom values' do
      options = {
        extract_pages: true,
        insert_page_markers: true,
        marker_format: '--- PAGE {page_num} ---'
      }
      page_config = described_class.new(**options)

      expect(page_config.extract_pages).to be(true)
      expect(page_config.insert_page_markers).to be(true)
      expect(page_config.marker_format).to eq('--- PAGE {page_num} ---')
    end

    # NOTE(review): both inputs here are already literal booleans; coercion of
    # non-boolean inputs is exercised in the 'boolean conversion' group below.
    it 'converts boolean values' do
      page_config = described_class.new(extract_pages: true, insert_page_markers: false)

      expect(page_config.extract_pages).to be(true)
      expect(page_config.insert_page_markers).to be(false)
    end

    it 'converts marker_format to string' do
      page_config = described_class.new(marker_format: :default)

      expect(page_config.marker_format).to be_a(String)
    end
  end

  describe '#to_h' do
    it 'serializes to hash with all values' do
      serialized = described_class.new(extract_pages: true).to_h

      expect(serialized).to be_a(Hash)
      expect(serialized[:extract_pages]).to be(true)
      # Options that were not supplied are serialized with their defaults.
      expect(serialized[:insert_page_markers]).to be(false)
      expect(serialized[:marker_format]).to eq("\n\n<!-- PAGE {page_num} -->\n\n")
    end

    it 'always includes all keys in hash' do
      serialized = described_class.new.to_h

      expect(serialized.keys).to match_array(%i[extract_pages insert_page_markers marker_format])
    end
  end

  describe 'validation' do
    it 'accepts boolean extract_pages' do
      expect { described_class.new(extract_pages: true) }.not_to raise_error
    end

    it 'accepts boolean insert_page_markers' do
      expect { described_class.new(insert_page_markers: true) }.not_to raise_error
    end

    it 'accepts custom marker formats' do
      expect { described_class.new(marker_format: '===== PAGE {page_num} =====') }.not_to raise_error
    end
  end

  describe 'keyword arguments' do
    it 'accepts all keyword arguments' do
      options = {
        extract_pages: true,
        insert_page_markers: true,
        marker_format: 'Page: {page_num}'
      }
      page_config = described_class.new(**options)

      expect(page_config.extract_pages).to be(true)
      expect(page_config.insert_page_markers).to be(true)
      expect(page_config.marker_format).to eq('Page: {page_num}')
    end
  end

  # These examples compare individual attribute readers of two instances; they
  # do not call `==` on the config objects themselves.
  describe 'equality' do
    it 'compares configs by value' do
      options = {
        extract_pages: true,
        insert_page_markers: true,
        marker_format: '--- PAGE {page_num} ---'
      }
      left = described_class.new(**options)
      right = described_class.new(**options)

      expect(left.extract_pages).to eq(right.extract_pages)
      expect(left.insert_page_markers).to eq(right.insert_page_markers)
      expect(left.marker_format).to eq(right.marker_format)
    end

    it 'detects differences in extract_pages' do
      enabled = described_class.new(extract_pages: true)
      disabled = described_class.new(extract_pages: false)

      expect(enabled.extract_pages).not_to eq(disabled.extract_pages)
    end

    it 'detects differences in marker_format' do
      first = described_class.new(marker_format: 'Format A')
      second = described_class.new(marker_format: 'Format B')

      expect(first.marker_format).not_to eq(second.marker_format)
    end
  end

  describe 'nested config integration' do
    it 'can be nested in Extraction config' do
      extraction = Kreuzberg::Config::Extraction.new(
        pages: described_class.new(extract_pages: true)
      )

      expect(extraction.pages).to be_a(described_class)
      expect(extraction.pages.extract_pages).to be(true)
    end

    it 'accepts hash in Extraction config' do
      page_options = { extract_pages: true, insert_page_markers: true }
      extraction = Kreuzberg::Config::Extraction.new(pages: page_options)

      # A plain hash passed as `pages:` is exposed as a PageConfig instance.
      expect(extraction.pages).to be_a(described_class)
      expect(extraction.pages.extract_pages).to be(true)
      expect(extraction.pages.insert_page_markers).to be(true)
    end
  end

  describe 'marker format' do
    it 'preserves custom marker format' do
      marker = '=== PAGE {page_num} ==='
      page_config = described_class.new(marker_format: marker)

      expect(page_config.marker_format).to eq(marker)
    end

    it 'preserves default marker format' do
      expect(described_class.new.marker_format).to include('{page_num}')
    end

    it 'allows empty marker format' do
      page_config = described_class.new(marker_format: '')

      expect(page_config.marker_format).to eq('')
    end

    it 'handles multiline marker formats' do
      marker = "\n--- PAGE {page_num} ---\n"
      page_config = described_class.new(marker_format: marker)

      expect(page_config.marker_format).to eq(marker)
    end
  end

  # The examples in this group pass Symbol and String *values* for
  # marker_format and check the type/value read back.
  describe 'symbol vs string key handling' do
    it 'converts symbol values to strings' do
      page_config = described_class.new(marker_format: :default_format)

      expect(page_config.marker_format).to be_a(String)
    end

    it 'preserves string marker format' do
      marker = 'Custom Format'
      page_config = described_class.new(marker_format: marker)

      expect(page_config.marker_format).to eq(marker)
      expect(page_config.marker_format).to be_a(String)
    end
  end

  describe 'boolean conversion' do
    it 'converts truthy extract_pages to true' do
      # An Integer input is expected to be read back as the literal `true`.
      page_config = described_class.new(extract_pages: 1)

      expect(page_config.extract_pages).to be(true)
    end

    it 'converts false extract_pages to false' do
      page_config = described_class.new(extract_pages: false)

      expect(page_config.extract_pages).to be(false)
    end

    it 'converts truthy insert_page_markers to true' do
      # A non-empty String input is expected to be read back as the literal `true`.
      page_config = described_class.new(insert_page_markers: 'yes')

      expect(page_config.insert_page_markers).to be(true)
    end

    it 'converts false insert_page_markers to false' do
      page_config = described_class.new(insert_page_markers: false)

      expect(page_config.insert_page_markers).to be(false)
    end
  end
end
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Unit specs for Kreuzberg::Config::PDF. They cover construction, password
# normalization, nested font_config/hierarchy handling (at construction and via
# the writers), hash serialization, and nesting inside
# Kreuzberg::Config::Extraction.
RSpec.describe Kreuzberg::Config::PDF do
  describe '#initialize' do
    it 'creates config with default values' do
      pdf_config = described_class.new

      # Only metadata extraction is on by default; the optional members are nil.
      expect(pdf_config.extract_images).to be(false)
      expect(pdf_config.passwords).to be_nil
      expect(pdf_config.extract_metadata).to be(true)
      expect(pdf_config.font_config).to be_nil
      expect(pdf_config.hierarchy).to be_nil
    end

    it 'creates config with custom values' do
      options = {
        extract_images: true,
        passwords: %w[secret backup],
        extract_metadata: false
      }
      pdf_config = described_class.new(**options)

      expect(pdf_config.extract_images).to be(true)
      expect(pdf_config.passwords).to eq(%w[secret backup])
      expect(pdf_config.extract_metadata).to be(false)
    end

    it 'accepts passwords as single string' do
      pdf_config = described_class.new(passwords: 'secret')

      # A lone string is read back wrapped in a one-element array.
      expect(pdf_config.passwords).to eq(['secret'])
      expect(pdf_config.passwords).to be_an(Array)
    end

    it 'accepts passwords as array of strings' do
      pdf_config = described_class.new(passwords: %w[pwd1 pwd2 pwd3])

      expect(pdf_config.passwords).to eq(%w[pwd1 pwd2 pwd3])
    end

    it 'converts passwords to strings' do
      pdf_config = described_class.new(passwords: [123, :symbol])

      expect(pdf_config.passwords).to eq(%w[123 symbol])
      expect(pdf_config.passwords).to all(be_a(String))
    end

    it 'accepts font_config as instance' do
      fonts = Kreuzberg::Config::FontConfig.new(enabled: true)
      pdf_config = described_class.new(font_config: fonts)

      expect(pdf_config.font_config).to be_a(Kreuzberg::Config::FontConfig)
    end

    it 'converts font_config hash to instance' do
      pdf_config = described_class.new(font_config: { enabled: false })

      expect(pdf_config.font_config).to be_a(Kreuzberg::Config::FontConfig)
      expect(pdf_config.font_config.enabled).to be(false)
    end

    it 'accepts hierarchy as instance' do
      tree = Kreuzberg::Config::Hierarchy.new(enabled: true)
      pdf_config = described_class.new(hierarchy: tree)

      expect(pdf_config.hierarchy).to be_a(Kreuzberg::Config::Hierarchy)
    end

    it 'converts hierarchy hash to instance' do
      pdf_config = described_class.new(hierarchy: { enabled: true, k_clusters: 8 })

      expect(pdf_config.hierarchy).to be_a(Kreuzberg::Config::Hierarchy)
      expect(pdf_config.hierarchy.enabled).to be(true)
    end
  end

  describe '#to_h' do
    it 'serializes to hash with default values' do
      serialized = described_class.new.to_h

      expect(serialized).to be_a(Hash)
      expect(serialized[:extract_images]).to be(false)
      expect(serialized[:extract_metadata]).to be(true)
    end

    it 'includes passwords in hash when present' do
      serialized = described_class.new(passwords: %w[secret backup]).to_h

      expect(serialized[:passwords]).to eq(%w[secret backup])
    end

    it 'includes font_config in hash when present' do
      serialized = described_class.new(font_config: { enabled: true }).to_h

      # The nested config is serialized as a Hash as well.
      expect(serialized[:font_config]).to be_a(Hash)
    end

    it 'includes hierarchy in hash when present' do
      serialized = described_class.new(hierarchy: { enabled: true }).to_h

      expect(serialized[:hierarchy]).to be_a(Hash)
    end

    it 'compacts nil values from hash' do
      serialized = described_class.new(extract_images: true).to_h

      # Members left unset must not appear as keys at all.
      expect(serialized).not_to have_key(:passwords)
      expect(serialized).not_to have_key(:font_config)
    end
  end

  describe 'validation' do
    it 'rejects invalid font_config type' do
      expect { described_class.new(font_config: 'invalid') }
        .to raise_error(ArgumentError, /Expected.*FontConfig.*Hash.*nil/)
    end

    it 'rejects invalid hierarchy type' do
      expect { described_class.new(hierarchy: 'invalid') }
        .to raise_error(ArgumentError, /Expected.*Hierarchy.*Hash.*nil/)
    end

    it 'accepts valid boolean extract_images' do
      expect { described_class.new(extract_images: true) }.not_to raise_error
    end
  end

  describe 'keyword arguments' do
    it 'accepts all keyword arguments' do
      options = {
        extract_images: true,
        passwords: %w[pwd1 pwd2],
        extract_metadata: false,
        font_config: { enabled: true },
        hierarchy: { k_clusters: 10 }
      }
      pdf_config = described_class.new(**options)

      expect(pdf_config.extract_images).to be(true)
      expect(pdf_config.passwords).to eq(%w[pwd1 pwd2])
      expect(pdf_config.extract_metadata).to be(false)
      expect(pdf_config.font_config).to be_a(Kreuzberg::Config::FontConfig)
      expect(pdf_config.hierarchy).to be_a(Kreuzberg::Config::Hierarchy)
    end
  end

  # These examples compare individual attribute readers of two instances; they
  # do not call `==` on the config objects themselves.
  describe 'equality' do
    it 'compares configs by value' do
      options = { extract_images: true, extract_metadata: false }
      left = described_class.new(**options)
      right = described_class.new(**options)

      expect(left.extract_images).to eq(right.extract_images)
      expect(left.extract_metadata).to eq(right.extract_metadata)
    end

    it 'detects differences in extract_images' do
      enabled = described_class.new(extract_images: true)
      disabled = described_class.new(extract_images: false)

      expect(enabled.extract_images).not_to eq(disabled.extract_images)
    end

    it 'detects differences in passwords' do
      first = described_class.new(passwords: %w[pwd1])
      second = described_class.new(passwords: %w[pwd2])

      expect(first.passwords).not_to eq(second.passwords)
    end
  end

  describe 'nested config integration' do
    it 'can be nested in Extraction config' do
      extraction = Kreuzberg::Config::Extraction.new(
        pdf_options: described_class.new(extract_images: true)
      )

      expect(extraction.pdf_options).to be_a(described_class)
      expect(extraction.pdf_options.extract_images).to be(true)
    end

    it 'accepts hash in Extraction config' do
      pdf_options = { extract_images: true, passwords: ['secret'] }
      extraction = Kreuzberg::Config::Extraction.new(pdf_options: pdf_options)

      # A plain hash passed as `pdf_options:` is exposed as a PDF config instance.
      expect(extraction.pdf_options).to be_a(described_class)
      expect(extraction.pdf_options.extract_images).to be(true)
      expect(extraction.pdf_options.passwords).to eq(['secret'])
    end
  end

  describe 'font_config assignment' do
    it 'allows setting font_config after initialization' do
      pdf_config = described_class.new
      pdf_config.font_config = Kreuzberg::Config::FontConfig.new(enabled: true)

      expect(pdf_config.font_config).to be_a(Kreuzberg::Config::FontConfig)
      expect(pdf_config.font_config.enabled).to be(true)
    end

    it 'converts hash to font_config instance on assignment' do
      pdf_config = described_class.new
      pdf_config.font_config = { enabled: false }

      expect(pdf_config.font_config).to be_a(Kreuzberg::Config::FontConfig)
      expect(pdf_config.font_config.enabled).to be(false)
    end
  end

  describe 'hierarchy assignment' do
    it 'allows setting hierarchy after initialization' do
      pdf_config = described_class.new
      pdf_config.hierarchy = Kreuzberg::Config::Hierarchy.new(enabled: true)

      expect(pdf_config.hierarchy).to be_a(Kreuzberg::Config::Hierarchy)
      expect(pdf_config.hierarchy.enabled).to be(true)
    end

    it 'converts hash to hierarchy instance on assignment' do
      pdf_config = described_class.new
      pdf_config.hierarchy = { enabled: true, k_clusters: 6 }

      expect(pdf_config.hierarchy).to be_a(Kreuzberg::Config::Hierarchy)
      expect(pdf_config.hierarchy.enabled).to be(true)
    end
  end

  describe 'boolean conversion' do
    it 'converts truthy extract_images to true' do
      # An Integer input is expected to be read back as the literal `true`.
      pdf_config = described_class.new(extract_images: 1)

      expect(pdf_config.extract_images).to be(true)
    end

    it 'converts false extract_images to false' do
      pdf_config = described_class.new(extract_images: false)

      expect(pdf_config.extract_images).to be(false)
    end

    it 'converts truthy extract_metadata to true' do
      # A non-empty String input is expected to be read back as the literal `true`.
      pdf_config = described_class.new(extract_metadata: 'yes')

      expect(pdf_config.extract_metadata).to be(true)
    end

    it 'converts false extract_metadata to false' do
      pdf_config = described_class.new(extract_metadata: false)

      expect(pdf_config.extract_metadata).to be(false)
    end
  end
end
|