kreuzberg 4.3.5-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yaml +1 -0
- data/.rubocop.yml +543 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +260 -0
- data/README.md +399 -0
- data/Rakefile +34 -0
- data/Steepfile +51 -0
- data/examples/async_patterns.rb +283 -0
- data/extconf.rb +60 -0
- data/kreuzberg.gemspec +253 -0
- data/lib/kreuzberg/api_proxy.rb +125 -0
- data/lib/kreuzberg/cache_api.rb +67 -0
- data/lib/kreuzberg/cli.rb +57 -0
- data/lib/kreuzberg/cli_proxy.rb +118 -0
- data/lib/kreuzberg/config.rb +1241 -0
- data/lib/kreuzberg/djot_content.rb +225 -0
- data/lib/kreuzberg/document_structure.rb +204 -0
- data/lib/kreuzberg/error_context.rb +136 -0
- data/lib/kreuzberg/errors.rb +116 -0
- data/lib/kreuzberg/extraction_api.rb +329 -0
- data/lib/kreuzberg/mcp_proxy.rb +176 -0
- data/lib/kreuzberg/ocr_backend_protocol.rb +40 -0
- data/lib/kreuzberg/post_processor_protocol.rb +15 -0
- data/lib/kreuzberg/result.rb +712 -0
- data/lib/kreuzberg/setup_lib_path.rb +99 -0
- data/lib/kreuzberg/types.rb +414 -0
- data/lib/kreuzberg/validator_protocol.rb +16 -0
- data/lib/kreuzberg/version.rb +5 -0
- data/lib/kreuzberg.rb +102 -0
- data/lib/kreuzberg_rb.so +0 -0
- data/lib/libpdfium.so +0 -0
- data/sig/kreuzberg/internal.rbs +184 -0
- data/sig/kreuzberg.rbs +1337 -0
- data/spec/binding/async_operations_spec.rb +473 -0
- data/spec/binding/batch_operations_spec.rb +677 -0
- data/spec/binding/batch_spec.rb +360 -0
- data/spec/binding/cache_spec.rb +227 -0
- data/spec/binding/cli_proxy_spec.rb +85 -0
- data/spec/binding/cli_spec.rb +55 -0
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_spec.rb +419 -0
- data/spec/binding/config_validation_spec.rb +377 -0
- data/spec/binding/embeddings_spec.rb +816 -0
- data/spec/binding/error_handling_spec.rb +399 -0
- data/spec/binding/error_recovery_spec.rb +488 -0
- data/spec/binding/errors_spec.rb +66 -0
- data/spec/binding/font_config_spec.rb +220 -0
- data/spec/binding/images_spec.rb +732 -0
- data/spec/binding/keywords_extraction_spec.rb +600 -0
- data/spec/binding/metadata_types_spec.rb +1253 -0
- data/spec/binding/pages_extraction_spec.rb +550 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
- data/spec/binding/plugins/postprocessor_spec.rb +269 -0
- data/spec/binding/plugins/validator_spec.rb +273 -0
- data/spec/binding/tables_spec.rb +650 -0
- data/spec/fixtures/config.toml +38 -0
- data/spec/fixtures/config.yaml +41 -0
- data/spec/fixtures/invalid_config.toml +3 -0
- data/spec/serialization_spec.rb +134 -0
- data/spec/smoke/package_spec.rb +177 -0
- data/spec/spec_helper.rb +40 -0
- data/spec/unit/config/chunking_config_spec.rb +213 -0
- data/spec/unit/config/embedding_config_spec.rb +343 -0
- data/spec/unit/config/extraction_config_spec.rb +434 -0
- data/spec/unit/config/font_config_spec.rb +285 -0
- data/spec/unit/config/hierarchy_config_spec.rb +314 -0
- data/spec/unit/config/image_extraction_config_spec.rb +209 -0
- data/spec/unit/config/image_preprocessing_config_spec.rb +230 -0
- data/spec/unit/config/keyword_config_spec.rb +229 -0
- data/spec/unit/config/language_detection_config_spec.rb +258 -0
- data/spec/unit/config/ocr_config_spec.rb +171 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/spec/unit/config/page_config_spec.rb +221 -0
- data/spec/unit/config/pdf_config_spec.rb +267 -0
- data/spec/unit/config/postprocessor_config_spec.rb +290 -0
- data/spec/unit/config/tesseract_config_spec.rb +181 -0
- data/spec/unit/config/token_reduction_config_spec.rb +251 -0
- data/test/metadata_types_test.rb +959 -0
- metadata +292 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Specs for Kreuzberg::Config::FontConfig. They cover construction with and
# without keyword arguments, the enabled / custom_font_dirs writers, how
# directory lists are stored, #to_h output, and attaching a FontConfig to
# Kreuzberg::Config::PDF through its font_config attribute.
RSpec.describe Kreuzberg::Config::FontConfig do
  describe 'initialization' do
    it 'creates font config with defaults' do
      font_cfg = described_class.new

      expect(font_cfg.enabled).to be(true)
      expect(font_cfg.custom_font_dirs).to be_nil
    end

    it 'creates font config with custom values' do
      font_cfg = described_class.new(enabled: true, custom_font_dirs: ['/usr/share/fonts/custom'])

      expect(font_cfg.enabled).to be(true)
      expect(font_cfg.custom_font_dirs).to eq(['/usr/share/fonts/custom'])
    end

    it 'creates font config with enabled=false' do
      font_cfg = described_class.new(enabled: false)

      expect(font_cfg.enabled).to be(false)
      expect(font_cfg.custom_font_dirs).to be_nil
    end

    it 'creates font config with custom directories' do
      font_dirs = ['/usr/share/fonts/custom', '~/my-fonts']
      font_cfg = described_class.new(custom_font_dirs: font_dirs)

      # enabled is not passed here, so this also checks the default stays true.
      expect(font_cfg.enabled).to be(true)
      expect(font_cfg.custom_font_dirs).to eq(font_dirs)
    end

    it 'creates font config with all parameters' do
      font_dirs = ['/path/to/fonts']
      font_cfg = described_class.new(enabled: true, custom_font_dirs: font_dirs)

      expect(font_cfg.enabled).to be(true)
      expect(font_cfg.custom_font_dirs).to eq(font_dirs)
    end
  end

  describe 'attribute access' do
    it 'allows setting enabled via attr_accessor' do
      font_cfg = described_class.new
      font_cfg.enabled = false

      expect(font_cfg.enabled).to be(false)
    end

    it 'allows setting custom_font_dirs via attr_accessor' do
      font_cfg = described_class.new
      font_dirs = ['/new/path']
      font_cfg.custom_font_dirs = font_dirs

      expect(font_cfg.custom_font_dirs).to eq(font_dirs)
    end

    it 'allows clearing custom_font_dirs' do
      font_cfg = described_class.new(custom_font_dirs: ['/path1', '/path2'])
      font_cfg.custom_font_dirs = nil

      expect(font_cfg.custom_font_dirs).to be_nil
    end
  end

  describe 'custom directories' do
    it 'handles empty custom directories array' do
      font_cfg = described_class.new(custom_font_dirs: [])

      # An explicit empty array is expected back as an empty array, not nil.
      expect(font_cfg.custom_font_dirs).to eq([])
      expect(font_cfg.custom_font_dirs.length).to eq(0)
    end

    it 'handles multiple custom directories' do
      font_dirs = ['/path1', '/path2', '/path3', '~/fonts', './relative-fonts']
      font_cfg = described_class.new(custom_font_dirs: font_dirs)

      expect(font_cfg.custom_font_dirs).to eq(font_dirs)
      expect(font_cfg.custom_font_dirs.length).to eq(5)
    end

    it 'preserves directory paths with tilde' do
      font_dirs = ['~/my-fonts', '~/Documents/fonts']
      font_cfg = described_class.new(custom_font_dirs: font_dirs)

      # The literal '~' entries must still be present when read back.
      expect(font_cfg.custom_font_dirs).to include('~/my-fonts')
      expect(font_cfg.custom_font_dirs).to include('~/Documents/fonts')
    end

    it 'preserves relative paths' do
      font_dirs = ['./fonts', '../fonts']
      font_cfg = described_class.new(custom_font_dirs: font_dirs)

      expect(font_cfg.custom_font_dirs).to include('./fonts')
      expect(font_cfg.custom_font_dirs).to include('../fonts')
    end
  end

  describe 'conversion' do
    it 'converts to hash' do
      font_cfg = described_class.new(enabled: true, custom_font_dirs: ['/fonts'])
      as_hash = font_cfg.to_h

      expect(as_hash).to be_a(Hash)
      expect(as_hash[:enabled]).to be(true)
      expect(as_hash[:custom_font_dirs]).to eq(['/fonts'])
    end

    it 'converts to hash with nil custom_dirs' do
      font_cfg = described_class.new(enabled: false)
      as_hash = font_cfg.to_h

      expect(as_hash).to be_a(Hash)
      expect(as_hash[:enabled]).to be(false)
      expect(as_hash[:custom_font_dirs]).to be_nil
    end

    it 'converts to hash and includes all keys' do
      font_cfg = described_class.new(enabled: true, custom_font_dirs: ['/fonts'])
      as_hash = font_cfg.to_h

      expect(as_hash).to be_a(Hash)
      expect(as_hash).to include(enabled: true)
      expect(as_hash).to include(custom_font_dirs: ['/fonts'])
    end
  end

  describe 'integration with PdfConfig' do
    it 'integrates with PdfConfig' do
      font_cfg = described_class.new(enabled: true, custom_font_dirs: ['/fonts'])
      pdf_cfg = Kreuzberg::Config::PDF.new(font_config: font_cfg)

      expect(pdf_cfg.font_config).not_to be_nil
      expect(pdf_cfg.font_config.enabled).to be(true)
      expect(pdf_cfg.font_config.custom_font_dirs).to eq(['/fonts'])
    end

    it 'integrates with PdfConfig with all parameters' do
      font_cfg = described_class.new(enabled: true, custom_font_dirs: ['/custom-fonts'])
      pdf_cfg = Kreuzberg::Config::PDF.new(
        extract_images: true,
        passwords: ['pass1'],
        extract_metadata: true,
        font_config: font_cfg
      )

      expect(pdf_cfg.extract_images).to be(true)
      expect(pdf_cfg.passwords).to eq(['pass1'])
      expect(pdf_cfg.extract_metadata).to be(true)
      expect(pdf_cfg.font_config.enabled).to be(true)
    end

    it 'allows setting font_config via setter' do
      pdf_cfg = Kreuzberg::Config::PDF.new
      font_cfg = described_class.new(enabled: false)

      pdf_cfg.font_config = font_cfg

      expect(pdf_cfg.font_config).not_to be_nil
      expect(pdf_cfg.font_config.enabled).to be(false)
    end

    it 'allows clearing font_config via setter' do
      font_cfg = described_class.new(custom_font_dirs: ['/fonts'])
      pdf_cfg = Kreuzberg::Config::PDF.new(font_config: font_cfg)

      # Confirm the config is attached before clearing it.
      expect(pdf_cfg.font_config).not_to be_nil

      pdf_cfg.font_config = nil

      expect(pdf_cfg.font_config).to be_nil
    end
  end

  describe 'edge cases' do
    it 'handles disabled with custom directories' do
      font_cfg = described_class.new(enabled: false, custom_font_dirs: ['/fonts'])

      # Directories are still stored even though enabled is false.
      expect(font_cfg.enabled).to be(false)
      expect(font_cfg.custom_font_dirs).to eq(['/fonts'])
    end

    it 'handles multiple changes to enabled flag' do
      font_cfg = described_class.new
      font_cfg.enabled = false
      font_cfg.enabled = true
      font_cfg.enabled = false

      # Only the last assignment should be observable.
      expect(font_cfg.enabled).to be(false)
    end

    it 'handles multiple changes to custom directories' do
      font_cfg = described_class.new
      font_cfg.custom_font_dirs = ['/path1']
      font_cfg.custom_font_dirs = ['/path1', '/path2']
      font_cfg.custom_font_dirs = []

      expect(font_cfg.custom_font_dirs).to eq([])
    end
  end
end
|