kreuzberg 4.3.5-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yaml +1 -0
  5. data/.rubocop.yml +543 -0
  6. data/Gemfile +8 -0
  7. data/Gemfile.lock +260 -0
  8. data/README.md +399 -0
  9. data/Rakefile +34 -0
  10. data/Steepfile +51 -0
  11. data/examples/async_patterns.rb +283 -0
  12. data/extconf.rb +60 -0
  13. data/kreuzberg.gemspec +253 -0
  14. data/lib/kreuzberg/api_proxy.rb +125 -0
  15. data/lib/kreuzberg/cache_api.rb +67 -0
  16. data/lib/kreuzberg/cli.rb +57 -0
  17. data/lib/kreuzberg/cli_proxy.rb +118 -0
  18. data/lib/kreuzberg/config.rb +1241 -0
  19. data/lib/kreuzberg/djot_content.rb +225 -0
  20. data/lib/kreuzberg/document_structure.rb +204 -0
  21. data/lib/kreuzberg/error_context.rb +136 -0
  22. data/lib/kreuzberg/errors.rb +116 -0
  23. data/lib/kreuzberg/extraction_api.rb +329 -0
  24. data/lib/kreuzberg/mcp_proxy.rb +176 -0
  25. data/lib/kreuzberg/ocr_backend_protocol.rb +40 -0
  26. data/lib/kreuzberg/post_processor_protocol.rb +15 -0
  27. data/lib/kreuzberg/result.rb +712 -0
  28. data/lib/kreuzberg/setup_lib_path.rb +99 -0
  29. data/lib/kreuzberg/types.rb +414 -0
  30. data/lib/kreuzberg/validator_protocol.rb +16 -0
  31. data/lib/kreuzberg/version.rb +5 -0
  32. data/lib/kreuzberg.rb +102 -0
  33. data/lib/kreuzberg_rb.so +0 -0
  34. data/lib/libpdfium.so +0 -0
  35. data/sig/kreuzberg/internal.rbs +184 -0
  36. data/sig/kreuzberg.rbs +1337 -0
  37. data/spec/binding/async_operations_spec.rb +473 -0
  38. data/spec/binding/batch_operations_spec.rb +677 -0
  39. data/spec/binding/batch_spec.rb +360 -0
  40. data/spec/binding/cache_spec.rb +227 -0
  41. data/spec/binding/cli_proxy_spec.rb +85 -0
  42. data/spec/binding/cli_spec.rb +55 -0
  43. data/spec/binding/config_result_spec.rb +377 -0
  44. data/spec/binding/config_spec.rb +419 -0
  45. data/spec/binding/config_validation_spec.rb +377 -0
  46. data/spec/binding/embeddings_spec.rb +816 -0
  47. data/spec/binding/error_handling_spec.rb +399 -0
  48. data/spec/binding/error_recovery_spec.rb +488 -0
  49. data/spec/binding/errors_spec.rb +66 -0
  50. data/spec/binding/font_config_spec.rb +220 -0
  51. data/spec/binding/images_spec.rb +732 -0
  52. data/spec/binding/keywords_extraction_spec.rb +600 -0
  53. data/spec/binding/metadata_types_spec.rb +1253 -0
  54. data/spec/binding/pages_extraction_spec.rb +550 -0
  55. data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
  56. data/spec/binding/plugins/postprocessor_spec.rb +269 -0
  57. data/spec/binding/plugins/validator_spec.rb +273 -0
  58. data/spec/binding/tables_spec.rb +650 -0
  59. data/spec/fixtures/config.toml +38 -0
  60. data/spec/fixtures/config.yaml +41 -0
  61. data/spec/fixtures/invalid_config.toml +3 -0
  62. data/spec/serialization_spec.rb +134 -0
  63. data/spec/smoke/package_spec.rb +177 -0
  64. data/spec/spec_helper.rb +40 -0
  65. data/spec/unit/config/chunking_config_spec.rb +213 -0
  66. data/spec/unit/config/embedding_config_spec.rb +343 -0
  67. data/spec/unit/config/extraction_config_spec.rb +434 -0
  68. data/spec/unit/config/font_config_spec.rb +285 -0
  69. data/spec/unit/config/hierarchy_config_spec.rb +314 -0
  70. data/spec/unit/config/image_extraction_config_spec.rb +209 -0
  71. data/spec/unit/config/image_preprocessing_config_spec.rb +230 -0
  72. data/spec/unit/config/keyword_config_spec.rb +229 -0
  73. data/spec/unit/config/language_detection_config_spec.rb +258 -0
  74. data/spec/unit/config/ocr_config_spec.rb +171 -0
  75. data/spec/unit/config/output_format_spec.rb +380 -0
  76. data/spec/unit/config/page_config_spec.rb +221 -0
  77. data/spec/unit/config/pdf_config_spec.rb +267 -0
  78. data/spec/unit/config/postprocessor_config_spec.rb +290 -0
  79. data/spec/unit/config/tesseract_config_spec.rb +181 -0
  80. data/spec/unit/config/token_reduction_config_spec.rb +251 -0
  81. data/test/metadata_types_test.rb +959 -0
  82. metadata +292 -0
@@ -0,0 +1,220 @@
# frozen_string_literal: true

# Specs for Kreuzberg::Config::FontConfig.
#
# Covers what the examples below exercise: constructor keyword arguments
# (+enabled+, +custom_font_dirs+), the attribute writers, +to_h+ conversion,
# and carrying a FontConfig inside Kreuzberg::Config::PDF via +font_config+.
RSpec.describe Kreuzberg::Config::FontConfig do
  describe 'initialization' do
    it 'creates font config with defaults' do
      font_config = described_class.new

      expect(font_config.enabled).to be(true)
      expect(font_config.custom_font_dirs).to be_nil
    end

    it 'creates font config with custom values' do
      font_config = described_class.new(enabled: true, custom_font_dirs: ['/usr/share/fonts/custom'])

      expect(font_config.enabled).to be(true)
      expect(font_config.custom_font_dirs).to eq(['/usr/share/fonts/custom'])
    end

    it 'creates font config with enabled=false' do
      font_config = described_class.new(enabled: false)

      expect(font_config.enabled).to be(false)
      expect(font_config.custom_font_dirs).to be_nil
    end

    it 'creates font config with custom directories' do
      font_dirs = ['/usr/share/fonts/custom', '~/my-fonts']
      font_config = described_class.new(custom_font_dirs: font_dirs)

      # +enabled+ was not passed, so the default is expected here.
      expect(font_config.enabled).to be(true)
      expect(font_config.custom_font_dirs).to eq(font_dirs)
    end

    it 'creates font config with all parameters' do
      font_dirs = ['/path/to/fonts']
      font_config = described_class.new(enabled: true, custom_font_dirs: font_dirs)

      expect(font_config.enabled).to be(true)
      expect(font_config.custom_font_dirs).to eq(font_dirs)
    end
  end

  describe 'attribute access' do
    it 'allows setting enabled via attr_accessor' do
      font_config = described_class.new.tap { |cfg| cfg.enabled = false }

      expect(font_config.enabled).to be(false)
    end

    it 'allows setting custom_font_dirs via attr_accessor' do
      font_config = described_class.new
      font_dirs = ['/new/path']
      font_config.custom_font_dirs = font_dirs

      expect(font_config.custom_font_dirs).to eq(font_dirs)
    end

    it 'allows clearing custom_font_dirs' do
      font_config = described_class.new(custom_font_dirs: ['/path1', '/path2'])
      font_config.custom_font_dirs = nil

      expect(font_config.custom_font_dirs).to be_nil
    end
  end

  describe 'custom directories' do
    it 'handles empty custom directories array' do
      font_config = described_class.new(custom_font_dirs: [])

      expect(font_config.custom_font_dirs).to eq([])
      expect(font_config.custom_font_dirs.length).to eq(0)
    end

    it 'handles multiple custom directories' do
      font_dirs = ['/path1', '/path2', '/path3', '~/fonts', './relative-fonts']
      font_config = described_class.new(custom_font_dirs: font_dirs)

      expect(font_config.custom_font_dirs).to eq(font_dirs)
      expect(font_config.custom_font_dirs.length).to eq(5)
    end

    it 'preserves directory paths with tilde' do
      font_dirs = ['~/my-fonts', '~/Documents/fonts']
      font_config = described_class.new(custom_font_dirs: font_dirs)

      # The expectation is that "~" entries come back unchanged.
      expect(font_config.custom_font_dirs).to include('~/my-fonts', '~/Documents/fonts')
    end

    it 'preserves relative paths' do
      font_dirs = ['./fonts', '../fonts']
      font_config = described_class.new(custom_font_dirs: font_dirs)

      expect(font_config.custom_font_dirs).to include('./fonts', '../fonts')
    end
  end

  describe 'conversion' do
    it 'converts to hash' do
      as_hash = described_class.new(enabled: true, custom_font_dirs: ['/fonts']).to_h

      expect(as_hash).to be_a(Hash)
      expect(as_hash[:enabled]).to be(true)
      expect(as_hash[:custom_font_dirs]).to eq(['/fonts'])
    end

    it 'converts to hash with nil custom_dirs' do
      as_hash = described_class.new(enabled: false).to_h

      expect(as_hash).to be_a(Hash)
      expect(as_hash[:enabled]).to be(false)
      expect(as_hash[:custom_font_dirs]).to be_nil
    end

    it 'converts to hash and includes all keys' do
      as_hash = described_class.new(enabled: true, custom_font_dirs: ['/fonts']).to_h

      expect(as_hash).to be_a(Hash)
      expect(as_hash).to include(enabled: true, custom_font_dirs: ['/fonts'])
    end
  end

  describe 'integration with PdfConfig' do
    it 'integrates with PdfConfig' do
      fonts = described_class.new(enabled: true, custom_font_dirs: ['/fonts'])
      pdf = Kreuzberg::Config::PDF.new(font_config: fonts)

      expect(pdf.font_config).not_to be_nil
      expect(pdf.font_config.enabled).to be(true)
      expect(pdf.font_config.custom_font_dirs).to eq(['/fonts'])
    end

    it 'integrates with PdfConfig with all parameters' do
      fonts = described_class.new(enabled: true, custom_font_dirs: ['/custom-fonts'])
      pdf = Kreuzberg::Config::PDF.new(
        extract_images: true,
        passwords: ['pass1'],
        extract_metadata: true,
        font_config: fonts
      )

      expect(pdf.extract_images).to be(true)
      expect(pdf.passwords).to eq(['pass1'])
      expect(pdf.extract_metadata).to be(true)
      expect(pdf.font_config.enabled).to be(true)
    end

    it 'allows setting font_config via setter' do
      pdf = Kreuzberg::Config::PDF.new
      fonts = described_class.new(enabled: false)

      pdf.font_config = fonts

      expect(pdf.font_config).not_to be_nil
      expect(pdf.font_config.enabled).to be(false)
    end

    it 'allows clearing font_config via setter' do
      fonts = described_class.new(custom_font_dirs: ['/fonts'])
      pdf = Kreuzberg::Config::PDF.new(font_config: fonts)

      # Confirm the value is present before clearing it.
      expect(pdf.font_config).not_to be_nil

      pdf.font_config = nil

      expect(pdf.font_config).to be_nil
    end
  end

  describe 'edge cases' do
    it 'handles disabled with custom directories' do
      font_config = described_class.new(enabled: false, custom_font_dirs: ['/fonts'])

      expect(font_config.enabled).to be(false)
      expect(font_config.custom_font_dirs).to eq(['/fonts'])
    end

    it 'handles multiple changes to enabled flag' do
      font_config = described_class.new
      # Apply the writes in order; only the last one should be observable.
      [false, true, false].each { |flag| font_config.enabled = flag }

      expect(font_config.enabled).to be(false)
    end

    it 'handles multiple changes to custom directories' do
      font_config = described_class.new
      # Apply the writes in order; only the last one should be observable.
      [['/path1'], ['/path1', '/path2'], []].each { |dirs| font_config.custom_font_dirs = dirs }

      expect(font_config.custom_font_dirs).to eq([])
    end
  end
end