kreuzberg 4.9.9-aarch64-linux → 4.10.0.pre.rc.9-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/lib/kreuzberg.rb +0 -104
  3. data/lib/kreuzberg_rb.so +0 -0
  4. metadata +8 -219
  5. data/README.md +0 -431
  6. data/lib/kreuzberg/api_proxy.rb +0 -125
  7. data/lib/kreuzberg/cache_api.rb +0 -67
  8. data/lib/kreuzberg/cli.rb +0 -57
  9. data/lib/kreuzberg/cli_proxy.rb +0 -118
  10. data/lib/kreuzberg/config.rb +0 -1472
  11. data/lib/kreuzberg/djot_content.rb +0 -225
  12. data/lib/kreuzberg/document_structure.rb +0 -204
  13. data/lib/kreuzberg/error_context.rb +0 -136
  14. data/lib/kreuzberg/errors.rb +0 -123
  15. data/lib/kreuzberg/extraction_api.rb +0 -401
  16. data/lib/kreuzberg/mcp_proxy.rb +0 -176
  17. data/lib/kreuzberg/ocr_backend_protocol.rb +0 -40
  18. data/lib/kreuzberg/post_processor_protocol.rb +0 -15
  19. data/lib/kreuzberg/result.rb +0 -827
  20. data/lib/kreuzberg/setup_lib_path.rb +0 -99
  21. data/lib/kreuzberg/types.rb +0 -624
  22. data/lib/kreuzberg/validator_protocol.rb +0 -16
  23. data/lib/kreuzberg/version.rb +0 -5
  24. data/sig/kreuzberg/internal.rbs +0 -184
  25. data/sig/kreuzberg.rbs +0 -1859
  26. data/spec/binding/async_operations_spec.rb +0 -473
  27. data/spec/binding/batch_operations_spec.rb +0 -678
  28. data/spec/binding/batch_spec.rb +0 -360
  29. data/spec/binding/cache_spec.rb +0 -227
  30. data/spec/binding/cli_proxy_spec.rb +0 -85
  31. data/spec/binding/cli_spec.rb +0 -55
  32. data/spec/binding/config_spec.rb +0 -419
  33. data/spec/binding/config_validation_spec.rb +0 -377
  34. data/spec/binding/embeddings_spec.rb +0 -134
  35. data/spec/binding/error_handling_spec.rb +0 -399
  36. data/spec/binding/error_recovery_spec.rb +0 -488
  37. data/spec/binding/errors_spec.rb +0 -66
  38. data/spec/binding/font_config_spec.rb +0 -220
  39. data/spec/binding/images_spec.rb +0 -155
  40. data/spec/binding/keywords_extraction_spec.rb +0 -52
  41. data/spec/binding/pages_extraction_spec.rb +0 -101
  42. data/spec/binding/plugins/ocr_backend_spec.rb +0 -307
  43. data/spec/binding/plugins/postprocessor_spec.rb +0 -269
  44. data/spec/binding/plugins/validator_spec.rb +0 -273
  45. data/spec/binding/render_spec.rb +0 -91
  46. data/spec/binding/tables_spec.rb +0 -183
  47. data/spec/spec_helper.rb +0 -40
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aacc1f63d019544296c5656a1d7dc67be30f68825cf000ce2f21e76f2825f992
4
- data.tar.gz: 0c0166b5d45ff5d5683433180ef5820a137587f6654f28275b56a826e555a229
3
+ metadata.gz: 96aec4e399a9653d86c0b593c9ee6f8a598bdf0aa3669841eecaf390bd3d5ab3
4
+ data.tar.gz: d28a7d2f594d587b003fb7a57d073a3461a0191296b372423c556aaf6eeee8a2
5
5
  SHA512:
6
- metadata.gz: 0fb5344b6eb3d3195f9c1f4bd72c6daece523411a85290fcdecc92aa32f1d6dd6171e3b2ecb70c04b6a022a6e984f5e0419986671b89027048a2ec908cfbaddb
7
- data.tar.gz: a98060626bf52f9f30e3c2551882d47c37ff3a60252a3601fadfffd7194fc69bdaa2304e1aa3d9463ed9d5d83ff2bbcac412c5bb4e2d69c374c6f4d9a574ac59
6
+ metadata.gz: a20a03936747b42e62e90289b7085cd24bdacf301032c0842e28a89f243f5a3926455ed8084fc1cd859dfcf74b6a8f2342c832b1f50fd03edcda9d9bd658e3e1
7
+ data.tar.gz: e3d02a1a0578119b8d908bfd0d3a2f881348bdfbe9c1b3688720a26b1a46313246210023d2655f5ba92c10ac2879c684b8973e848a480340aae0173feb42692a
data/lib/kreuzberg.rb CHANGED
@@ -1,107 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'kreuzberg/setup_lib_path'
4
- Kreuzberg::SetupLibPath.configure
5
-
6
- require_relative 'kreuzberg/version'
7
3
  require 'kreuzberg_rb'
8
-
9
- # Kreuzberg is a Ruby binding for the Rust core library providing document extraction,
10
- # text extraction, and OCR capabilities.
11
- module Kreuzberg
12
- autoload :Config, 'kreuzberg/config'
13
- autoload :Result, 'kreuzberg/result'
14
- autoload :CLI, 'kreuzberg/cli'
15
- autoload :CLIProxy, 'kreuzberg/cli_proxy'
16
- autoload :APIProxy, 'kreuzberg/api_proxy'
17
- autoload :MCPProxy, 'kreuzberg/mcp_proxy'
18
- autoload :Errors, 'kreuzberg/errors'
19
- autoload :ErrorContext, 'kreuzberg/error_context'
20
- autoload :PostProcessorProtocol, 'kreuzberg/post_processor_protocol'
21
- autoload :ValidatorProtocol, 'kreuzberg/validator_protocol'
22
- autoload :OcrBackendProtocol, 'kreuzberg/ocr_backend_protocol'
23
-
24
- autoload :BoundingBox, 'kreuzberg/types'
25
- autoload :ElementMetadata, 'kreuzberg/types'
26
- autoload :Element, 'kreuzberg/types'
27
- autoload :HtmlMetadata, 'kreuzberg/types'
28
- autoload :HeaderMetadata, 'kreuzberg/types'
29
- autoload :LinkMetadata, 'kreuzberg/types'
30
- autoload :ImageMetadata, 'kreuzberg/types'
31
- autoload :StructuredData, 'kreuzberg/types'
32
- autoload :ExtractedKeyword, 'kreuzberg/types'
33
- autoload :ProcessingWarning, 'kreuzberg/types'
34
- autoload :DocumentBoundingBox, 'kreuzberg/types'
35
- autoload :DocumentAnnotation, 'kreuzberg/types'
36
- autoload :DocumentNode, 'kreuzberg/types'
37
- autoload :DocumentStructure, 'kreuzberg/types'
38
- autoload :PdfAnnotation, 'kreuzberg/types'
39
- autoload :PdfAnnotationBoundingBox, 'kreuzberg/types'
40
- autoload :KeywordAlgorithm, 'kreuzberg/types'
41
-
42
- ExtractionConfig = Config::Extraction
43
- PageConfig = Config::PageConfig
44
-
45
- @__cache_tracker = { entries: 0, bytes: 0 }
46
-
47
- class << self
48
- alias native_extract_file_sync extract_file_sync
49
- alias native_extract_bytes_sync extract_bytes_sync
50
- alias native_batch_extract_files_sync batch_extract_files_sync
51
- alias native_extract_file extract_file
52
- alias native_extract_bytes extract_bytes
53
- alias native_batch_extract_files batch_extract_files
54
- alias native_batch_extract_bytes_sync batch_extract_bytes_sync
55
- alias native_batch_extract_bytes batch_extract_bytes
56
- alias native_clear_cache clear_cache
57
- alias native_cache_stats cache_stats
58
- alias native_embed_sync embed_sync
59
- alias native_embed embed
60
-
61
- private :native_extract_file_sync, :native_extract_bytes_sync, :native_batch_extract_files_sync
62
- private :native_extract_file, :native_extract_bytes, :native_batch_extract_files
63
- private :native_batch_extract_bytes_sync, :native_batch_extract_bytes
64
- private :native_embed_sync, :native_embed
65
- end
66
-
67
- module_function :register_post_processor
68
-
69
- module_function :unregister_post_processor
70
-
71
- module_function :clear_post_processors
72
-
73
- module_function :register_validator
74
-
75
- module_function :unregister_validator
76
-
77
- module_function :clear_validators
78
-
79
- module_function :list_validators
80
-
81
- module_function :list_post_processors
82
-
83
- module_function :register_ocr_backend
84
-
85
- module_function :unregister_ocr_backend
86
-
87
- module_function :list_ocr_backends
88
-
89
- module_function :detect_mime_type
90
-
91
- module_function :detect_mime_type_from_path
92
-
93
- module_function :validate_mime_type
94
-
95
- module_function :get_extensions_for_mime
96
-
97
- module_function :embed_sync
98
-
99
- module_function :embed
100
- end
101
-
102
- require_relative 'kreuzberg/cache_api'
103
- require_relative 'kreuzberg/extraction_api'
104
- require_relative 'kreuzberg/djot_content'
105
-
106
- Kreuzberg.singleton_class.prepend(Kreuzberg::CacheAPI)
107
- Kreuzberg.singleton_class.prepend(Kreuzberg::ExtractionAPI)
data/lib/kreuzberg_rb.so CHANGED
Binary file
metadata CHANGED
@@ -1,237 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.9.9
4
+ version: 4.10.0.pre.rc.9
5
5
  platform: aarch64-linux
6
6
  authors:
7
- - Na'aman Hirschfeld
7
+ - Na'aman Hirschfeld <naaman@kreuzberg.dev>
8
8
  autorequire:
9
- bindir: exe
9
+ bindir: bin
10
10
  cert_chain: []
11
- date: 2026-06-05 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '4.0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '4.0'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '13.0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '13.0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake-compiler
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '1.2'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '1.2'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '3.12'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '3.12'
69
- - !ruby/object:Gem::Dependency
70
- name: sorbet-runtime
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
74
- - !ruby/object:Gem::Version
75
- version: '0.5'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - "~>"
81
- - !ruby/object:Gem::Version
82
- version: '0.5'
83
- - !ruby/object:Gem::Dependency
84
- name: rbs
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: '4.0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - "~>"
95
- - !ruby/object:Gem::Version
96
- version: '4.0'
97
- - !ruby/object:Gem::Dependency
98
- name: rubocop
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '1.66'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '1.66'
111
- - !ruby/object:Gem::Dependency
112
- name: rubocop-performance
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '1.21'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '1.21'
125
- - !ruby/object:Gem::Dependency
126
- name: rubocop-rspec
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - "~>"
130
- - !ruby/object:Gem::Version
131
- version: '3.0'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - "~>"
137
- - !ruby/object:Gem::Version
138
- version: '3.0'
139
- - !ruby/object:Gem::Dependency
140
- name: steep
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - "~>"
144
- - !ruby/object:Gem::Version
145
- version: '2.0'
146
- type: :development
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - "~>"
151
- - !ruby/object:Gem::Version
152
- version: '2.0'
153
- - !ruby/object:Gem::Dependency
154
- name: yard
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - "~>"
158
- - !ruby/object:Gem::Version
159
- version: '0.9'
160
- type: :development
161
- prerelease: false
162
- version_requirements: !ruby/object:Gem::Requirement
163
- requirements:
164
- - - "~>"
165
- - !ruby/object:Gem::Version
166
- version: '0.9'
167
- description: |
168
- Kreuzberg is a high-performance document intelligence library with a Rust core and native
169
- Ruby bindings via Magnus. Extract text, metadata, and structured data from 75+ file formats
170
- including PDF, DOCX, PPTX, XLSX, HTML, RTF, images (with OCR), email, archives, and more.
171
- Features async/sync APIs, text chunking, language detection, and keyword extraction.
11
+ date: 2026-04-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: High-performance document intelligence library
172
14
  email:
173
- - nhirschfeld@gmail.com
174
15
  executables: []
175
16
  extensions: []
176
17
  extra_rdoc_files: []
177
18
  files:
178
- - README.md
179
19
  - lib/kreuzberg.rb
180
- - lib/kreuzberg/api_proxy.rb
181
- - lib/kreuzberg/cache_api.rb
182
- - lib/kreuzberg/cli.rb
183
- - lib/kreuzberg/cli_proxy.rb
184
- - lib/kreuzberg/config.rb
185
- - lib/kreuzberg/djot_content.rb
186
- - lib/kreuzberg/document_structure.rb
187
- - lib/kreuzberg/error_context.rb
188
- - lib/kreuzberg/errors.rb
189
- - lib/kreuzberg/extraction_api.rb
190
- - lib/kreuzberg/mcp_proxy.rb
191
- - lib/kreuzberg/ocr_backend_protocol.rb
192
- - lib/kreuzberg/post_processor_protocol.rb
193
- - lib/kreuzberg/result.rb
194
- - lib/kreuzberg/setup_lib_path.rb
195
- - lib/kreuzberg/types.rb
196
- - lib/kreuzberg/validator_protocol.rb
197
- - lib/kreuzberg/version.rb
198
20
  - lib/kreuzberg_rb.so
199
21
  - lib/libpdfium.so
200
- - sig/kreuzberg.rbs
201
- - sig/kreuzberg/internal.rbs
202
- - spec/binding/async_operations_spec.rb
203
- - spec/binding/batch_operations_spec.rb
204
- - spec/binding/batch_spec.rb
205
- - spec/binding/cache_spec.rb
206
- - spec/binding/cli_proxy_spec.rb
207
- - spec/binding/cli_spec.rb
208
- - spec/binding/config_spec.rb
209
- - spec/binding/config_validation_spec.rb
210
- - spec/binding/embeddings_spec.rb
211
- - spec/binding/error_handling_spec.rb
212
- - spec/binding/error_recovery_spec.rb
213
- - spec/binding/errors_spec.rb
214
- - spec/binding/font_config_spec.rb
215
- - spec/binding/images_spec.rb
216
- - spec/binding/keywords_extraction_spec.rb
217
- - spec/binding/pages_extraction_spec.rb
218
- - spec/binding/plugins/ocr_backend_spec.rb
219
- - spec/binding/plugins/postprocessor_spec.rb
220
- - spec/binding/plugins/validator_spec.rb
221
- - spec/binding/render_spec.rb
222
- - spec/binding/tables_spec.rb
223
- - spec/spec_helper.rb
224
22
  homepage: https://github.com/kreuzberg-dev/kreuzberg
225
23
  licenses:
226
24
  - Elastic-2.0
227
25
  metadata:
228
- homepage_uri: https://github.com/kreuzberg-dev/kreuzberg
229
- source_code_uri: https://github.com/kreuzberg-dev/kreuzberg
230
- changelog_uri: https://github.com/kreuzberg-dev/kreuzberg/blob/main/CHANGELOG.md
231
- documentation_uri: https://docs.kreuzberg.dev
232
- bug_tracker_uri: https://github.com/kreuzberg-dev/kreuzberg/issues
26
+ keywords: document,extraction,pdf,ocr,text
233
27
  rubygems_mfa_required: 'true'
234
- keywords: document-intelligence,document-extraction,text-extraction,ocr,pdf,rust,native-extension,nlp,rag
235
28
  post_install_message:
236
29
  rdoc_options: []
237
30
  require_paths:
@@ -241,9 +34,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
241
34
  - - ">="
242
35
  - !ruby/object:Gem::Version
243
36
  version: 3.2.0
244
- - - "<"
245
- - !ruby/object:Gem::Version
246
- version: '5.0'
247
37
  required_rubygems_version: !ruby/object:Gem::Requirement
248
38
  requirements:
249
39
  - - ">="
@@ -253,6 +43,5 @@ requirements: []
253
43
  rubygems_version: 3.5.22
254
44
  signing_key:
255
45
  specification_version: 4
256
- summary: Document intelligence library — extract text from PDFs, Office docs, images,
257
- and 75+ formats
46
+ summary: High-performance document intelligence library
258
47
  test_files: []