kreuzberg 4.3.7-aarch64-linux → 4.4.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +24 -10
- data/README.md +4 -1
- data/examples/async_patterns.rb +9 -10
- data/lib/kreuzberg/result.rb +9 -12
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg_rb.so +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a713fea01562f31fbb7c757bbf278dfaa4929fc55a5696d77e5b83157e286423
|
|
4
|
+
data.tar.gz: e9ef2c909e3ca0f1653824a7b2517342858e6795c4f35191cc7d03a938890c4d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f8750cd9e0c9031c07e1ab5ad7f4b7ccc2649c70c652f7b4cba4b2b440d6e7d645bc049de9eef26e910ea8581bbacad152bc474fe1e0f5d3f52805c7fd45206d
|
|
7
|
+
data.tar.gz: fb3091b0637338d9727751132bc1f72b3dd0aa450fc2616506bfbc3fe5de0de7f08c2842f94b6cb7498999333fb7b7d727c1c8660d8f827e627fd851b9604ef1
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.3.7)
|
|
4
|
+
kreuzberg (4.4.0)
|
|
5
5
|
rb_sys (~> 0.9.119)
|
|
6
6
|
sorbet-runtime (~> 0.5)
|
|
7
7
|
|
|
@@ -21,6 +21,8 @@ GEM
|
|
|
21
21
|
securerandom (>= 0.3)
|
|
22
22
|
tzinfo (~> 2.0, >= 2.0.5)
|
|
23
23
|
uri (>= 0.13.1)
|
|
24
|
+
addressable (2.8.9)
|
|
25
|
+
public_suffix (>= 2.0.2, < 8.0)
|
|
24
26
|
ast (2.4.3)
|
|
25
27
|
base64 (0.3.0)
|
|
26
28
|
bigdecimal (4.0.1)
|
|
@@ -48,6 +50,9 @@ GEM
|
|
|
48
50
|
concurrent-ruby (~> 1.0)
|
|
49
51
|
io-console (0.8.2)
|
|
50
52
|
json (2.18.1)
|
|
53
|
+
json-schema (6.1.0)
|
|
54
|
+
addressable (~> 2.8)
|
|
55
|
+
bigdecimal (>= 3.1, < 5)
|
|
51
56
|
language_server-protocol (3.17.0.5)
|
|
52
57
|
lint_roller (1.1.0)
|
|
53
58
|
listen (3.10.0)
|
|
@@ -55,8 +60,11 @@ GEM
|
|
|
55
60
|
rb-fsevent (~> 0.10, >= 0.10.3)
|
|
56
61
|
rb-inotify (~> 0.9, >= 0.9.10)
|
|
57
62
|
logger (1.7.0)
|
|
63
|
+
mcp (0.7.1)
|
|
64
|
+
json-schema (>= 4.1)
|
|
58
65
|
method_source (1.1.0)
|
|
59
|
-
minitest (6.0.
|
|
66
|
+
minitest (6.0.2)
|
|
67
|
+
drb (~> 2.0)
|
|
60
68
|
prism (~> 1.5)
|
|
61
69
|
mutex_m (0.3.0)
|
|
62
70
|
parallel (1.27.0)
|
|
@@ -71,6 +79,7 @@ GEM
|
|
|
71
79
|
pry-byebug (3.12.0)
|
|
72
80
|
byebug (~> 13.0)
|
|
73
81
|
pry (>= 0.13, < 0.17)
|
|
82
|
+
public_suffix (7.0.2)
|
|
74
83
|
racc (1.8.1)
|
|
75
84
|
rainbow (3.1.1)
|
|
76
85
|
rake (13.3.1)
|
|
@@ -97,14 +106,15 @@ GEM
|
|
|
97
106
|
rspec-expectations (3.13.5)
|
|
98
107
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
99
108
|
rspec-support (~> 3.13.0)
|
|
100
|
-
rspec-mocks (3.13.
|
|
109
|
+
rspec-mocks (3.13.8)
|
|
101
110
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
102
111
|
rspec-support (~> 3.13.0)
|
|
103
112
|
rspec-support (3.13.7)
|
|
104
|
-
rubocop (1.
|
|
113
|
+
rubocop (1.85.0)
|
|
105
114
|
json (~> 2.3)
|
|
106
115
|
language_server-protocol (~> 3.17.0.2)
|
|
107
116
|
lint_roller (~> 1.1.0)
|
|
117
|
+
mcp (~> 0.6)
|
|
108
118
|
parallel (~> 1.10)
|
|
109
119
|
parser (>= 3.3.0.2)
|
|
110
120
|
rainbow (>= 2.2.2, < 4.0)
|
|
@@ -124,7 +134,7 @@ GEM
|
|
|
124
134
|
rubocop (~> 1.81)
|
|
125
135
|
ruby-progressbar (1.13.0)
|
|
126
136
|
securerandom (0.4.1)
|
|
127
|
-
sorbet-runtime (0.6.
|
|
137
|
+
sorbet-runtime (0.6.12971)
|
|
128
138
|
steep (1.10.0)
|
|
129
139
|
activesupport (>= 5.1)
|
|
130
140
|
concurrent-ruby (>= 1.1.10)
|
|
@@ -185,6 +195,7 @@ DEPENDENCIES
|
|
|
185
195
|
|
|
186
196
|
CHECKSUMS
|
|
187
197
|
activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
|
|
198
|
+
addressable (2.8.9) sha256=cc154fcbe689711808a43601dee7b980238ce54368d23e127421753e46895485
|
|
188
199
|
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
189
200
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
190
201
|
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
|
@@ -210,19 +221,22 @@ CHECKSUMS
|
|
|
210
221
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
211
222
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
212
223
|
json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
|
|
213
|
-
|
|
224
|
+
json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
|
|
225
|
+
kreuzberg (4.4.0)
|
|
214
226
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
215
227
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
216
228
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
217
229
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
230
|
+
mcp (0.7.1) sha256=fa967895d6952bad0d981ea907731d8528d2c246d2079d56a9c8bae83d14f1c7
|
|
218
231
|
method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5
|
|
219
|
-
minitest (6.0.
|
|
232
|
+
minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
|
|
220
233
|
mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
|
|
221
234
|
parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
|
|
222
235
|
parser (3.3.10.2) sha256=6f60c84aa4bdcedb6d1a2434b738fe8a8136807b6adc8f7f53b97da9bc4e9357
|
|
223
236
|
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
224
237
|
pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e
|
|
225
238
|
pry-byebug (3.12.0) sha256=594e094ae8a8390a7ad4c7b36ae36e13304ed02664c67417d108dc5f7213d1b7
|
|
239
|
+
public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
|
|
226
240
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
227
241
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
228
242
|
rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
|
|
@@ -237,15 +251,15 @@ CHECKSUMS
|
|
|
237
251
|
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
238
252
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
239
253
|
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
240
|
-
rspec-mocks (3.13.
|
|
254
|
+
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
|
|
241
255
|
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
|
|
242
|
-
rubocop (1.
|
|
256
|
+
rubocop (1.85.0) sha256=317407feb681a07d54f64d2f9e1d6b6af1ce7678e51cd658e3ad8bd66da48c01
|
|
243
257
|
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
244
258
|
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
|
|
245
259
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
246
260
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
247
261
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
248
|
-
sorbet-runtime (0.6.
|
|
262
|
+
sorbet-runtime (0.6.12971) sha256=1c2c75a262f88c4fbdb36b5617b0b11bfc7c69b11a500b3334bd67d075288a45
|
|
249
263
|
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
|
|
250
264
|
strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
|
|
251
265
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.7" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.0" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -36,6 +36,9 @@
|
|
|
36
36
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
|
|
37
37
|
<img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
|
|
38
38
|
</a>
|
|
39
|
+
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
40
|
+
<img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C">
|
|
41
|
+
</a>
|
|
39
42
|
|
|
40
43
|
<!-- Project Info -->
|
|
41
44
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
|
data/examples/async_patterns.rb
CHANGED
|
@@ -215,18 +215,17 @@ end
|
|
|
215
215
|
# ============================================================================
|
|
216
216
|
|
|
217
217
|
# Example OCR backend implementation for custom processing.
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
218
|
+
def register_ocr_backend
|
|
219
|
+
backend = Class.new do
|
|
220
|
+
def process_image(image_bytes, language)
|
|
221
|
+
"Extracted text from #{image_bytes.length} bytes using #{language}"
|
|
222
|
+
end
|
|
222
223
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
end
|
|
224
|
+
def supports_language?(lang)
|
|
225
|
+
%w[eng deu fra].include?(lang)
|
|
226
|
+
end
|
|
227
|
+
end.new
|
|
227
228
|
|
|
228
|
-
def register_ocr_backend
|
|
229
|
-
backend = CustomOcrBackend.new
|
|
230
229
|
Kreuzberg.register_ocr_backend('custom', backend)
|
|
231
230
|
|
|
232
231
|
config = {
|
data/lib/kreuzberg/result.rb
CHANGED
|
@@ -24,7 +24,7 @@ module Kreuzberg
|
|
|
24
24
|
# @return [Integer] Page number where table was found
|
|
25
25
|
# @!attribute [r] bounding_box
|
|
26
26
|
# @return [BoundingBox, nil] Bounding box of the table on the page
|
|
27
|
-
Table = Struct.new(:cells, :markdown, :page_number, :bounding_box
|
|
27
|
+
Table = Struct.new(:cells, :markdown, :page_number, :bounding_box) do
|
|
28
28
|
def to_h
|
|
29
29
|
{ cells: cells, markdown: markdown, page_number: page_number, bounding_box: bounding_box&.to_h }
|
|
30
30
|
end
|
|
@@ -51,8 +51,7 @@ module Kreuzberg
|
|
|
51
51
|
:total_chunks,
|
|
52
52
|
:first_page,
|
|
53
53
|
:last_page,
|
|
54
|
-
:embedding
|
|
55
|
-
keyword_init: true
|
|
54
|
+
:embedding
|
|
56
55
|
) do
|
|
57
56
|
def to_h
|
|
58
57
|
{
|
|
@@ -81,8 +80,7 @@ module Kreuzberg
|
|
|
81
80
|
:is_mask,
|
|
82
81
|
:description,
|
|
83
82
|
:bounding_box,
|
|
84
|
-
:ocr_result
|
|
85
|
-
keyword_init: true
|
|
83
|
+
:ocr_result
|
|
86
84
|
) do
|
|
87
85
|
def to_h
|
|
88
86
|
{
|
|
@@ -118,7 +116,7 @@ module Kreuzberg
|
|
|
118
116
|
# @return [String] The hierarchy level (h1-h6 or body)
|
|
119
117
|
# @!attribute [r] bbox
|
|
120
118
|
# @return [Array<Float>, nil] Bounding box (left, top, right, bottom)
|
|
121
|
-
HierarchicalBlock = Struct.new(:text, :font_size, :level, :bbox
|
|
119
|
+
HierarchicalBlock = Struct.new(:text, :font_size, :level, :bbox) do
|
|
122
120
|
def to_h
|
|
123
121
|
{ text: text, font_size: font_size, level: level, bbox: bbox }
|
|
124
122
|
end
|
|
@@ -128,7 +126,7 @@ module Kreuzberg
|
|
|
128
126
|
# @return [Integer] Number of hierarchy blocks
|
|
129
127
|
# @!attribute [r] blocks
|
|
130
128
|
# @return [Array<HierarchicalBlock>] Hierarchical blocks
|
|
131
|
-
PageHierarchy = Struct.new(:block_count, :blocks
|
|
129
|
+
PageHierarchy = Struct.new(:block_count, :blocks) do
|
|
132
130
|
def to_h
|
|
133
131
|
{ block_count: block_count, blocks: blocks.map(&:to_h) }
|
|
134
132
|
end
|
|
@@ -144,7 +142,7 @@ module Kreuzberg
|
|
|
144
142
|
# @return [Array<Image>] Images on this page
|
|
145
143
|
# @!attribute [r] hierarchy
|
|
146
144
|
# @return [PageHierarchy, nil] Hierarchy information for the page
|
|
147
|
-
PageContent = Struct.new(:page_number, :content, :tables, :images, :hierarchy, :is_blank
|
|
145
|
+
PageContent = Struct.new(:page_number, :content, :tables, :images, :hierarchy, :is_blank) do
|
|
148
146
|
def to_h
|
|
149
147
|
{
|
|
150
148
|
page_number: page_number,
|
|
@@ -165,7 +163,7 @@ module Kreuzberg
|
|
|
165
163
|
# @return [Float] Right x-coordinate
|
|
166
164
|
# @!attribute [r] y1
|
|
167
165
|
# @return [Float] Top y-coordinate
|
|
168
|
-
ElementBoundingBox = Struct.new(:x0, :y0, :x1, :y1
|
|
166
|
+
ElementBoundingBox = Struct.new(:x0, :y0, :x1, :y1) do
|
|
169
167
|
def to_h
|
|
170
168
|
{ x0: x0, y0: y0, x1: x1, y1: y1 }
|
|
171
169
|
end
|
|
@@ -186,8 +184,7 @@ module Kreuzberg
|
|
|
186
184
|
:filename,
|
|
187
185
|
:coordinates,
|
|
188
186
|
:element_index,
|
|
189
|
-
:additional
|
|
190
|
-
keyword_init: true
|
|
187
|
+
:additional
|
|
191
188
|
) do
|
|
192
189
|
def to_h
|
|
193
190
|
{
|
|
@@ -208,7 +205,7 @@ module Kreuzberg
|
|
|
208
205
|
# @return [String] Text content of the element
|
|
209
206
|
# @!attribute [r] metadata
|
|
210
207
|
# @return [ElementMetadataStruct] Metadata about the element
|
|
211
|
-
ElementStruct = Struct.new(:element_id, :element_type, :text, :metadata
|
|
208
|
+
ElementStruct = Struct.new(:element_id, :element_type, :text, :metadata) do
|
|
212
209
|
def to_h
|
|
213
210
|
{
|
|
214
211
|
element_id: element_id,
|
data/lib/kreuzberg/version.rb
CHANGED
data/lib/kreuzberg_rb.so
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.3.7
|
|
4
|
+
version: 4.4.0
|
|
5
5
|
platform: aarch64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-02-
|
|
11
|
+
date: 2026-02-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|