kreuzberg 4.8.3-aarch64-linux → 4.8.5-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -3
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg_rb.so +0 -0
- data/sig/kreuzberg.rbs +85 -15
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3a8ebc29c703cfb07252f68d86a5dd37f17eb06d79bccce78f82eada2d732e9c
|
|
4
|
+
data.tar.gz: e711b683c3bdfa37ea1b43bc9e50bc7f21a4e2fca9e4dee4e8c2a2c2664e1bc5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a50806a6cee6edfcf55db3a62d748c293c7b01f20f2744540d520489ca6bbbfe89d65fb3d0132e5e3f723121b835fdc99ac3856559910f227f708e43931070d9
|
|
7
|
+
data.tar.gz: 26c2f577491b2be1f89e7f49832e308bdc7aabb7453b761a7a72692316a8fce84268d346a3adc545e1308a1b7f3b471d4b671b3f9f4a9ac1ef65dd07139ddb62
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.3" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.5" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -39,10 +39,13 @@
|
|
|
39
39
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
|
|
40
40
|
<img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
|
|
41
41
|
</a>
|
|
42
|
+
<a href="https://artifacthub.io/packages/search?repo=kreuzberg">
|
|
43
|
+
<img src="https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/kreuzberg" alt="Artifact Hub">
|
|
44
|
+
</a>
|
|
42
45
|
|
|
43
46
|
<!-- Project Info -->
|
|
44
47
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
|
|
45
|
-
<img src="https://img.shields.io/badge/License-
|
|
48
|
+
<img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License">
|
|
46
49
|
</a>
|
|
47
50
|
<a href="https://docs.kreuzberg.dev">
|
|
48
51
|
<img src="https://img.shields.io/badge/docs-kreuzberg.dev-007ec6" alt="Documentation">
|
|
@@ -419,7 +422,7 @@ Contributions are welcome! See [Contributing Guide](https://github.com/kreuzberg
|
|
|
419
422
|
|
|
420
423
|
## License
|
|
421
424
|
|
|
422
|
-
|
|
425
|
+
MIT License - see LICENSE file for details.
|
|
423
426
|
|
|
424
427
|
## Support
|
|
425
428
|
|
data/lib/kreuzberg/version.rb
CHANGED
data/lib/kreuzberg_rb.so
CHANGED
|
Binary file
|
data/sig/kreuzberg.rbs
CHANGED
|
@@ -18,7 +18,8 @@ module Kreuzberg
|
|
|
18
18
|
type element_type = 'title' | 'narrative_text' | 'heading' | 'list_item' | 'table' | 'image' | 'page_break' | 'code_block' | 'block_quote' | 'footer' | 'header'
|
|
19
19
|
|
|
20
20
|
# Bounding box coordinates for element positioning (T::Struct from types.rb)
|
|
21
|
-
class BoundingBox
|
|
21
|
+
class BoundingBox
|
|
22
|
+
attr_reader x0: Float
|
|
22
23
|
attr_reader y0: Float
|
|
23
24
|
attr_reader x1: Float
|
|
24
25
|
attr_reader y1: Float
|
|
@@ -28,7 +29,8 @@ module Kreuzberg
|
|
|
28
29
|
end
|
|
29
30
|
|
|
30
31
|
# Metadata for a semantic element (T::Struct from types.rb)
|
|
31
|
-
class ElementMetadata
|
|
32
|
+
class ElementMetadata
|
|
33
|
+
attr_reader page_number: Integer?
|
|
32
34
|
attr_reader filename: String?
|
|
33
35
|
attr_reader coordinates: BoundingBox?
|
|
34
36
|
attr_reader element_index: Integer?
|
|
@@ -39,7 +41,8 @@ module Kreuzberg
|
|
|
39
41
|
end
|
|
40
42
|
|
|
41
43
|
# Semantic element extracted from document (T::Struct from types.rb)
|
|
42
|
-
class Element
|
|
44
|
+
class Element
|
|
45
|
+
attr_reader element_id: String
|
|
43
46
|
attr_reader element_type: String
|
|
44
47
|
attr_reader text: String
|
|
45
48
|
attr_reader metadata: ElementMetadata
|
|
@@ -49,7 +52,8 @@ module Kreuzberg
|
|
|
49
52
|
end
|
|
50
53
|
|
|
51
54
|
# Header/Heading metadata (T::Struct from types.rb)
|
|
52
|
-
class HeaderMetadata
|
|
55
|
+
class HeaderMetadata
|
|
56
|
+
attr_reader level: Integer
|
|
53
57
|
attr_reader text: String
|
|
54
58
|
attr_reader id: String?
|
|
55
59
|
attr_reader depth: Integer
|
|
@@ -60,7 +64,8 @@ module Kreuzberg
|
|
|
60
64
|
end
|
|
61
65
|
|
|
62
66
|
# Link metadata (T::Struct from types.rb)
|
|
63
|
-
class LinkMetadata
|
|
67
|
+
class LinkMetadata
|
|
68
|
+
attr_reader href: String
|
|
64
69
|
attr_reader text: String
|
|
65
70
|
attr_reader title: String?
|
|
66
71
|
attr_reader link_type: String
|
|
@@ -72,7 +77,8 @@ module Kreuzberg
|
|
|
72
77
|
end
|
|
73
78
|
|
|
74
79
|
# Image metadata (T::Struct from types.rb)
|
|
75
|
-
class ImageMetadata
|
|
80
|
+
class ImageMetadata
|
|
81
|
+
attr_reader src: String
|
|
76
82
|
attr_reader alt: String?
|
|
77
83
|
attr_reader title: String?
|
|
78
84
|
attr_reader dimensions: Array[Integer]?
|
|
@@ -84,7 +90,8 @@ module Kreuzberg
|
|
|
84
90
|
end
|
|
85
91
|
|
|
86
92
|
# Structured data metadata (T::Struct from types.rb)
|
|
87
|
-
class StructuredData
|
|
93
|
+
class StructuredData
|
|
94
|
+
attr_reader data_type: String
|
|
88
95
|
attr_reader raw_json: String
|
|
89
96
|
attr_reader schema_type: String?
|
|
90
97
|
|
|
@@ -210,7 +217,8 @@ module Kreuzberg
|
|
|
210
217
|
end
|
|
211
218
|
|
|
212
219
|
# HTML metadata (T::Struct from types.rb)
|
|
213
|
-
class HtmlMetadata
|
|
220
|
+
class HtmlMetadata
|
|
221
|
+
attr_reader title: String?
|
|
214
222
|
attr_reader description: String?
|
|
215
223
|
attr_reader author: String?
|
|
216
224
|
attr_reader copyright: String?
|
|
@@ -261,7 +269,8 @@ module Kreuzberg
|
|
|
261
269
|
end
|
|
262
270
|
|
|
263
271
|
# Extracted keyword with relevance metadata (T::Struct from types.rb)
|
|
264
|
-
class ExtractedKeyword
|
|
272
|
+
class ExtractedKeyword
|
|
273
|
+
attr_reader text: String
|
|
265
274
|
attr_reader score: Float
|
|
266
275
|
attr_reader algorithm: String
|
|
267
276
|
attr_reader positions: Array[Integer]?
|
|
@@ -271,15 +280,39 @@ module Kreuzberg
|
|
|
271
280
|
end
|
|
272
281
|
|
|
273
282
|
# Processing warning from a pipeline stage (T::Struct from types.rb)
|
|
274
|
-
class ProcessingWarning
|
|
283
|
+
class ProcessingWarning
|
|
284
|
+
attr_reader source: String
|
|
275
285
|
attr_reader message: String
|
|
276
286
|
|
|
277
287
|
def initialize: (source: String, message: String) -> void
|
|
278
288
|
def serialize: () -> Hash[Symbol, untyped]
|
|
279
289
|
end
|
|
280
290
|
|
|
291
|
+
# LLM token usage from an LLM-assisted extraction step (T::Struct from types.rb)
|
|
292
|
+
class LlmUsage
|
|
293
|
+
attr_reader model: String
|
|
294
|
+
attr_reader source: String
|
|
295
|
+
attr_reader input_tokens: Integer?
|
|
296
|
+
attr_reader output_tokens: Integer?
|
|
297
|
+
attr_reader total_tokens: Integer?
|
|
298
|
+
attr_reader estimated_cost: Float?
|
|
299
|
+
attr_reader finish_reason: String?
|
|
300
|
+
|
|
301
|
+
def initialize: (
|
|
302
|
+
model: String,
|
|
303
|
+
source: String,
|
|
304
|
+
?input_tokens: Integer?,
|
|
305
|
+
?output_tokens: Integer?,
|
|
306
|
+
?total_tokens: Integer?,
|
|
307
|
+
?estimated_cost: Float?,
|
|
308
|
+
?finish_reason: String?
|
|
309
|
+
) -> void
|
|
310
|
+
def serialize: () -> Hash[Symbol, untyped]
|
|
311
|
+
end
|
|
312
|
+
|
|
281
313
|
# Bounding box for document node positioning (T::Struct from types.rb)
|
|
282
|
-
class DocumentBoundingBox
|
|
314
|
+
class DocumentBoundingBox
|
|
315
|
+
attr_reader x0: Float
|
|
283
316
|
attr_reader y0: Float
|
|
284
317
|
attr_reader x1: Float
|
|
285
318
|
attr_reader y1: Float
|
|
@@ -289,7 +322,8 @@ module Kreuzberg
|
|
|
289
322
|
end
|
|
290
323
|
|
|
291
324
|
# Annotation for a document node (T::Struct from types.rb)
|
|
292
|
-
class DocumentAnnotation
|
|
325
|
+
class DocumentAnnotation
|
|
326
|
+
attr_reader key: String
|
|
293
327
|
attr_reader value: String
|
|
294
328
|
|
|
295
329
|
def initialize: (key: String, value: String) -> void
|
|
@@ -297,7 +331,8 @@ module Kreuzberg
|
|
|
297
331
|
end
|
|
298
332
|
|
|
299
333
|
# Single node in the document structure tree (T::Struct from types.rb)
|
|
300
|
-
class DocumentNode
|
|
334
|
+
class DocumentNode
|
|
335
|
+
attr_reader id: String
|
|
301
336
|
attr_reader content: String
|
|
302
337
|
attr_reader parent: Integer?
|
|
303
338
|
attr_reader children: Array[Integer]
|
|
@@ -322,7 +357,8 @@ module Kreuzberg
|
|
|
322
357
|
end
|
|
323
358
|
|
|
324
359
|
# Structured document representation (T::Struct from types.rb)
|
|
325
|
-
class DocumentStructure
|
|
360
|
+
class DocumentStructure
|
|
361
|
+
attr_reader nodes: Array[DocumentNode]
|
|
326
362
|
|
|
327
363
|
def initialize: (nodes: Array[DocumentNode]) -> void
|
|
328
364
|
def serialize: () -> Hash[Symbol, untyped]
|
|
@@ -927,7 +963,10 @@ module Kreuzberg
|
|
|
927
963
|
extracted_keywords: Array[extracted_keyword_hash]?,
|
|
928
964
|
quality_score: Float?,
|
|
929
965
|
processing_warnings: Array[processing_warning_hash]?,
|
|
930
|
-
annotations: Array[pdf_annotation_hash]
|
|
966
|
+
annotations: Array[pdf_annotation_hash]?,
|
|
967
|
+
uris: Array[uri_hash]?,
|
|
968
|
+
children: Array[archive_entry_hash]?,
|
|
969
|
+
llm_usage: Array[llm_usage_hash]?
|
|
931
970
|
}
|
|
932
971
|
|
|
933
972
|
type extracted_keyword_hash = {
|
|
@@ -942,6 +981,29 @@ module Kreuzberg
|
|
|
942
981
|
message: String
|
|
943
982
|
}
|
|
944
983
|
|
|
984
|
+
type llm_usage_hash = {
|
|
985
|
+
model: String,
|
|
986
|
+
source: String,
|
|
987
|
+
input_tokens: Integer?,
|
|
988
|
+
output_tokens: Integer?,
|
|
989
|
+
total_tokens: Integer?,
|
|
990
|
+
estimated_cost: Float?,
|
|
991
|
+
finish_reason: String?
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
type uri_hash = {
|
|
995
|
+
url: String,
|
|
996
|
+
label: String?,
|
|
997
|
+
page: Integer?,
|
|
998
|
+
kind: String
|
|
999
|
+
}
|
|
1000
|
+
|
|
1001
|
+
type archive_entry_hash = {
|
|
1002
|
+
path: String,
|
|
1003
|
+
mime_type: String,
|
|
1004
|
+
result: extraction_result_hash?
|
|
1005
|
+
}
|
|
1006
|
+
|
|
945
1007
|
type page_content_hash = {
|
|
946
1008
|
page_number: Integer,
|
|
947
1009
|
content: String,
|
|
@@ -1483,6 +1545,9 @@ module Kreuzberg
|
|
|
1483
1545
|
attr_reader quality_score: Float?
|
|
1484
1546
|
attr_reader processing_warnings: Array[ProcessingWarning]?
|
|
1485
1547
|
attr_reader annotations: Array[PdfAnnotation]?
|
|
1548
|
+
attr_reader uris: Array[uri_hash]?
|
|
1549
|
+
attr_reader children: Array[archive_entry_hash]?
|
|
1550
|
+
attr_reader llm_usage: Array[LlmUsage]?
|
|
1486
1551
|
|
|
1487
1552
|
# PDF annotation extracted from a document page (Struct from result.rb)
|
|
1488
1553
|
class PdfAnnotation
|
|
@@ -1521,6 +1586,11 @@ module Kreuzberg
|
|
|
1521
1586
|
def parse_document_structure: (Hash[String, untyped]? document_data) -> DocumentStructure?
|
|
1522
1587
|
def parse_extracted_keywords: (Array[extracted_keyword_hash]? keywords_data) -> Array[ExtractedKeyword]?
|
|
1523
1588
|
def parse_processing_warnings: (Array[processing_warning_hash]? warnings_data) -> Array[ProcessingWarning]
|
|
1589
|
+
def parse_uris: (Array[uri_hash]? uris_data) -> Array[uri_hash]?
|
|
1590
|
+
def build_uri: (Hash[String, untyped] u_hash) -> uri_hash
|
|
1591
|
+
def parse_children: (Array[untyped]? children_data) -> Array[archive_entry_hash]?
|
|
1592
|
+
def build_archive_entry: (Hash[String, untyped] c_hash) -> archive_entry_hash
|
|
1593
|
+
def parse_llm_usage: (Array[llm_usage_hash]? usage_data) -> Array[LlmUsage]?
|
|
1524
1594
|
def get_value: (Hash[String | Symbol, untyped] hash, String key, ?untyped default) -> untyped
|
|
1525
1595
|
def serialize_tables: () -> Array[table_hash]
|
|
1526
1596
|
def serialize_chunks: () -> Array[chunk_hash]?
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.8.3
|
|
4
|
+
version: 4.8.5
|
|
5
5
|
platform: aarch64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|