kreuzberg 4.8.3-aarch64-linux → 4.8.5-aarch64-linux

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c40936ccc97a7f9a34ee0d3482f7ecc8286afdcc98b3e66843973c99f683cb5
4
- data.tar.gz: a11e4538d5c8e6a1f5c8e61a6d41a2abcc80975d6c3d2a60b8b3062b403f3c04
3
+ metadata.gz: 3a8ebc29c703cfb07252f68d86a5dd37f17eb06d79bccce78f82eada2d732e9c
4
+ data.tar.gz: e711b683c3bdfa37ea1b43bc9e50bc7f21a4e2fca9e4dee4e8c2a2c2664e1bc5
5
5
  SHA512:
6
- metadata.gz: b710074a287c9f485408958f9400566e9f590fb4bfec4a8caf2f0661553f13c87d0aa5c0ab7c0672ff22a529eb27bff329b5a0dfff3661689f9dd89e4f4410d2
7
- data.tar.gz: 93aea8b667bd844b5b5fff5950074b27f765784213cf45f958788016366d70a8c9c25a5721b37a1a9f02cff388f08a5f6b41a9866cf99328e70072156397af35
6
+ metadata.gz: a50806a6cee6edfcf55db3a62d748c293c7b01f20f2744540d520489ca6bbbfe89d65fb3d0132e5e3f723121b835fdc99ac3856559910f227f708e43931070d9
7
+ data.tar.gz: 26c2f577491b2be1f89e7f49832e308bdc7aabb7453b761a7a72692316a8fce84268d346a3adc545e1308a1b7f3b471d4b671b3f9f4a9ac1ef65dd07139ddb62
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.3" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.5" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -39,10 +39,13 @@
39
39
  <a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
40
40
  <img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
41
41
  </a>
42
+ <a href="https://artifacthub.io/packages/search?repo=kreuzberg">
43
+ <img src="https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/kreuzberg" alt="Artifact Hub">
44
+ </a>
42
45
 
43
46
  <!-- Project Info -->
44
47
  <a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
45
- <img src="https://img.shields.io/badge/License-Elastic--2.0-blue.svg" alt="License">
48
+ <img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License">
46
49
  </a>
47
50
  <a href="https://docs.kreuzberg.dev">
48
51
  <img src="https://img.shields.io/badge/docs-kreuzberg.dev-007ec6" alt="Documentation">
@@ -419,7 +422,7 @@ Contributions are welcome! See [Contributing Guide](https://github.com/kreuzberg
419
422
 
420
423
  ## License
421
424
 
422
- Elastic License 2.0 (ELv2) - see [LICENSE](../../LICENSE) for details.
425
+ MIT License - see LICENSE file for details.
423
426
 
424
427
  ## Support
425
428
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.8.3'
4
+ VERSION = '4.8.5'
5
5
  end
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/kreuzberg.rbs CHANGED
@@ -18,7 +18,8 @@ module Kreuzberg
18
18
  type element_type = 'title' | 'narrative_text' | 'heading' | 'list_item' | 'table' | 'image' | 'page_break' | 'code_block' | 'block_quote' | 'footer' | 'header'
19
19
 
20
20
  # Bounding box coordinates for element positioning (T::Struct from types.rb)
21
- class BoundingBox attr_reader x0: Float
21
+ class BoundingBox
22
+ attr_reader x0: Float
22
23
  attr_reader y0: Float
23
24
  attr_reader x1: Float
24
25
  attr_reader y1: Float
@@ -28,7 +29,8 @@ module Kreuzberg
28
29
  end
29
30
 
30
31
  # Metadata for a semantic element (T::Struct from types.rb)
31
- class ElementMetadata attr_reader page_number: Integer?
32
+ class ElementMetadata
33
+ attr_reader page_number: Integer?
32
34
  attr_reader filename: String?
33
35
  attr_reader coordinates: BoundingBox?
34
36
  attr_reader element_index: Integer?
@@ -39,7 +41,8 @@ module Kreuzberg
39
41
  end
40
42
 
41
43
  # Semantic element extracted from document (T::Struct from types.rb)
42
- class Element attr_reader element_id: String
44
+ class Element
45
+ attr_reader element_id: String
43
46
  attr_reader element_type: String
44
47
  attr_reader text: String
45
48
  attr_reader metadata: ElementMetadata
@@ -49,7 +52,8 @@ module Kreuzberg
49
52
  end
50
53
 
51
54
  # Header/Heading metadata (T::Struct from types.rb)
52
- class HeaderMetadata attr_reader level: Integer
55
+ class HeaderMetadata
56
+ attr_reader level: Integer
53
57
  attr_reader text: String
54
58
  attr_reader id: String?
55
59
  attr_reader depth: Integer
@@ -60,7 +64,8 @@ module Kreuzberg
60
64
  end
61
65
 
62
66
  # Link metadata (T::Struct from types.rb)
63
- class LinkMetadata attr_reader href: String
67
+ class LinkMetadata
68
+ attr_reader href: String
64
69
  attr_reader text: String
65
70
  attr_reader title: String?
66
71
  attr_reader link_type: String
@@ -72,7 +77,8 @@ module Kreuzberg
72
77
  end
73
78
 
74
79
  # Image metadata (T::Struct from types.rb)
75
- class ImageMetadata attr_reader src: String
80
+ class ImageMetadata
81
+ attr_reader src: String
76
82
  attr_reader alt: String?
77
83
  attr_reader title: String?
78
84
  attr_reader dimensions: Array[Integer]?
@@ -84,7 +90,8 @@ module Kreuzberg
84
90
  end
85
91
 
86
92
  # Structured data metadata (T::Struct from types.rb)
87
- class StructuredData attr_reader data_type: String
93
+ class StructuredData
94
+ attr_reader data_type: String
88
95
  attr_reader raw_json: String
89
96
  attr_reader schema_type: String?
90
97
 
@@ -210,7 +217,8 @@ module Kreuzberg
210
217
  end
211
218
 
212
219
  # HTML metadata (T::Struct from types.rb)
213
- class HtmlMetadata attr_reader title: String?
220
+ class HtmlMetadata
221
+ attr_reader title: String?
214
222
  attr_reader description: String?
215
223
  attr_reader author: String?
216
224
  attr_reader copyright: String?
@@ -261,7 +269,8 @@ module Kreuzberg
261
269
  end
262
270
 
263
271
  # Extracted keyword with relevance metadata (T::Struct from types.rb)
264
- class ExtractedKeyword attr_reader text: String
272
+ class ExtractedKeyword
273
+ attr_reader text: String
265
274
  attr_reader score: Float
266
275
  attr_reader algorithm: String
267
276
  attr_reader positions: Array[Integer]?
@@ -271,15 +280,39 @@ module Kreuzberg
271
280
  end
272
281
 
273
282
  # Processing warning from a pipeline stage (T::Struct from types.rb)
274
- class ProcessingWarning attr_reader source: String
283
+ class ProcessingWarning
284
+ attr_reader source: String
275
285
  attr_reader message: String
276
286
 
277
287
  def initialize: (source: String, message: String) -> void
278
288
  def serialize: () -> Hash[Symbol, untyped]
279
289
  end
280
290
 
291
+ # LLM token usage from an LLM-assisted extraction step (T::Struct from types.rb)
292
+ class LlmUsage
293
+ attr_reader model: String
294
+ attr_reader source: String
295
+ attr_reader input_tokens: Integer?
296
+ attr_reader output_tokens: Integer?
297
+ attr_reader total_tokens: Integer?
298
+ attr_reader estimated_cost: Float?
299
+ attr_reader finish_reason: String?
300
+
301
+ def initialize: (
302
+ model: String,
303
+ source: String,
304
+ ?input_tokens: Integer?,
305
+ ?output_tokens: Integer?,
306
+ ?total_tokens: Integer?,
307
+ ?estimated_cost: Float?,
308
+ ?finish_reason: String?
309
+ ) -> void
310
+ def serialize: () -> Hash[Symbol, untyped]
311
+ end
312
+
281
313
  # Bounding box for document node positioning (T::Struct from types.rb)
282
- class DocumentBoundingBox attr_reader x0: Float
314
+ class DocumentBoundingBox
315
+ attr_reader x0: Float
283
316
  attr_reader y0: Float
284
317
  attr_reader x1: Float
285
318
  attr_reader y1: Float
@@ -289,7 +322,8 @@ module Kreuzberg
289
322
  end
290
323
 
291
324
  # Annotation for a document node (T::Struct from types.rb)
292
- class DocumentAnnotation attr_reader key: String
325
+ class DocumentAnnotation
326
+ attr_reader key: String
293
327
  attr_reader value: String
294
328
 
295
329
  def initialize: (key: String, value: String) -> void
@@ -297,7 +331,8 @@ module Kreuzberg
297
331
  end
298
332
 
299
333
  # Single node in the document structure tree (T::Struct from types.rb)
300
- class DocumentNode attr_reader id: String
334
+ class DocumentNode
335
+ attr_reader id: String
301
336
  attr_reader content: String
302
337
  attr_reader parent: Integer?
303
338
  attr_reader children: Array[Integer]
@@ -322,7 +357,8 @@ module Kreuzberg
322
357
  end
323
358
 
324
359
  # Structured document representation (T::Struct from types.rb)
325
- class DocumentStructure attr_reader nodes: Array[DocumentNode]
360
+ class DocumentStructure
361
+ attr_reader nodes: Array[DocumentNode]
326
362
 
327
363
  def initialize: (nodes: Array[DocumentNode]) -> void
328
364
  def serialize: () -> Hash[Symbol, untyped]
@@ -927,7 +963,10 @@ module Kreuzberg
927
963
  extracted_keywords: Array[extracted_keyword_hash]?,
928
964
  quality_score: Float?,
929
965
  processing_warnings: Array[processing_warning_hash]?,
930
- annotations: Array[pdf_annotation_hash]?
966
+ annotations: Array[pdf_annotation_hash]?,
967
+ uris: Array[uri_hash]?,
968
+ children: Array[archive_entry_hash]?,
969
+ llm_usage: Array[llm_usage_hash]?
931
970
  }
932
971
 
933
972
  type extracted_keyword_hash = {
@@ -942,6 +981,29 @@ module Kreuzberg
942
981
  message: String
943
982
  }
944
983
 
984
+ type llm_usage_hash = {
985
+ model: String,
986
+ source: String,
987
+ input_tokens: Integer?,
988
+ output_tokens: Integer?,
989
+ total_tokens: Integer?,
990
+ estimated_cost: Float?,
991
+ finish_reason: String?
992
+ }
993
+
994
+ type uri_hash = {
995
+ url: String,
996
+ label: String?,
997
+ page: Integer?,
998
+ kind: String
999
+ }
1000
+
1001
+ type archive_entry_hash = {
1002
+ path: String,
1003
+ mime_type: String,
1004
+ result: extraction_result_hash?
1005
+ }
1006
+
945
1007
  type page_content_hash = {
946
1008
  page_number: Integer,
947
1009
  content: String,
@@ -1483,6 +1545,9 @@ module Kreuzberg
1483
1545
  attr_reader quality_score: Float?
1484
1546
  attr_reader processing_warnings: Array[ProcessingWarning]?
1485
1547
  attr_reader annotations: Array[PdfAnnotation]?
1548
+ attr_reader uris: Array[uri_hash]?
1549
+ attr_reader children: Array[archive_entry_hash]?
1550
+ attr_reader llm_usage: Array[LlmUsage]?
1486
1551
 
1487
1552
  # PDF annotation extracted from a document page (Struct from result.rb)
1488
1553
  class PdfAnnotation
@@ -1521,6 +1586,11 @@ module Kreuzberg
1521
1586
  def parse_document_structure: (Hash[String, untyped]? document_data) -> DocumentStructure?
1522
1587
  def parse_extracted_keywords: (Array[extracted_keyword_hash]? keywords_data) -> Array[ExtractedKeyword]?
1523
1588
  def parse_processing_warnings: (Array[processing_warning_hash]? warnings_data) -> Array[ProcessingWarning]
1589
+ def parse_uris: (Array[uri_hash]? uris_data) -> Array[uri_hash]?
1590
+ def build_uri: (Hash[String, untyped] u_hash) -> uri_hash
1591
+ def parse_children: (Array[untyped]? children_data) -> Array[archive_entry_hash]?
1592
+ def build_archive_entry: (Hash[String, untyped] c_hash) -> archive_entry_hash
1593
+ def parse_llm_usage: (Array[llm_usage_hash]? usage_data) -> Array[LlmUsage]?
1524
1594
  def get_value: (Hash[String | Symbol, untyped] hash, String key, ?untyped default) -> untyped
1525
1595
  def serialize_tables: () -> Array[table_hash]
1526
1596
  def serialize_chunks: () -> Array[chunk_hash]?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.8.3
4
+ version: 4.8.5
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-12 00:00:00.000000000 Z
11
+ date: 2026-04-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler