kreuzberg 4.5.2 → 4.6.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (264)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -4
  3. data/ext/kreuzberg_rb/native/Cargo.lock +125 -510
  4. data/ext/kreuzberg_rb/native/Cargo.toml +3 -3
  5. data/ext/kreuzberg_rb/native/src/config/types.rs +23 -9
  6. data/ext/kreuzberg_rb/native/src/plugins/mod.rs +0 -2
  7. data/ext/kreuzberg_rb/native/src/plugins/ocr_backend.rs +2 -1
  8. data/ext/kreuzberg_rb/native/src/plugins/post_processor.rs +0 -1
  9. data/ext/kreuzberg_rb/native/src/plugins/validator.rs +0 -1
  10. data/ext/kreuzberg_rb/native/src/result.rs +85 -2
  11. data/lib/kreuzberg/config.rb +5 -3
  12. data/lib/kreuzberg/version.rb +1 -1
  13. data/sig/kreuzberg.rbs +2 -1
  14. data/vendor/Cargo.toml +8 -7
  15. data/vendor/kreuzberg/Cargo.toml +147 -116
  16. data/vendor/kreuzberg/README.md +1 -1
  17. data/vendor/kreuzberg/src/cache/core.rs +47 -42
  18. data/vendor/kreuzberg/src/cache/mod.rs +13 -13
  19. data/vendor/kreuzberg/src/chunking/core.rs +56 -1
  20. data/vendor/kreuzberg/src/chunking/mod.rs +3 -3
  21. data/vendor/kreuzberg/src/chunking/processor.rs +209 -1
  22. data/vendor/kreuzberg/src/chunking/yaml_section.rs +604 -0
  23. data/vendor/kreuzberg/src/core/batch_optimizations.rs +1 -1
  24. data/vendor/kreuzberg/src/core/config/concurrency.rs +79 -0
  25. data/vendor/kreuzberg/src/core/config/extraction/core.rs +32 -0
  26. data/vendor/kreuzberg/src/core/config/layout.rs +12 -0
  27. data/vendor/kreuzberg/src/core/config/processing.rs +28 -5
  28. data/vendor/kreuzberg/src/core/extractor/batch.rs +3 -7
  29. data/vendor/kreuzberg/src/core/extractor/file.rs +17 -0
  30. data/vendor/kreuzberg/src/core/extractor/helpers.rs +2 -3
  31. data/vendor/kreuzberg/src/core/extractor/legacy.rs +1 -0
  32. data/vendor/kreuzberg/src/core/io.rs +87 -0
  33. data/vendor/kreuzberg/src/core/mime.rs +42 -1
  34. data/vendor/kreuzberg/src/core/pipeline/cache.rs +4 -8
  35. data/vendor/kreuzberg/src/core/pipeline/execution.rs +3 -5
  36. data/vendor/kreuzberg/src/core/pipeline/features.rs +38 -70
  37. data/vendor/kreuzberg/src/core/pipeline/format.rs +23 -13
  38. data/vendor/kreuzberg/src/core/pipeline/initialization.rs +9 -9
  39. data/vendor/kreuzberg/src/core/pipeline/tests.rs +47 -20
  40. data/vendor/kreuzberg/src/doc_orientation.rs +1 -1
  41. data/vendor/kreuzberg/src/error.rs +1 -1
  42. data/vendor/kreuzberg/src/extraction/archive/gzip.rs +54 -0
  43. data/vendor/kreuzberg/src/extraction/archive/mod.rs +6 -4
  44. data/vendor/kreuzberg/src/extraction/archive/sevenz.rs +58 -0
  45. data/vendor/kreuzberg/src/extraction/archive/tar.rs +64 -0
  46. data/vendor/kreuzberg/src/extraction/archive/zip.rs +56 -0
  47. data/vendor/kreuzberg/src/extraction/docx/parser.rs +83 -35
  48. data/vendor/kreuzberg/src/extraction/email.rs +592 -2
  49. data/vendor/kreuzberg/src/extraction/html/converter.rs +12 -0
  50. data/vendor/kreuzberg/src/extraction/html/mod.rs +1 -0
  51. data/vendor/kreuzberg/src/extraction/html/structure.rs +1415 -0
  52. data/vendor/kreuzberg/src/extraction/hwp/error.rs +54 -0
  53. data/vendor/kreuzberg/src/extraction/hwp/mod.rs +72 -0
  54. data/vendor/kreuzberg/src/extraction/hwp/model.rs +109 -0
  55. data/vendor/kreuzberg/src/extraction/hwp/parser.rs +168 -0
  56. data/vendor/kreuzberg/src/extraction/hwp/reader.rs +125 -0
  57. data/vendor/kreuzberg/src/extraction/image_ocr.rs +40 -13
  58. data/vendor/kreuzberg/src/extraction/mod.rs +3 -0
  59. data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +12 -33
  60. data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +4 -11
  61. data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +4 -11
  62. data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +30 -0
  63. data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +4 -11
  64. data/vendor/kreuzberg/src/extraction/ppt/mod.rs +43 -5
  65. data/vendor/kreuzberg/src/extraction/pptx/elements.rs +12 -0
  66. data/vendor/kreuzberg/src/extraction/pptx/mod.rs +239 -17
  67. data/vendor/kreuzberg/src/extraction/pptx/parser.rs +40 -1
  68. data/vendor/kreuzberg/src/extraction/structured.rs +106 -74
  69. data/vendor/kreuzberg/src/extraction/transform/document_tree.rs +5 -0
  70. data/vendor/kreuzberg/src/extraction/transform/mod.rs +4 -0
  71. data/vendor/kreuzberg/src/extraction/xml.rs +88 -90
  72. data/vendor/kreuzberg/src/extractors/archive.rs +124 -23
  73. data/vendor/kreuzberg/src/extractors/bibtex.rs +151 -12
  74. data/vendor/kreuzberg/src/extractors/citation.rs +24 -3
  75. data/vendor/kreuzberg/src/extractors/csv.rs +261 -33
  76. data/vendor/kreuzberg/src/extractors/dbf.rs +116 -18
  77. data/vendor/kreuzberg/src/extractors/djot_format/conversion.rs +2 -0
  78. data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +335 -4
  79. data/vendor/kreuzberg/src/extractors/djot_format/parsing/event_handlers.rs +1 -2
  80. data/vendor/kreuzberg/src/extractors/djot_format/parsing/state.rs +1 -1
  81. data/vendor/kreuzberg/src/extractors/doc.rs +89 -2
  82. data/vendor/kreuzberg/src/extractors/docbook.rs +670 -21
  83. data/vendor/kreuzberg/src/extractors/docx.rs +239 -173
  84. data/vendor/kreuzberg/src/extractors/email.rs +107 -13
  85. data/vendor/kreuzberg/src/extractors/epub/content.rs +71 -8
  86. data/vendor/kreuzberg/src/extractors/epub/metadata.rs +73 -0
  87. data/vendor/kreuzberg/src/extractors/epub/mod.rs +166 -8
  88. data/vendor/kreuzberg/src/extractors/excel.rs +43 -3
  89. data/vendor/kreuzberg/src/extractors/fictionbook.rs +757 -50
  90. data/vendor/kreuzberg/src/extractors/html.rs +9 -1
  91. data/vendor/kreuzberg/src/extractors/hwp.rs +21 -7
  92. data/vendor/kreuzberg/src/extractors/image.rs +43 -10
  93. data/vendor/kreuzberg/src/extractors/iwork/keynote.rs +232 -0
  94. data/vendor/kreuzberg/src/extractors/iwork/mod.rs +295 -0
  95. data/vendor/kreuzberg/src/extractors/iwork/numbers.rs +203 -0
  96. data/vendor/kreuzberg/src/extractors/iwork/pages.rs +208 -0
  97. data/vendor/kreuzberg/src/extractors/jats/elements.rs +69 -0
  98. data/vendor/kreuzberg/src/extractors/jats/metadata.rs +8 -0
  99. data/vendor/kreuzberg/src/extractors/jats/mod.rs +479 -6
  100. data/vendor/kreuzberg/src/extractors/jats/parser.rs +2 -1
  101. data/vendor/kreuzberg/src/extractors/jupyter.rs +325 -20
  102. data/vendor/kreuzberg/src/extractors/latex/mod.rs +676 -3
  103. data/vendor/kreuzberg/src/extractors/markdown.rs +425 -182
  104. data/vendor/kreuzberg/src/extractors/markdown_utils.rs +169 -0
  105. data/vendor/kreuzberg/src/extractors/mdx.rs +402 -110
  106. data/vendor/kreuzberg/src/extractors/mod.rs +36 -26
  107. data/vendor/kreuzberg/src/extractors/odt.rs +620 -20
  108. data/vendor/kreuzberg/src/extractors/opml/core.rs +10 -3
  109. data/vendor/kreuzberg/src/extractors/opml/parser.rs +186 -1
  110. data/vendor/kreuzberg/src/extractors/orgmode.rs +507 -6
  111. data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +4 -0
  112. data/vendor/kreuzberg/src/extractors/pdf/mod.rs +58 -46
  113. data/vendor/kreuzberg/src/extractors/pdf/ocr.rs +232 -76
  114. data/vendor/kreuzberg/src/extractors/ppt.rs +110 -2
  115. data/vendor/kreuzberg/src/extractors/pptx.rs +21 -5
  116. data/vendor/kreuzberg/src/extractors/rst.rs +516 -21
  117. data/vendor/kreuzberg/src/extractors/rtf/mod.rs +126 -5
  118. data/vendor/kreuzberg/src/extractors/rtf/parser.rs +563 -0
  119. data/vendor/kreuzberg/src/extractors/structured.rs +105 -3
  120. data/vendor/kreuzberg/src/extractors/text.rs +19 -153
  121. data/vendor/kreuzberg/src/extractors/typst.rs +431 -3
  122. data/vendor/kreuzberg/src/extractors/xml.rs +122 -5
  123. data/vendor/kreuzberg/src/keywords/mod.rs +1 -3
  124. data/vendor/kreuzberg/src/keywords/processor.rs +7 -0
  125. data/vendor/kreuzberg/src/language_detection/mod.rs +2 -4
  126. data/vendor/kreuzberg/src/language_detection/processor.rs +5 -0
  127. data/vendor/kreuzberg/src/layout/mod.rs +133 -0
  128. data/vendor/kreuzberg/src/layout/model_manager.rs +66 -3
  129. data/vendor/kreuzberg/src/layout/models/mod.rs +2 -0
  130. data/vendor/kreuzberg/src/layout/models/rtdetr.rs +2 -1
  131. data/vendor/kreuzberg/src/layout/models/slanet.rs +550 -0
  132. data/vendor/kreuzberg/src/layout/models/table_classifier.rs +219 -0
  133. data/vendor/kreuzberg/src/layout/models/tatr.rs +5 -5
  134. data/vendor/kreuzberg/src/layout/models/yolo.rs +2 -1
  135. data/vendor/kreuzberg/src/layout/session.rs +10 -4
  136. data/vendor/kreuzberg/src/lib.rs +1 -0
  137. data/vendor/kreuzberg/src/mcp/format.rs +4 -0
  138. data/vendor/kreuzberg/src/ocr/language_registry.rs +3 -3
  139. data/vendor/kreuzberg/src/ocr/processor/execution.rs +6 -5
  140. data/vendor/kreuzberg/src/ocr/processor/mod.rs +20 -16
  141. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +9 -4
  142. data/vendor/kreuzberg/src/ocr/validation.rs +126 -127
  143. data/vendor/kreuzberg/src/paddle_ocr/backend.rs +4 -3
  144. data/vendor/kreuzberg/src/paddle_ocr/model_manager.rs +21 -44
  145. data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +35 -8
  146. data/vendor/kreuzberg/src/pdf/images.rs +287 -12
  147. data/vendor/kreuzberg/src/pdf/layout_runner.rs +20 -15
  148. data/vendor/kreuzberg/src/pdf/markdown/bridge.rs +10 -3
  149. data/vendor/kreuzberg/src/pdf/markdown/classify.rs +6 -7
  150. data/vendor/kreuzberg/src/pdf/markdown/paragraphs.rs +4 -0
  151. data/vendor/kreuzberg/src/pdf/markdown/pipeline.rs +254 -81
  152. data/vendor/kreuzberg/src/pdf/markdown/regions/heading.rs +3 -1
  153. data/vendor/kreuzberg/src/pdf/markdown/regions/mod.rs +2 -0
  154. data/vendor/kreuzberg/src/pdf/markdown/regions/table_recognition.rs +334 -1
  155. data/vendor/kreuzberg/src/pdf/rendering.rs +9 -0
  156. data/vendor/kreuzberg/src/plugins/extractor/mod.rs +2 -0
  157. data/vendor/kreuzberg/src/plugins/extractor/registry.rs +8 -12
  158. data/vendor/kreuzberg/src/plugins/extractor/trait.rs +4 -1
  159. data/vendor/kreuzberg/src/plugins/ocr.rs +36 -23
  160. data/vendor/kreuzberg/src/plugins/processor/mod.rs +8 -0
  161. data/vendor/kreuzberg/src/plugins/processor/registry.rs +1 -3
  162. data/vendor/kreuzberg/src/plugins/registry/extractor.rs +2 -0
  163. data/vendor/kreuzberg/src/plugins/registry/mod.rs +6 -17
  164. data/vendor/kreuzberg/src/plugins/registry/ocr.rs +2 -0
  165. data/vendor/kreuzberg/src/plugins/startup_validation.rs +4 -4
  166. data/vendor/kreuzberg/src/plugins/validator/mod.rs +11 -0
  167. data/vendor/kreuzberg/src/plugins/validator/registry.rs +4 -12
  168. data/vendor/kreuzberg/src/rendering/markdown.rs +620 -0
  169. data/vendor/kreuzberg/src/rendering/mod.rs +11 -0
  170. data/vendor/kreuzberg/src/rendering/plain.rs +288 -0
  171. data/vendor/kreuzberg/src/text/mod.rs +1 -1
  172. data/vendor/kreuzberg/src/text/quality_processor.rs +5 -0
  173. data/vendor/kreuzberg/src/text/string_utils.rs +6 -229
  174. data/vendor/kreuzberg/src/text/token_reduction/filters/markdown.rs +2 -3
  175. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +2 -3
  176. data/vendor/kreuzberg/src/types/builder.rs +958 -0
  177. data/vendor/kreuzberg/src/types/document_structure.rs +295 -5
  178. data/vendor/kreuzberg/src/types/extraction.rs +29 -3
  179. data/vendor/kreuzberg/src/types/formats.rs +4 -0
  180. data/vendor/kreuzberg/src/types/mod.rs +1 -0
  181. data/vendor/kreuzberg/src/utils/mod.rs +23 -0
  182. data/vendor/kreuzberg/src/utils/quality/mod.rs +4 -3
  183. data/vendor/kreuzberg/src/utils/xml_utils.rs +7 -0
  184. data/vendor/kreuzberg/tests/concurrency_stress.rs +3 -3
  185. data/vendor/kreuzberg/tests/csv_embedding_quality.rs +142 -0
  186. data/vendor/kreuzberg/tests/document_structure_tests.rs +3911 -0
  187. data/vendor/kreuzberg/tests/epub_markdown_headings_tests.rs +177 -0
  188. data/vendor/kreuzberg/tests/instrumentation_test.rs +2 -2
  189. data/vendor/kreuzberg/tests/iwork_integration.rs +220 -0
  190. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +0 -2
  191. data/vendor/kreuzberg/tests/ocr_stress.rs +9 -9
  192. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +1 -1
  193. data/vendor/kreuzberg/tests/paddle_ocr_integration.rs +2 -2
  194. data/vendor/kreuzberg/tests/pdf_integration.rs +25 -1
  195. data/vendor/kreuzberg/tests/pipeline_integration.rs +25 -75
  196. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +313 -35
  197. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +28 -30
  198. data/vendor/kreuzberg/tests/plugin_validator_test.rs +46 -46
  199. data/vendor/kreuzberg/tests/xml_embedding_quality.rs +137 -0
  200. data/vendor/kreuzberg-ffi/Cargo.toml +14 -14
  201. data/vendor/kreuzberg-ffi/kreuzberg.h +3 -3
  202. data/vendor/kreuzberg-ffi/src/batch_streaming.rs +15 -1
  203. data/vendor/kreuzberg-ffi/src/helpers.rs +5 -0
  204. data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +4 -32
  205. data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +3 -16
  206. data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +5 -40
  207. data/vendor/kreuzberg-ffi/src/plugins/validator.rs +4 -32
  208. data/vendor/kreuzberg-ffi/src/result.rs +1 -0
  209. data/vendor/kreuzberg-ffi/src/result_pool.rs +15 -1
  210. data/vendor/kreuzberg-ffi/src/result_view.rs +2 -0
  211. data/vendor/kreuzberg-paddle-ocr/Cargo.toml +14 -14
  212. data/vendor/kreuzberg-paddle-ocr/src/angle_net.rs +9 -4
  213. data/vendor/kreuzberg-paddle-ocr/src/constants.rs +33 -0
  214. data/vendor/kreuzberg-paddle-ocr/src/crnn_net.rs +13 -8
  215. data/vendor/kreuzberg-paddle-ocr/src/db_net.rs +3 -8
  216. data/vendor/kreuzberg-paddle-ocr/src/lib.rs +1 -0
  217. data/vendor/kreuzberg-paddle-ocr/src/ocr_lite.rs +9 -27
  218. data/vendor/kreuzberg-pdfium-render/Cargo.toml +17 -17
  219. data/vendor/kreuzberg-pdfium-render/src/lib.rs +4 -5
  220. data/vendor/kreuzberg-pdfium-render/src/pdf/document/form.rs +5 -70
  221. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/private.rs +2 -124
  222. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation.rs +2 -170
  223. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotations.rs +0 -12
  224. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/render_config.rs +1 -84
  225. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page.rs +0 -1
  226. data/vendor/kreuzberg-pdfium-render/src/pdf.rs +0 -1
  227. data/vendor/kreuzberg-tesseract/Cargo.toml +27 -27
  228. data/vendor/kreuzberg-tesseract/src/api.rs +16 -10
  229. data/vendor/kreuzberg-tesseract/src/lib.rs +1 -1
  230. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +7 -7
  231. metadata +27 -35
  232. data/.gitignore +0 -14
  233. data/.rspec +0 -3
  234. data/.rubocop.yaml +0 -1
  235. data/.rubocop.yml +0 -543
  236. data/Gemfile +0 -8
  237. data/Gemfile.lock +0 -274
  238. data/Rakefile +0 -34
  239. data/Steepfile +0 -51
  240. data/examples/async_patterns.rb +0 -282
  241. data/extconf.rb +0 -60
  242. data/kreuzberg.gemspec +0 -253
  243. data/spec/fixtures/config.toml +0 -38
  244. data/spec/fixtures/config.yaml +0 -41
  245. data/spec/fixtures/invalid_config.toml +0 -3
  246. data/test/metadata_types_test.rb +0 -959
  247. data/vendor/kreuzberg-pdfium-render/src/pdf/appearance_mode.rs +0 -39
  248. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/popup.rs +0 -64
  249. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/redacted.rs +0 -64
  250. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/variable_text.rs +0 -118
  251. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/widget.rs +0 -86
  252. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/xfa_widget.rs +0 -86
  253. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/button.rs +0 -64
  254. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/checkbox.rs +0 -142
  255. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/combo.rs +0 -129
  256. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/list.rs +0 -111
  257. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/option.rs +0 -36
  258. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/options.rs +0 -156
  259. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/private.rs +0 -518
  260. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/radio.rs +0 -140
  261. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/signature.rs +0 -63
  262. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/text.rs +0 -151
  263. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/unknown.rs +0 -62
  264. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field.rs +0 -436
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f29e7c9b7614fc78e0c54f673a804f79625081faa79317da54647937fe51a46
4
- data.tar.gz: b465d7be3c677c7a7a87eb888503f57b7cf42e5bac353418a191cc2629ad3d5c
3
+ metadata.gz: 203e9719bcf3cf2cda1252dcd7a5c5782e7b73936a304b626a351894d4fcd909
4
+ data.tar.gz: 2c02a45c882ef6b6b6935896e9334c46f012aaf8bc6f6669fa3c0110b67398e5
5
5
  SHA512:
6
- metadata.gz: fc25d857d8252f4759ed2ea07003107843182c87d855872da228f599371cdb9f705d2883995bc17ac6dd2fadf12d6aa2023eb1abf6f69f5e2844b1a90473cb02
7
- data.tar.gz: 5fe146eebe572f4a6b5ac89d9e187b97eb72787493b4748ba66c968014cbc7757b0ee6bec64516968ff7419a0c6c4c5b57e0b88240a61cde454ac72fa0fed9e7
6
+ metadata.gz: d3dde81c8c38b1ee99bed3cae32e477e4c8941d401c6449fc9c3eec3608a5b771b47c20ab3a9679ccf75059fed5e6c09f9d91eefed83a7d9dc59eebf7acb5626
7
+ data.tar.gz: e590247800d9752175985ee3b8ad0c89c5926f1afa0669a881cf476455c8514332880d30ff35d46a2836cf9cdc18b752296fb06a545f42129b548b5675180a71
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.5.2" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.0" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -61,7 +61,7 @@
61
61
  </div>
62
62
 
63
63
 
64
- Extract text, tables, images, and metadata from 88+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
64
+ Extract text, tables, images, and metadata from 91+ file formats including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance.
65
65
 
66
66
 
67
67
  ## Installation
@@ -211,9 +211,9 @@ puts "Processing time: #{result.metadata&.dig('processing_time')}ms"
211
211
 
212
212
  ## Features
213
213
 
214
- ### Supported File Formats (88+)
214
+ ### Supported File Formats (91+)
215
215
 
216
- 88+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
216
+ 91+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
217
217
 
218
218
  #### Office Documents
219
219