kreuzberg 4.2.1 → 4.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +9 -9
  3. data/README.md +1 -1
  4. data/lib/kreuzberg/api_proxy.rb +3 -3
  5. data/lib/kreuzberg/cli_proxy.rb +2 -2
  6. data/lib/kreuzberg/config.rb +4 -20
  7. data/lib/kreuzberg/mcp_proxy.rb +3 -3
  8. data/lib/kreuzberg/version.rb +1 -1
  9. data/spec/binding/config_spec.rb +1 -1
  10. data/spec/unit/config/extraction_config_spec.rb +2 -2
  11. data/vendor/Cargo.toml +1 -1
  12. data/vendor/kreuzberg/Cargo.toml +3 -2
  13. data/vendor/kreuzberg/README.md +1 -1
  14. data/vendor/kreuzberg/src/api/error.rs +89 -0
  15. data/vendor/kreuzberg/src/api/handlers.rs +153 -32
  16. data/vendor/kreuzberg/src/api/mod.rs +2 -0
  17. data/vendor/kreuzberg/src/api/openapi.rs +141 -0
  18. data/vendor/kreuzberg/src/api/router.rs +24 -2
  19. data/vendor/kreuzberg/src/api/startup.rs +11 -5
  20. data/vendor/kreuzberg/src/api/types.rs +50 -4
  21. data/vendor/kreuzberg/src/core/config/processing.rs +8 -1
  22. data/vendor/kreuzberg/src/extraction/excel.rs +246 -9
  23. data/vendor/kreuzberg/src/mcp/format.rs +46 -57
  24. data/vendor/kreuzberg/src/mcp/server.rs +2 -8
  25. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +1 -7
  26. data/vendor/kreuzberg/tests/api_chunk.rs +25 -0
  27. data/vendor/kreuzberg/tests/api_embed.rs +60 -0
  28. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +56 -0
  29. data/vendor/kreuzberg-ffi/kreuzberg.h +7 -2
  30. data/vendor/kreuzberg-ffi/src/helpers.rs +13 -1
  31. data/vendor/kreuzberg-ffi/src/lib.rs +8 -5
  32. data/vendor/kreuzberg-ffi/src/memory.rs +35 -1
  33. data/vendor/kreuzberg-ffi/src/types.rs +8 -5
  34. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  35. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '010412940492f83bc170a4a4efc644ac7e3818502734035523796287837a3893'
4
- data.tar.gz: af24cba007cc58283d678d4b15251ebae3a5740f60ade9a54cc19973a2022a82
3
+ metadata.gz: 2c6fc44b151014f7e56c82bd191f55244a4294a259b24b95fc494dba6f8eaba6
4
+ data.tar.gz: 6e40a732814ff3e2a164e718cdb1c7a6ae838b2b2210a66b232f8675c7f79a80
5
5
  SHA512:
6
- metadata.gz: ad67348bec54a01ca3592ed72e9b2b8bc9e711a37e11b40ada31466c67f834132fc0de278c53a1c014fa6751da7abebae934cff2a9cc1835f7e056c895a273cb
7
- data.tar.gz: ca2cdb076a5d1af67f0e807978a966d1a391cc286bcdf5499544e3403196140918a54674beab77ea09fc0e8bc7ab66f357da5d984326a511b1d21643a3d6cf41
6
+ metadata.gz: f9c3a45f31c3ad9e3857872d8705b397b40c4317844ef421f4da4c2918e57411f5a626df4f6706d7db4916f33b8644c736e7b41508b398fd0197f1a87170fa3c
7
+ data.tar.gz: 8b05a75be261dbe583c4873d9d21079efff97d6c9c0340bbd8a73a43c9d15955431f4de20cd8b4a8b7956872f52e4467c253f5da03177a1e7d3b6a10d202b59d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.2.1)
4
+ kreuzberg (4.2.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -61,7 +61,7 @@ GEM
61
61
  parser (3.3.10.1)
62
62
  ast (~> 2.4.1)
63
63
  racc
64
- prism (1.8.0)
64
+ prism (1.9.0)
65
65
  pry (0.16.0)
66
66
  coderay (~> 1.1)
67
67
  method_source (~> 1.0)
@@ -98,7 +98,7 @@ GEM
98
98
  diff-lcs (>= 1.2.0, < 2.0)
99
99
  rspec-support (~> 3.13.0)
100
100
  rspec-support (3.13.6)
101
- rubocop (1.82.1)
101
+ rubocop (1.84.0)
102
102
  json (~> 2.3)
103
103
  language_server-protocol (~> 3.17.0.2)
104
104
  lint_roller (~> 1.1.0)
@@ -106,7 +106,7 @@ GEM
106
106
  parser (>= 3.3.0.2)
107
107
  rainbow (>= 2.2.2, < 4.0)
108
108
  regexp_parser (>= 2.9.3, < 3.0)
109
- rubocop-ast (>= 1.48.0, < 2.0)
109
+ rubocop-ast (>= 1.49.0, < 2.0)
110
110
  ruby-progressbar (~> 1.7)
111
111
  unicode-display_width (>= 2.4.0, < 4.0)
112
112
  rubocop-ast (1.49.0)
@@ -121,7 +121,7 @@ GEM
121
121
  rubocop (~> 1.81)
122
122
  ruby-progressbar (1.13.0)
123
123
  securerandom (0.4.1)
124
- sorbet-runtime (0.6.12897)
124
+ sorbet-runtime (0.6.12903)
125
125
  steep (1.10.0)
126
126
  activesupport (>= 5.1)
127
127
  concurrent-ruby (>= 1.1.10)
@@ -207,7 +207,7 @@ CHECKSUMS
207
207
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
208
208
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
209
209
  json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
210
- kreuzberg (4.2.1)
210
+ kreuzberg (4.2.3)
211
211
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
212
212
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
213
213
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
@@ -217,7 +217,7 @@ CHECKSUMS
217
217
  mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
218
218
  parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
219
219
  parser (3.3.10.1) sha256=06f6a725d2cd91e5e7f2b7c32ba143631e1f7c8ae2fb918fc4cebec187e6a688
220
- prism (1.8.0) sha256=84453a16ef5530ea62c5f03ec16b52a459575ad4e7b9c2b360fd8ce2c39c1254
220
+ prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
221
221
  pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e
222
222
  pry-byebug (3.12.0) sha256=594e094ae8a8390a7ad4c7b36ae36e13304ed02664c67417d108dc5f7213d1b7
223
223
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
@@ -236,13 +236,13 @@ CHECKSUMS
236
236
  rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
237
237
  rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
238
238
  rspec-support (3.13.6) sha256=2e8de3702427eab064c9352fe74488cc12a1bfae887ad8b91cba480ec9f8afb2
239
- rubocop (1.82.1) sha256=09f1a6a654a960eda767aebea33e47603080f8e9c9a3f019bf9b94c9cab5e273
239
+ rubocop (1.84.0) sha256=88dec310153bb685a879f5a7cdb601f6287b8f0ee675d9dc63a17c7204c4190a
240
240
  rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
241
241
  rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
242
242
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
243
243
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
244
244
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
245
- sorbet-runtime (0.6.12897) sha256=0348ab8803c4c3646977fee298083ded9b7e74d5b34b50c567c63eb7e36eb286
245
+ sorbet-runtime (0.6.12903) sha256=c23968c0dcf5a5db57f32c003fe3db7fb588c168cdd57d92ea4dceaba063118a
246
246
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
247
247
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
248
248
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.1" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.3" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
data/lib/kreuzberg/api_proxy.rb CHANGED
@@ -6,9 +6,9 @@ module Kreuzberg
6
6
  # @example Start the server
7
7
  # @example With block
8
8
  module APIProxy
9
- Error = Class.new(Kreuzberg::Errors::Error)
10
- MissingBinaryError = Class.new(Error)
11
- ServerError = Class.new(Error)
9
+ class Error < Kreuzberg::Errors::Error; end
10
+ class MissingBinaryError < Error; end
11
+ class ServerError < Error; end
12
12
 
13
13
  # API server instance
14
14
  class Server
data/lib/kreuzberg/cli_proxy.rb CHANGED
@@ -5,8 +5,8 @@ require 'open3'
5
5
  module Kreuzberg
6
6
  # @example
7
7
  module CLIProxy
8
- Error = Class.new(Kreuzberg::Errors::Error)
9
- MissingBinaryError = Class.new(Error)
8
+ class Error < Kreuzberg::Errors::Error; end
9
+ class MissingBinaryError < Error; end
10
10
 
11
11
  # CLI execution error with stderr and exit status
12
12
  class CLIExecutionError < Error
data/lib/kreuzberg/config.rb CHANGED
@@ -684,13 +684,6 @@ module Kreuzberg
684
684
  # image = Config::ImageExtraction.new(extract_images: true, target_dpi: 600)
685
685
  # config = Extraction.new(image_extraction: image)
686
686
  #
687
- # @example With preprocessing
688
- # preprocessing = Config::ImagePreprocessing.new(
689
- # binarization_method: "sauvola",
690
- # denoise: true
691
- # )
692
- # config = Extraction.new(image_preprocessing: preprocessing)
693
- #
694
687
  # @example With post-processing
695
688
  # postprocessor = Config::PostProcessor.new(
696
689
  # enabled: true,
@@ -708,14 +701,13 @@ module Kreuzberg
708
701
  # language_detection: Config::LanguageDetection.new(enabled: true),
709
702
  # pdf_options: Config::PDF.new(extract_images: true, passwords: ["secret"]),
710
703
  # image_extraction: Config::ImageExtraction.new(target_dpi: 600),
711
- # image_preprocessing: Config::ImagePreprocessing.new(denoise: true),
712
704
  # postprocessor: Config::PostProcessor.new(enabled: true)
713
705
  # )
714
706
  #
715
707
  class Extraction
716
708
  attr_reader :use_cache, :enable_quality_processing, :force_ocr,
717
709
  :ocr, :chunking, :language_detection, :pdf_options,
718
- :images, :image_preprocessing, :postprocessor,
710
+ :images, :postprocessor,
719
711
  :token_reduction, :keywords, :html_options, :pages,
720
712
  :max_concurrent_extractions, :output_format, :result_format
721
713
 
@@ -739,7 +731,7 @@ module Kreuzberg
739
731
  # Keys that are allowed in the Extraction config
740
732
  ALLOWED_KEYS = %i[
741
733
  use_cache enable_quality_processing force_ocr ocr chunking
742
- language_detection pdf_options image_extraction image_preprocessing
734
+ language_detection pdf_options image_extraction
743
735
  postprocessor token_reduction keywords html_options pages
744
736
  max_concurrent_extractions output_format result_format
745
737
  ].freeze
@@ -800,14 +792,13 @@ module Kreuzberg
800
792
 
801
793
  def initialize(hash = nil,
802
794
  use_cache: true,
803
- enable_quality_processing: false,
795
+ enable_quality_processing: true,
804
796
  force_ocr: false,
805
797
  ocr: nil,
806
798
  chunking: nil,
807
799
  language_detection: nil,
808
800
  pdf_options: nil,
809
801
  image_extraction: nil,
810
- image_preprocessing: nil,
811
802
  postprocessor: nil,
812
803
  token_reduction: nil,
813
804
  keywords: nil,
@@ -820,7 +811,7 @@ module Kreuzberg
820
811
  use_cache: use_cache, enable_quality_processing: enable_quality_processing,
821
812
  force_ocr: force_ocr, ocr: ocr, chunking: chunking, language_detection: language_detection,
822
813
  pdf_options: pdf_options, image_extraction: image_extraction,
823
- image_preprocessing: image_preprocessing, postprocessor: postprocessor,
814
+ postprocessor: postprocessor,
824
815
  token_reduction: token_reduction, keywords: keywords, html_options: html_options,
825
816
  pages: pages, max_concurrent_extractions: max_concurrent_extractions,
826
817
  output_format: output_format, result_format: result_format
@@ -846,7 +837,6 @@ module Kreuzberg
846
837
  @language_detection = normalize_config(params[:language_detection], LanguageDetection)
847
838
  @pdf_options = normalize_config(params[:pdf_options], PDF)
848
839
  @images = normalize_config(params[:image_extraction], ImageExtraction)
849
- @image_preprocessing = normalize_config(params[:image_preprocessing], ImagePreprocessing)
850
840
  @postprocessor = normalize_config(params[:postprocessor], PostProcessor)
851
841
  @token_reduction = normalize_config(params[:token_reduction], TokenReduction)
852
842
  @keywords = normalize_config(params[:keywords], Keywords)
@@ -878,7 +868,6 @@ module Kreuzberg
878
868
  end
879
869
 
880
870
  # rubocop:disable Metrics/CyclomaticComplexity
881
- # rubocop:disable Metrics/MethodLength
882
871
  def to_h
883
872
  {
884
873
  use_cache: @use_cache,
@@ -889,7 +878,6 @@ module Kreuzberg
889
878
  language_detection: @language_detection&.to_h,
890
879
  pdf_options: @pdf_options&.to_h,
891
880
  images: @images&.to_h,
892
- image_preprocessing: @image_preprocessing&.to_h,
893
881
  postprocessor: @postprocessor&.to_h,
894
882
  token_reduction: @token_reduction&.to_h,
895
883
  keywords: @keywords&.to_h,
@@ -900,7 +888,6 @@ module Kreuzberg
900
888
  result_format: @result_format
901
889
  }.compact
902
890
  end
903
- # rubocop:enable Metrics/MethodLength
904
891
  # rubocop:enable Metrics/CyclomaticComplexity
905
892
 
906
893
  # Serialize configuration to JSON string
@@ -1025,8 +1012,6 @@ module Kreuzberg
1025
1012
  @pdf_options = normalize_config(value, PDF)
1026
1013
  when :image_extraction
1027
1014
  @images = normalize_config(value, ImageExtraction)
1028
- when :image_preprocessing
1029
- @image_preprocessing = normalize_config(value, ImagePreprocessing)
1030
1015
  when :postprocessor
1031
1016
  @postprocessor = normalize_config(value, PostProcessor)
1032
1017
  when :token_reduction
@@ -1101,7 +1086,6 @@ module Kreuzberg
1101
1086
  @language_detection = merged.language_detection
1102
1087
  @pdf_options = merged.pdf_options
1103
1088
  @images = merged.image_extraction
1104
- @image_preprocessing = merged.image_preprocessing
1105
1089
  @postprocessor = merged.postprocessor
1106
1090
  @token_reduction = merged.token_reduction
1107
1091
  @keywords = merged.keywords
data/lib/kreuzberg/mcp_proxy.rb CHANGED
@@ -6,9 +6,9 @@ require 'json'
6
6
  module Kreuzberg
7
7
  # @example Start MCP server
8
8
  module MCPProxy
9
- Error = Class.new(Kreuzberg::Errors::Error)
10
- MissingBinaryError = Class.new(Error)
11
- ServerError = Class.new(Error)
9
+ class Error < Kreuzberg::Errors::Error; end
10
+ class MissingBinaryError < Error; end
11
+ class ServerError < Error; end
12
12
 
13
13
  # MCP server instance
14
14
  class Server
data/lib/kreuzberg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.2.1'
4
+ VERSION = '4.2.3'
5
5
  end
data/spec/binding/config_spec.rb CHANGED
@@ -309,7 +309,7 @@ RSpec.describe Kreuzberg::Config do
309
309
  config = described_class.new
310
310
 
311
311
  expect(config.use_cache).to be true
312
- expect(config.enable_quality_processing).to be false
312
+ expect(config.enable_quality_processing).to be true
313
313
  expect(config.force_ocr).to be false
314
314
  expect(config.ocr).to be_nil
315
315
  expect(config.chunking).to be_nil
data/spec/unit/config/extraction_config_spec.rb CHANGED
@@ -6,7 +6,7 @@ RSpec.describe Kreuzberg::Config::Extraction do
6
6
  config = described_class.new
7
7
 
8
8
  expect(config.use_cache).to be true
9
- expect(config.enable_quality_processing).to be false
9
+ expect(config.enable_quality_processing).to be true
10
10
  expect(config.force_ocr).to be false
11
11
  expect(config.ocr).to be_nil
12
12
  expect(config.chunking).to be_nil
@@ -103,7 +103,7 @@ RSpec.describe Kreuzberg::Config::Extraction do
103
103
  hash = config.to_h
104
104
 
105
105
  expect(hash[:use_cache]).to be true
106
- expect(hash[:enable_quality_processing]).to be false
106
+ expect(hash[:enable_quality_processing]).to be true
107
107
  expect(hash[:force_ocr]).to be false
108
108
  end
109
109
  end
data/vendor/Cargo.toml CHANGED
@@ -3,7 +3,7 @@ members = ["kreuzberg", "kreuzberg-tesseract", "kreuzberg-ffi"]
3
3
  resolver = "2"
4
4
 
5
5
  [workspace.package]
6
- version = "4.2.1"
6
+ version = "4.2.3"
7
7
  edition = "2024"
8
8
  rust-version = "1.91"
9
9
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
data/vendor/kreuzberg/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.2.1"
3
+ version = "4.2.3"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -71,7 +71,7 @@ keywords-yake = ["dep:yake-rust", "stopwords"]
71
71
  keywords-rake = ["dep:rake", "stopwords"]
72
72
  keywords = ["keywords-yake", "keywords-rake"]
73
73
 
74
- api = ["dep:axum", "dep:tower", "dep:tower-http", "tokio-runtime"]
74
+ api = ["dep:axum", "dep:tower", "dep:tower-http", "dep:utoipa", "tokio-runtime"]
75
75
  mcp = ["dep:rmcp", "tokio-runtime"]
76
76
  mcp-http = ["mcp", "api"]
77
77
 
@@ -198,6 +198,7 @@ rake = { version = "0.3.6", optional = true }
198
198
  axum = { version = "0.8", features = ["macros", "json", "multipart"], optional = true }
199
199
  tower = { version = "0.5", optional = true }
200
200
  tower-http = { version = "0.6", features = ["cors", "trace", "limit"], optional = true }
201
+ utoipa = { version = "5.4", features = ["axum_extras"], optional = true }
201
202
  rmcp = { version = "0.14.0", features = [
202
203
  "server",
203
204
  "macros",
data/vendor/kreuzberg/README.md CHANGED
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
17
17
 
18
18
  This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
19
19
 
20
- > **🚀 Version 4.2.1 Release**
20
+ > **🚀 Version 4.2.3 Release**
21
21
  > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
22
22
  >
23
23
  > **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
data/vendor/kreuzberg/src/api/error.rs CHANGED
@@ -2,14 +2,67 @@
2
2
 
3
3
  use axum::{
4
4
  Json,
5
+ body::to_bytes,
6
+ extract::{FromRequest, Request, rejection::JsonRejection},
5
7
  http::StatusCode,
6
8
  response::{IntoResponse, Response},
7
9
  };
10
+ use serde::de::DeserializeOwned;
8
11
 
9
12
  use crate::error::KreuzbergError;
10
13
 
11
14
  use super::types::ErrorResponse;
12
15
 
16
+ /// Custom JSON extractor that returns JSON error responses instead of plain text.
17
+ ///
18
+ /// This wraps axum's `Json` extractor but uses `ApiError` as the rejection type,
19
+ /// ensuring that all JSON parsing errors are returned as JSON with proper content type.
20
+ ///
21
+ /// Additionally, this extractor validates that the root JSON value is an object (not an array),
22
+ /// which prevents serde from incorrectly deserializing JSON arrays into struct fields.
23
+ #[derive(Debug, Clone, Copy, Default)]
24
+ pub struct JsonApi<T>(pub T);
25
+
26
+ impl<T, S> FromRequest<S> for JsonApi<T>
27
+ where
28
+ T: DeserializeOwned,
29
+ S: Send + Sync,
30
+ {
31
+ type Rejection = ApiError;
32
+
33
+ async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
34
+ // First, extract the body to check if it's a valid JSON object (not array)
35
+ let (parts, body) = req.into_parts();
36
+ let bytes = to_bytes(body, usize::MAX).await.map_err(|_| {
37
+ ApiError::new(
38
+ StatusCode::BAD_REQUEST,
39
+ KreuzbergError::Other("Failed to read request body".to_string()),
40
+ )
41
+ })?;
42
+
43
+ // Validate that the root JSON is an object, not an array
44
+ if !bytes.is_empty() {
45
+ let trimmed = std::str::from_utf8(&bytes).unwrap_or("").trim_start();
46
+ if trimmed.starts_with('[') {
47
+ return Err(ApiError::new(
48
+ StatusCode::BAD_REQUEST,
49
+ KreuzbergError::validation(
50
+ "Expected JSON object, but received JSON array. \
51
+ Please wrap your data in an object with appropriate fields.",
52
+ ),
53
+ ));
54
+ }
55
+ }
56
+
57
+ // Reconstruct the request and use the standard Json extractor
58
+ let req = Request::from_parts(parts, axum::body::Body::from(bytes));
59
+ match Json::<T>::from_request(req, state).await {
60
+ Ok(Json(value)) => Ok(JsonApi(value)),
61
+ Err(rejection) => Err(ApiError::from(rejection)),
62
+ }
63
+ }
64
+ }
65
+
13
66
  /// API-specific error wrapper.
14
67
  #[derive(Debug)]
15
68
  pub struct ApiError {
@@ -79,3 +132,39 @@ impl From<KreuzbergError> for ApiError {
79
132
  }
80
133
  }
81
134
  }
135
+
136
+ impl From<JsonRejection> for ApiError {
137
+ fn from(rejection: JsonRejection) -> Self {
138
+ let (status, message) = match rejection {
139
+ JsonRejection::JsonDataError(err) => (
140
+ StatusCode::UNPROCESSABLE_ENTITY,
141
+ format!(
142
+ "Failed to deserialize the JSON body into the target type: {}",
143
+ err.body_text()
144
+ ),
145
+ ),
146
+ JsonRejection::JsonSyntaxError(err) => (
147
+ StatusCode::BAD_REQUEST,
148
+ format!("Failed to parse the request body as JSON: {}", err.body_text()),
149
+ ),
150
+ JsonRejection::MissingJsonContentType(_) => (
151
+ StatusCode::UNSUPPORTED_MEDIA_TYPE,
152
+ "Expected request with `Content-Type: application/json`".to_string(),
153
+ ),
154
+ JsonRejection::BytesRejection(err) => {
155
+ (StatusCode::BAD_REQUEST, format!("Failed to read request body: {}", err))
156
+ }
157
+ _ => (StatusCode::BAD_REQUEST, "Unknown JSON parsing error".to_string()),
158
+ };
159
+
160
+ Self {
161
+ status,
162
+ body: ErrorResponse {
163
+ error_type: "JsonParsingError".to_string(),
164
+ message,
165
+ traceback: None,
166
+ status_code: status.as_u16(),
167
+ },
168
+ }
169
+ }
170
+ }