kreuzberg 4.2.0 → 4.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +26 -17
  5. data/lib/kreuzberg/cli.rb +16 -6
  6. data/lib/kreuzberg/cli_proxy.rb +3 -1
  7. data/lib/kreuzberg/config.rb +59 -28
  8. data/lib/kreuzberg/djot_content.rb +225 -0
  9. data/lib/kreuzberg/extraction_api.rb +20 -4
  10. data/lib/kreuzberg/result.rb +12 -2
  11. data/lib/kreuzberg/version.rb +1 -1
  12. data/lib/kreuzberg.rb +1 -0
  13. data/sig/kreuzberg.rbs +23 -11
  14. data/spec/binding/batch_spec.rb +6 -5
  15. data/spec/binding/config_spec.rb +1 -1
  16. data/spec/binding/error_recovery_spec.rb +3 -3
  17. data/spec/binding/tables_spec.rb +11 -2
  18. data/spec/unit/config/extraction_config_spec.rb +2 -2
  19. data/spec/unit/config/output_format_spec.rb +18 -18
  20. data/vendor/Cargo.toml +1 -1
  21. data/vendor/kreuzberg/Cargo.toml +3 -2
  22. data/vendor/kreuzberg/README.md +1 -1
  23. data/vendor/kreuzberg/src/api/error.rs +60 -0
  24. data/vendor/kreuzberg/src/api/handlers.rs +153 -32
  25. data/vendor/kreuzberg/src/api/mod.rs +2 -0
  26. data/vendor/kreuzberg/src/api/openapi.rs +141 -0
  27. data/vendor/kreuzberg/src/api/router.rs +24 -2
  28. data/vendor/kreuzberg/src/api/startup.rs +21 -1
  29. data/vendor/kreuzberg/src/api/types.rs +50 -4
  30. data/vendor/kreuzberg/src/core/config/processing.rs +8 -1
  31. data/vendor/kreuzberg/src/core/config_validation/sections.rs +16 -4
  32. data/vendor/kreuzberg/src/core/extractor/file.rs +1 -2
  33. data/vendor/kreuzberg/src/core/extractor/mod.rs +2 -1
  34. data/vendor/kreuzberg/src/core/io.rs +7 -7
  35. data/vendor/kreuzberg/src/core/mime.rs +4 -4
  36. data/vendor/kreuzberg/src/extraction/excel.rs +246 -9
  37. data/vendor/kreuzberg/src/extraction/pptx/parser.rs +6 -0
  38. data/vendor/kreuzberg/src/plugins/mod.rs +1 -0
  39. data/vendor/kreuzberg/src/plugins/registry/extractor.rs +251 -5
  40. data/vendor/kreuzberg/src/plugins/registry/ocr.rs +150 -2
  41. data/vendor/kreuzberg/src/plugins/registry/processor.rs +213 -5
  42. data/vendor/kreuzberg/src/plugins/registry/validator.rs +220 -4
  43. data/vendor/kreuzberg/src/plugins/startup_validation.rs +385 -0
  44. data/vendor/kreuzberg/tests/config_behavioral.rs +14 -12
  45. data/vendor/kreuzberg/tests/core_integration.rs +2 -4
  46. data/vendor/kreuzberg/tests/mime_detection.rs +3 -2
  47. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +284 -1
  48. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +56 -0
  49. data/vendor/kreuzberg-ffi/kreuzberg.h +7 -2
  50. data/vendor/kreuzberg-ffi/src/helpers.rs +13 -1
  51. data/vendor/kreuzberg-ffi/src/lib.rs +8 -5
  52. data/vendor/kreuzberg-ffi/src/memory.rs +35 -1
  53. data/vendor/kreuzberg-ffi/src/types.rs +8 -5
  54. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  55. metadata +5 -2
@@ -15,11 +15,15 @@ module Kreuzberg
15
15
  # @example Extract with explicit MIME type
16
16
  # @example Extract with OCR enabled
17
17
  def extract_file_sync(path:, mime_type: nil, config: nil)
18
+ # Validate that the file exists
19
+ path_str = path.to_s
20
+ raise Errors::IOError, "File not found: #{path_str}" unless File.exist?(path_str)
21
+
18
22
  opts = normalize_config(config)
19
23
  hash = if mime_type
20
- native_extract_file_sync(path.to_s, mime_type.to_s, **opts)
24
+ native_extract_file_sync(path_str, mime_type.to_s, **opts)
21
25
  else
22
- native_extract_file_sync(path.to_s, **opts)
26
+ native_extract_file_sync(path_str, **opts)
23
27
  end
24
28
  result = Result.new(hash)
25
29
  record_cache_entry!(result, opts)
@@ -53,6 +57,8 @@ module Kreuzberg
53
57
  # response = HTTParty.get("https://example.com/document.docx")
54
58
  # result = Kreuzberg.extract_bytes_sync(response.body, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
55
59
  def extract_bytes_sync(data:, mime_type:, config: nil)
60
+ raise TypeError, "mime_type must be a String, got #{mime_type.inspect}" if mime_type.nil?
61
+
56
62
  opts = normalize_config(config)
57
63
  hash = native_extract_bytes_sync(data.to_s, mime_type.to_s, **opts)
58
64
  result = Result.new(hash)
@@ -92,6 +98,12 @@ module Kreuzberg
92
98
  # config = Kreuzberg::Config::Extraction.new(force_ocr: true)
93
99
  # results = Kreuzberg.batch_extract_files_sync(paths, config: config)
94
100
  def batch_extract_files_sync(paths:, config: nil)
101
+ # Validate that all files exist
102
+ paths.each do |path|
103
+ path_str = path.to_s
104
+ raise Errors::IOError, "File not found: #{path_str}" unless File.exist?(path_str)
105
+ end
106
+
95
107
  opts = normalize_config(config)
96
108
  hashes = native_batch_extract_files_sync(paths.map(&:to_s), **opts)
97
109
  results = hashes.map { |hash| Result.new(hash) }
@@ -130,11 +142,15 @@ module Kreuzberg
130
142
  # )
131
143
  # result = Kreuzberg.extract_file("document.pdf", config: config)
132
144
  def extract_file(path:, mime_type: nil, config: nil)
145
+ # Validate that the file exists
146
+ path_str = path.to_s
147
+ raise Errors::IOError, "File not found: #{path_str}" unless File.exist?(path_str)
148
+
133
149
  opts = normalize_config(config)
134
150
  hash = if mime_type
135
- native_extract_file(path.to_s, mime_type.to_s, **opts)
151
+ native_extract_file(path_str, mime_type.to_s, **opts)
136
152
  else
137
- native_extract_file(path.to_s, **opts)
153
+ native_extract_file(path_str, **opts)
138
154
  end
139
155
  result = Result.new(hash)
140
156
  record_cache_entry!(result, opts)
@@ -11,7 +11,7 @@ module Kreuzberg
11
11
  # rubocop:disable Metrics/ClassLength
12
12
  class Result
13
13
  attr_reader :content, :mime_type, :metadata, :metadata_json, :tables,
14
- :detected_languages, :chunks, :images, :pages, :elements
14
+ :detected_languages, :chunks, :images, :pages, :elements, :djot_content
15
15
 
16
16
  # @!attribute [r] cells
17
17
  # @return [Array<Array<String>>] Table cells (2D array)
@@ -180,6 +180,7 @@ module Kreuzberg
180
180
  #
181
181
  # @param hash [Hash] Hash returned from native extension
182
182
  #
183
+ # rubocop:disable Metrics/AbcSize
183
184
  def initialize(hash)
184
185
  @content = get_value(hash, 'content', '')
185
186
  @mime_type = get_value(hash, 'mime_type', '')
@@ -191,7 +192,9 @@ module Kreuzberg
191
192
  @images = parse_images(get_value(hash, 'images'))
192
193
  @pages = parse_pages(get_value(hash, 'pages'))
193
194
  @elements = parse_elements(get_value(hash, 'elements'))
195
+ @djot_content = parse_djot_content(get_value(hash, 'djot_content'))
194
196
  end
197
+ # rubocop:enable Metrics/AbcSize
195
198
 
196
199
  # Convert to hash
197
200
  #
@@ -207,7 +210,8 @@ module Kreuzberg
207
210
  chunks: serialize_chunks,
208
211
  images: serialize_images,
209
212
  pages: serialize_pages,
210
- elements: serialize_elements
213
+ elements: serialize_elements,
214
+ djot_content: @djot_content&.to_h
211
215
  }
212
216
  end
213
217
 
@@ -434,6 +438,12 @@ module Kreuzberg
434
438
  y1: coordinates_data['y1'].to_f
435
439
  )
436
440
  end
441
+
442
+ def parse_djot_content(djot_data)
443
+ return nil if djot_data.nil?
444
+
445
+ DjotContent.new(djot_data)
446
+ end
437
447
  end
438
448
  # rubocop:enable Metrics/ClassLength
439
449
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.2.0'
4
+ VERSION = '4.2.2'
5
5
  end
data/lib/kreuzberg.rb CHANGED
@@ -87,6 +87,7 @@ end
87
87
 
88
88
  require_relative 'kreuzberg/cache_api'
89
89
  require_relative 'kreuzberg/extraction_api'
90
+ require_relative 'kreuzberg/djot_content'
90
91
 
91
92
  Kreuzberg.singleton_class.prepend(Kreuzberg::CacheAPI)
92
93
  Kreuzberg.singleton_class.prepend(Kreuzberg::ExtractionAPI)
data/sig/kreuzberg.rbs CHANGED
@@ -417,14 +417,23 @@ module Kreuzberg
417
417
  attr_reader plain_text: String
418
418
  attr_reader blocks: Array[DjotContent::FormattedBlock]
419
419
  attr_reader metadata: Hash[untyped, untyped]
420
- attr_reader tables: Array[Table]
420
+ attr_reader metadata_json: String
421
+ attr_reader tables: Array[untyped]
421
422
  attr_reader images: Array[DjotContent::DjotImage]
422
423
  attr_reader links: Array[DjotContent::DjotLink]
423
424
  attr_reader footnotes: Array[DjotContent::Footnote]
424
425
  attr_reader attributes: Hash[String, untyped]?
425
426
 
426
- def initialize: (djot_content_hash hash) -> void
427
- def to_h: () -> djot_content_hash
427
+ def initialize: (untyped hash) -> void
428
+ def to_h: () -> Hash[Symbol, untyped]
429
+
430
+ private
431
+
432
+ def parse_metadata: (String metadata_json) -> Hash[untyped, untyped]
433
+ def parse_blocks: (Array[untyped] blocks_data) -> Array[FormattedBlock]
434
+ def parse_images: (Array[untyped] images_data) -> Array[DjotImage]
435
+ def parse_links: (Array[untyped] links_data) -> Array[DjotLink]
436
+ def parse_footnotes: (Array[untyped] footnotes_data) -> Array[Footnote]
428
437
 
429
438
  class FormattedBlock
430
439
  attr_reader block_type: String
@@ -433,28 +442,31 @@ module Kreuzberg
433
442
  attr_reader children: Array[FormattedBlock]?
434
443
  attr_reader attributes: Hash[String, untyped]?
435
444
 
436
- def initialize: (formatted_block_hash hash) -> void
437
- def to_h: () -> formatted_block_hash
445
+ def initialize: (?untyped hash_or_type, ?children: untyped, ?attributes: untyped, ?content: untyped, ?level: untyped, ?block_type: untyped) -> void
446
+ def to_h: () -> Hash[Symbol, untyped]
438
447
  end
439
448
 
440
449
  class DjotImage
441
450
  attr_reader url: String
442
451
  attr_reader alt: String?
443
452
  attr_reader title: String?
444
- attr_reader attributes: Hash[String, untyped]?
453
+ attr_reader width: Integer?
454
+ attr_reader height: Integer?
445
455
 
446
- def initialize: (djot_image_hash hash) -> void
447
- def to_h: () -> djot_image_hash
456
+ def initialize: (?untyped hash_or_url, ?alt: untyped, ?title: untyped, ?width: untyped, ?height: untyped, ?url: untyped, ?src: untyped) -> void
457
+ def src: () -> String
458
+ def to_h: () -> Hash[Symbol, untyped]
448
459
  end
449
460
 
450
461
  class DjotLink
451
462
  attr_reader url: String
452
- attr_reader text: String
463
+ attr_reader text: String?
453
464
  attr_reader title: String?
454
465
  attr_reader link_type: String?
455
466
 
456
- def initialize: (djot_link_hash hash) -> void
457
- def to_h: () -> djot_link_hash
467
+ def initialize: (?untyped hash_or_url, ?text: untyped, ?title: untyped, ?url: untyped, ?href: untyped, ?link_type: untyped) -> void
468
+ def href: () -> String
469
+ def to_h: () -> Hash[Symbol, untyped]
458
470
  end
459
471
 
460
472
  class Footnote
@@ -295,7 +295,7 @@ RSpec.describe Kreuzberg do
295
295
  end
296
296
 
297
297
  describe 'batch error handling' do
298
- it 'handles missing files gracefully in batch' do
298
+ it 'raises IOError for missing files in batch' do
299
299
  paths = [
300
300
  '/nonexistent/file1.txt',
301
301
  '/nonexistent/file2.txt'
@@ -303,10 +303,10 @@ RSpec.describe Kreuzberg do
303
303
 
304
304
  expect do
305
305
  described_class.batch_extract_files_sync(paths: paths)
306
- end.not_to raise_error
306
+ end.to raise_error(Kreuzberg::Errors::IOError, /not found/)
307
307
  end
308
308
 
309
- it 'handles mixed valid and invalid paths' do
309
+ it 'raises IOError when batch contains invalid paths' do
310
310
  paths = []
311
311
  temp_dir = Dir.mktmpdir
312
312
 
@@ -316,8 +316,9 @@ RSpec.describe Kreuzberg do
316
316
 
317
317
  paths << '/nonexistent/invalid.txt'
318
318
 
319
- results = described_class.batch_extract_files_sync(paths: paths)
320
- expect(results).to be_a(Array)
319
+ expect do
320
+ described_class.batch_extract_files_sync(paths: paths)
321
+ end.to raise_error(Kreuzberg::Errors::IOError, /not found/)
321
322
  ensure
322
323
  FileUtils.remove_entry(temp_dir)
323
324
  end
@@ -309,7 +309,7 @@ RSpec.describe Kreuzberg::Config do
309
309
  config = described_class.new
310
310
 
311
311
  expect(config.use_cache).to be true
312
- expect(config.enable_quality_processing).to be false
312
+ expect(config.enable_quality_processing).to be true
313
313
  expect(config.force_ocr).to be false
314
314
  expect(config.ocr).to be_nil
315
315
  expect(config.chunking).to be_nil
@@ -57,7 +57,7 @@ RSpec.describe 'Error Recovery' do
57
57
  nonexistent_path = '/nonexistent/file/that/does/not/exist.pdf'
58
58
 
59
59
  expect { Kreuzberg.extract_file_sync(path: nonexistent_path, config: config) }
60
- .to raise_error(Kreuzberg::Errors::ValidationError, /not found|does not exist|no such file/)
60
+ .to raise_error(Kreuzberg::Errors::IOError, /not found|does not exist|no such file/)
61
61
  end
62
62
 
63
63
  it 'provides descriptive error messages for invalid MIME types' do
@@ -293,7 +293,7 @@ RSpec.describe 'Error Recovery' do
293
293
 
294
294
  expect(validation_error).to be_a(ArgumentError)
295
295
 
296
- # Runtime error (file not found)
296
+ # Runtime error (file not found) - IOError since the file doesn't exist
297
297
  runtime_error = nil
298
298
  begin
299
299
  Kreuzberg.extract_file_sync(path: '/nonexistent/file.pdf')
@@ -301,7 +301,7 @@ RSpec.describe 'Error Recovery' do
301
301
  runtime_error = e
302
302
  end
303
303
 
304
- expect(runtime_error).to be_a(Kreuzberg::Errors::ValidationError)
304
+ expect(runtime_error).to be_a(Kreuzberg::Errors::IOError)
305
305
  end
306
306
 
307
307
  it 'provides error recovery suggestions in messages' do
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'spec_helper'
4
+ require 'tempfile'
5
+ require 'fileutils'
4
6
 
5
7
  RSpec.describe 'Table Extraction Quality' do
6
8
  describe 'table structure extraction' do
@@ -523,12 +525,19 @@ RSpec.describe 'Table Extraction Quality' do
523
525
  it 'handles documents with no tables gracefully' do
524
526
  config = Kreuzberg::Config::Extraction.new
525
527
 
528
+ # Create a temporary text file for this test
529
+ file = Tempfile.new(['no_tables_test', '.txt'])
530
+ file.write('This is a text document without any tables.')
531
+ file.close
532
+
526
533
  begin
527
- result = Kreuzberg.extract_file(path: 'test.txt', config: config)
534
+ result = Kreuzberg.extract_file(path: file.path, config: config)
528
535
  expect(result).not_to be_nil
529
536
  expect(result.tables).to be_a(Array) if result.tables
530
- rescue Kreuzberg::Errors::ValidationError
537
+ rescue Kreuzberg::Errors::IOError
531
538
  skip 'Text file not available for testing'
539
+ ensure
540
+ FileUtils.rm_f(file.path)
532
541
  end
533
542
  end
534
543
 
@@ -6,7 +6,7 @@ RSpec.describe Kreuzberg::Config::Extraction do
6
6
  config = described_class.new
7
7
 
8
8
  expect(config.use_cache).to be true
9
- expect(config.enable_quality_processing).to be false
9
+ expect(config.enable_quality_processing).to be true
10
10
  expect(config.force_ocr).to be false
11
11
  expect(config.ocr).to be_nil
12
12
  expect(config.chunking).to be_nil
@@ -103,7 +103,7 @@ RSpec.describe Kreuzberg::Config::Extraction do
103
103
  hash = config.to_h
104
104
 
105
105
  expect(hash[:use_cache]).to be true
106
- expect(hash[:enable_quality_processing]).to be false
106
+ expect(hash[:enable_quality_processing]).to be true
107
107
  expect(hash[:force_ocr]).to be false
108
108
  end
109
109
  end
@@ -282,34 +282,34 @@ RSpec.describe 'Output Format and Result Format Configuration' do
282
282
  end
283
283
 
284
284
  describe 'format validation and edge cases' do
285
- it 'handles empty string output_format' do
286
- config = described_class.new(output_format: '')
287
-
288
- expect(config.output_format).to eq ''
285
+ it 'raises error for empty string output_format' do
286
+ expect do
287
+ described_class.new(output_format: '')
288
+ end.to raise_error(ArgumentError, /Invalid output_format/)
289
289
  end
290
290
 
291
- it 'handles empty string result_format' do
292
- config = described_class.new(result_format: '')
293
-
294
- expect(config.result_format).to eq ''
291
+ it 'raises error for empty string result_format' do
292
+ expect do
293
+ described_class.new(result_format: '')
294
+ end.to raise_error(ArgumentError, /Invalid result_format/)
295
295
  end
296
296
 
297
- it 'handles whitespace in output_format' do
298
- config = described_class.new(output_format: ' plain ')
299
-
300
- expect(config.output_format).to eq ' plain '
297
+ it 'raises error for whitespace in output_format' do
298
+ expect do
299
+ described_class.new(output_format: ' plain ')
300
+ end.to raise_error(ArgumentError, /Invalid output_format/)
301
301
  end
302
302
 
303
- it 'handles case sensitivity in output_format' do
303
+ it 'normalizes case in output_format' do
304
304
  config = described_class.new(output_format: 'MarkDown')
305
305
 
306
- expect(config.output_format).to eq 'MarkDown'
306
+ expect(config.output_format).to eq 'markdown'
307
307
  end
308
308
 
309
- it 'handles custom string in result_format' do
310
- config = described_class.new(result_format: 'custom_format')
311
-
312
- expect(config.result_format).to eq 'custom_format'
309
+ it 'raises error for custom string in result_format' do
310
+ expect do
311
+ described_class.new(result_format: 'custom_format')
312
+ end.to raise_error(ArgumentError, /Invalid result_format/)
313
313
  end
314
314
  end
315
315
 
data/vendor/Cargo.toml CHANGED
@@ -3,7 +3,7 @@ members = ["kreuzberg", "kreuzberg-tesseract", "kreuzberg-ffi"]
3
3
  resolver = "2"
4
4
 
5
5
  [workspace.package]
6
- version = "4.2.0"
6
+ version = "4.2.2"
7
7
  edition = "2024"
8
8
  rust-version = "1.91"
9
9
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.2.0"
3
+ version = "4.2.2"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -71,7 +71,7 @@ keywords-yake = ["dep:yake-rust", "stopwords"]
71
71
  keywords-rake = ["dep:rake", "stopwords"]
72
72
  keywords = ["keywords-yake", "keywords-rake"]
73
73
 
74
- api = ["dep:axum", "dep:tower", "dep:tower-http", "tokio-runtime"]
74
+ api = ["dep:axum", "dep:tower", "dep:tower-http", "dep:utoipa", "tokio-runtime"]
75
75
  mcp = ["dep:rmcp", "tokio-runtime"]
76
76
  mcp-http = ["mcp", "api"]
77
77
 
@@ -198,6 +198,7 @@ rake = { version = "0.3.6", optional = true }
198
198
  axum = { version = "0.8", features = ["macros", "json", "multipart"], optional = true }
199
199
  tower = { version = "0.5", optional = true }
200
200
  tower-http = { version = "0.6", features = ["cors", "trace", "limit"], optional = true }
201
+ utoipa = { version = "5.3", features = ["axum_extras"], optional = true }
201
202
  rmcp = { version = "0.14.0", features = [
202
203
  "server",
203
204
  "macros",
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
17
17
 
18
18
  This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
19
19
 
20
- > **🚀 Version 4.2.0 Release**
20
+ > **🚀 Version 4.2.2 Release**
21
21
  > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
22
22
  >
23
23
  > **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
@@ -2,14 +2,38 @@
2
2
 
3
3
  use axum::{
4
4
  Json,
5
+ extract::{FromRequest, Request, rejection::JsonRejection},
5
6
  http::StatusCode,
6
7
  response::{IntoResponse, Response},
7
8
  };
9
+ use serde::de::DeserializeOwned;
8
10
 
9
11
  use crate::error::KreuzbergError;
10
12
 
11
13
  use super::types::ErrorResponse;
12
14
 
15
+ /// Custom JSON extractor that returns JSON error responses instead of plain text.
16
+ ///
17
+ /// This wraps axum's `Json` extractor but uses `ApiError` as the rejection type,
18
+ /// ensuring that all JSON parsing errors are returned as JSON with proper content type.
19
+ #[derive(Debug, Clone, Copy, Default)]
20
+ pub struct JsonApi<T>(pub T);
21
+
22
+ impl<T, S> FromRequest<S> for JsonApi<T>
23
+ where
24
+ T: DeserializeOwned,
25
+ S: Send + Sync,
26
+ {
27
+ type Rejection = ApiError;
28
+
29
+ async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
30
+ match Json::<T>::from_request(req, state).await {
31
+ Ok(Json(value)) => Ok(JsonApi(value)),
32
+ Err(rejection) => Err(ApiError::from(rejection)),
33
+ }
34
+ }
35
+ }
36
+
13
37
  /// API-specific error wrapper.
14
38
  #[derive(Debug)]
15
39
  pub struct ApiError {
@@ -79,3 +103,39 @@ impl From<KreuzbergError> for ApiError {
79
103
  }
80
104
  }
81
105
  }
106
+
107
+ impl From<JsonRejection> for ApiError {
108
+ fn from(rejection: JsonRejection) -> Self {
109
+ let (status, message) = match rejection {
110
+ JsonRejection::JsonDataError(err) => (
111
+ StatusCode::UNPROCESSABLE_ENTITY,
112
+ format!(
113
+ "Failed to deserialize the JSON body into the target type: {}",
114
+ err.body_text()
115
+ ),
116
+ ),
117
+ JsonRejection::JsonSyntaxError(err) => (
118
+ StatusCode::BAD_REQUEST,
119
+ format!("Failed to parse the request body as JSON: {}", err.body_text()),
120
+ ),
121
+ JsonRejection::MissingJsonContentType(_) => (
122
+ StatusCode::UNSUPPORTED_MEDIA_TYPE,
123
+ "Expected request with `Content-Type: application/json`".to_string(),
124
+ ),
125
+ JsonRejection::BytesRejection(err) => {
126
+ (StatusCode::BAD_REQUEST, format!("Failed to read request body: {}", err))
127
+ }
128
+ _ => (StatusCode::BAD_REQUEST, "Unknown JSON parsing error".to_string()),
129
+ };
130
+
131
+ Self {
132
+ status,
133
+ body: ErrorResponse {
134
+ error_type: "JsonParsingError".to_string(),
135
+ message,
136
+ traceback: None,
137
+ status_code: status.as_u16(),
138
+ },
139
+ }
140
+ }
141
+ }