kreuzberg 4.6.1-aarch64-linux → 4.6.2-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/kreuzberg/extraction_api.rb +37 -0
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg_rb.so +0 -0
- data/spec/binding/render_spec.rb +91 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 406c791db1a8cb29e3ff2e89a60e0a29ed73c0ff0548338b60c3574bd5944f6f
|
|
4
|
+
data.tar.gz: 7565bbe0708afceadc13f43b57103a03529992f705caf21f1ddaa00e68ad27d6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a680fd2406f8dac338a53ab303ef29d117462cbf376d48be4af48846f7d4f82d4e7999e2aeac17a404e5da7feb79f1cccfb8f240153c0c36a4274e16eeb50f6f
|
|
7
|
+
data.tar.gz: dc1c1dc215020a65560490e159e38d6780b703bae597279cb0e3d2c369afce7a7bee06984972987d1c3a69fe2040da1cb1e2812df5c542590300dfcb09e5e7d7
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.1" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.2" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
data/lib/kreuzberg/extraction_api.rb
CHANGED
|
@@ -319,6 +319,43 @@ module Kreuzberg
|
|
|
319
319
|
results
|
|
320
320
|
end
|
|
321
321
|
|
|
322
|
+
# Render a single PDF page as a PNG image.
|
|
323
|
+
#
|
|
324
|
+
# @param path [String, Pathname] Path to the PDF file
|
|
325
|
+
# @param page_index [Integer] Zero-based page index
|
|
326
|
+
# @param dpi [Integer] Rendering resolution (default 150)
|
|
327
|
+
# @return [String] PNG-encoded binary string
|
|
328
|
+
# @raise [Errors::IOError] If the file cannot be read
|
|
329
|
+
# @raise [Errors::ParsingError] If rendering fails
|
|
330
|
+
def render_pdf_page(path, page_index, dpi: 150)
|
|
331
|
+
path_str = path.to_s
|
|
332
|
+
raise ArgumentError, 'page_index must be non-negative' if page_index.negative?
|
|
333
|
+
raise Errors::IOError, "File not found: #{path_str}" unless File.exist?(path_str)
|
|
334
|
+
|
|
335
|
+
native_render_pdf_page(path_str, page_index, dpi)
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
# Iterate over pages of a PDF lazily, yielding each page as it is rendered.
|
|
339
|
+
#
|
|
340
|
+
# Each page is rendered via the native FFI iterator, so only one page is in
|
|
341
|
+
# memory at a time.
|
|
342
|
+
#
|
|
343
|
+
# @param path [String, Pathname] Path to the PDF file
|
|
344
|
+
# @param dpi [Integer] Rendering resolution (default 150)
|
|
345
|
+
# @yieldparam page_index [Integer] Zero-based page index
|
|
346
|
+
# @yieldparam png_bytes [String] PNG-encoded binary string for the page
|
|
347
|
+
# @return [Enumerator] if no block is given
|
|
348
|
+
# @raise [Errors::IOError] If the file cannot be read
|
|
349
|
+
# @raise [Errors::ParsingError] If rendering fails
|
|
350
|
+
def render_pdf_pages_iter(path, dpi: 150, &block)
|
|
351
|
+
path_str = path.to_s
|
|
352
|
+
raise Errors::IOError, "File not found: #{path_str}" unless File.exist?(path_str)
|
|
353
|
+
|
|
354
|
+
return enum_for(:render_pdf_pages_iter, path, dpi: dpi) unless block
|
|
355
|
+
|
|
356
|
+
native_render_pdf_pages_iter(path_str, dpi, &block)
|
|
357
|
+
end
|
|
358
|
+
|
|
322
359
|
def normalize_config(config)
|
|
323
360
|
return {} if config.nil?
|
|
324
361
|
return config if config.is_a?(Hash)
|
data/lib/kreuzberg/version.rb
CHANGED
data/lib/kreuzberg_rb.so
CHANGED
|
Binary file
|
data/spec/binding/render_spec.rb
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Hand-written binding-specific edge case tests for PDF rendering.
|
|
2
|
+
# Happy-path render tests are auto-generated from fixtures in e2e/.
|
|
3
|
+
# These tests cover error handling, validation, and lifecycle patterns
|
|
4
|
+
# that vary per language and can't be generated uniformly.
|
|
5
|
+
|
|
6
|
+
# frozen_string_literal: true
|
|
7
|
+
|
|
8
|
+
require 'spec_helper'
|
|
9
|
+
|
|
10
|
+
RSpec.describe 'PDF Rendering' do
|
|
11
|
+
it 'exposes rendering methods' do
|
|
12
|
+
expect(Kreuzberg).to respond_to(:render_pdf_page)
|
|
13
|
+
expect(Kreuzberg).to respond_to(:render_pdf_pages_iter)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
describe '.render_pdf_page' do
|
|
17
|
+
it 'raises an error for a nonexistent file' do
|
|
18
|
+
expect do
|
|
19
|
+
Kreuzberg.render_pdf_page('/nonexistent/path/to/document.pdf', 0)
|
|
20
|
+
end.to raise_error(Kreuzberg::Errors::IOError)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'raises an error for an out-of-bounds page index' do
|
|
24
|
+
pdf_path = test_document_path('pdf/tiny.pdf')
|
|
25
|
+
skip 'Test PDF not available' unless File.exist?(pdf_path)
|
|
26
|
+
|
|
27
|
+
expect do
|
|
28
|
+
Kreuzberg.render_pdf_page(pdf_path, 9999)
|
|
29
|
+
end.to raise_error(StandardError)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
describe '.render_pdf_page with negative index' do
|
|
34
|
+
it 'raises ArgumentError for a negative page index' do
|
|
35
|
+
pdf_path = test_document_path('pdf/tiny.pdf')
|
|
36
|
+
skip 'Test PDF not available' unless File.exist?(pdf_path)
|
|
37
|
+
|
|
38
|
+
expect do
|
|
39
|
+
Kreuzberg.render_pdf_page(pdf_path, -1)
|
|
40
|
+
end.to raise_error(ArgumentError)
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
describe '.render_pdf_pages_iter' do
|
|
45
|
+
it 'raises an error for a nonexistent file' do
|
|
46
|
+
expect do
|
|
47
|
+
Kreuzberg.render_pdf_pages_iter('/nonexistent/path/to/document.pdf') { |_, _| nil }
|
|
48
|
+
end.to raise_error(Kreuzberg::Errors::IOError)
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
describe '.render_pdf_page with empty path' do
|
|
53
|
+
it 'raises an error for an empty path' do
|
|
54
|
+
expect do
|
|
55
|
+
Kreuzberg.render_pdf_page('', 0)
|
|
56
|
+
end.to raise_error(StandardError)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
describe '.render_pdf_pages_iter cleanup' do
|
|
61
|
+
it 'handles iterator cleanup without fully consuming' do
|
|
62
|
+
pdf_path = test_document_path('pdf/tiny.pdf')
|
|
63
|
+
skip 'Test PDF not available' unless File.exist?(pdf_path)
|
|
64
|
+
|
|
65
|
+
# Iterate but stop immediately — no crash
|
|
66
|
+
Kreuzberg.render_pdf_pages_iter(pdf_path) do |_page_index, _png_data|
|
|
67
|
+
break
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
describe '.render_pdf_pages_iter early termination' do
|
|
73
|
+
it 'returns valid PNG for the first page then stops' do
|
|
74
|
+
pdf_path = test_document_path('pdf/tiny.pdf')
|
|
75
|
+
skip 'Test PDF not available' unless File.exist?(pdf_path)
|
|
76
|
+
|
|
77
|
+
first_png = nil
|
|
78
|
+
Kreuzberg.render_pdf_pages_iter(pdf_path) do |page_index, png_data|
|
|
79
|
+
expect(page_index).to eq(0)
|
|
80
|
+
expect(png_data).to be_a(String)
|
|
81
|
+
expect(png_data.bytesize).to be > 8
|
|
82
|
+
# PNG magic bytes
|
|
83
|
+
expect(png_data.bytes[0..3]).to eq([0x89, 0x50, 0x4E, 0x47])
|
|
84
|
+
first_png = png_data
|
|
85
|
+
break
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
expect(first_png).not_to be_nil
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.6.1
|
|
4
|
+
version: 4.6.2
|
|
5
5
|
platform: aarch64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -220,6 +220,7 @@ files:
|
|
|
220
220
|
- spec/binding/plugins/ocr_backend_spec.rb
|
|
221
221
|
- spec/binding/plugins/postprocessor_spec.rb
|
|
222
222
|
- spec/binding/plugins/validator_spec.rb
|
|
223
|
+
- spec/binding/render_spec.rb
|
|
223
224
|
- spec/binding/tables_spec.rb
|
|
224
225
|
- spec/serialization_spec.rb
|
|
225
226
|
- spec/smoke/package_spec.rb
|