pdf_oxide 0.3.56-x86_64-linux → 0.3.58-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/ext/pdf_oxide/libpdf_oxide.so +0 -0
- data/lib/pdf_oxide/ffi/bindings.rb +4 -0
- data/lib/pdf_oxide/pdf_document.rb +17 -0
- data/lib/pdf_oxide/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ab3a6f46409c93c527e742d0445c255930ea2d7fbfd97b0f52062c785f2e19e3
|
|
4
|
+
data.tar.gz: e74db8c8145f9eaeb1bdd56280bdafb09c631c3e1450a8c141ca42f03621fef2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 83d5e906902462c4969693816f27871c21aa34572c3fb5ef32d39d8754ae0a86562673b4b80e14a83a1241f51546a85911782645c41fad9ad7de0271aa0474f0
|
|
7
|
+
data.tar.gz: 252ecac3a9c645f2f00aa871bdba7c8e8818b88eca60246f46f09adf0016b9a3adb3e51d0e55b3445d0b7bd253660741205dc7908a137c5667d9d915db146a26
|
data/Gemfile
CHANGED
|
Binary file
|
|
data/lib/pdf_oxide/ffi/bindings.rb
CHANGED
|
@@ -77,6 +77,10 @@ module PdfOxide
|
|
|
77
77
|
# bytes but never calls free_string → leaks one full-document buffer
|
|
78
78
|
# per call.
|
|
79
79
|
attach_function :pdf_document_extract_text, %i[pointer int32 pointer], :pointer
|
|
80
|
+
# char *pdf_document_extract_structured_to_json(void *handle, int32_t page_index, int32_t *error_code)
|
|
81
|
+
# Returns owned char* (serialized StructuredPage JSON) — bind as
|
|
82
|
+
# :pointer so the caller frees via StringMarshaller/free_string (#536).
|
|
83
|
+
attach_function :pdf_document_extract_structured_to_json, %i[pointer int32 pointer], :pointer
|
|
80
84
|
attach_function :pdf_document_to_markdown, %i[pointer int32 pointer], :pointer
|
|
81
85
|
attach_function :pdf_document_to_markdown_all, %i[pointer pointer], :pointer
|
|
82
86
|
attach_function :pdf_document_to_html, %i[pointer int32 pointer], :pointer
|
|
data/lib/pdf_oxide/pdf_document.rb
CHANGED
|
@@ -138,6 +138,23 @@ module PdfOxide
|
|
|
138
138
|
StringMarshaller.from_c_string(ptr) || ''
|
|
139
139
|
end
|
|
140
140
|
|
|
141
|
+
# Extract a structured representation of a single page (#536).
|
|
142
|
+
# Returns the parsed `StructuredPage` JSON as a Hash:
|
|
143
|
+
# `{ "page_index", "page_width", "page_height",
|
|
144
|
+
# "regions" => [ { "kind", "text", "bbox", "spans", "column_index" } ] }`.
|
|
145
|
+
# @param page [Integer] 0-based page index.
|
|
146
|
+
# @return [Hash] parsed structured page.
|
|
147
|
+
def extract_structured(page)
|
|
148
|
+
validate_page_index(page)
|
|
149
|
+
err = ::FFI::MemoryPointer.new(:int32)
|
|
150
|
+
ptr = Bindings.pdf_document_extract_structured_to_json(handle, page, err)
|
|
151
|
+
raise_for_code(err.read_int32, 'extract_structured')
|
|
152
|
+
json = StringMarshaller.from_c_string(ptr) || ''
|
|
153
|
+
|
|
154
|
+
require 'json'
|
|
155
|
+
JSON.parse(json)
|
|
156
|
+
end
|
|
157
|
+
|
|
141
158
|
# Auto-routed extraction for a single page (v0.3.51 #517).
|
|
142
159
|
# Returns native text where present, OCR'd text for scanned regions
|
|
143
160
|
# when the `ocr` feature is available, and gracefully falls back to
|
data/lib/pdf_oxide/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pdf_oxide
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.56
|
|
4
|
+
version: 0.3.58
|
|
5
5
|
platform: x86_64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- PDF Oxide Contributors
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: ffi
|
|
@@ -86,14 +86,14 @@ dependencies:
|
|
|
86
86
|
requirements:
|
|
87
87
|
- - "~>"
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: '
|
|
89
|
+
version: '3.9'
|
|
90
90
|
type: :development
|
|
91
91
|
prerelease: false
|
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
|
94
94
|
- - "~>"
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
|
-
version: '
|
|
96
|
+
version: '3.9'
|
|
97
97
|
- !ruby/object:Gem::Dependency
|
|
98
98
|
name: simplecov-lcov
|
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|