pdf_oxide 0.3.57-x86_64-linux → 0.3.58-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8564e1a91ad5315587fa3517aae2345f7bf6e6ffdfef70b0043444e17ae4de6
4
- data.tar.gz: a3744d89c18df9f0efafaf3440fb5cf96284804ea25a995c9922d685180fb69b
3
+ metadata.gz: ab3a6f46409c93c527e742d0445c255930ea2d7fbfd97b0f52062c785f2e19e3
4
+ data.tar.gz: e74db8c8145f9eaeb1bdd56280bdafb09c631c3e1450a8c141ca42f03621fef2
5
5
  SHA512:
6
- metadata.gz: 8c768bbcd4fbcfb695476c83b0bf8ee3795f1a07683365b2fe3381d25c0af07df5edc4b74ab727cff97f9f8eb12700dd8938042650910ff36a0f9e8f985b6532
7
- data.tar.gz: efc95af786a82face1a97778fa40666c8a7ef9668bcb57523919709902c4c31ef348fed0e71e525ac9257eb2e98855c056516668b4d61e9744b77c92bac445e2
6
+ metadata.gz: 83d5e906902462c4969693816f27871c21aa34572c3fb5ef32d39d8754ae0a86562673b4b80e14a83a1241f51546a85911782645c41fad9ad7de0271aa0474f0
7
+ data.tar.gz: 252ecac3a9c645f2f00aa871bdba7c8e8818b88eca60246f46f09adf0016b9a3adb3e51d0e55b3445d0b7bd253660741205dc7908a137c5667d9d915db146a26
data/Gemfile CHANGED
@@ -9,7 +9,7 @@ group :development do
9
9
  gem 'rake', '~> 13.0'
10
10
  gem 'rspec', '~> 3.12'
11
11
  gem 'rubocop', '~> 1.86'
12
- gem 'rubocop-rspec', '~> 2.20'
12
+ gem 'rubocop-rspec', '~> 3.9'
13
13
  gem 'yard', '~> 0.9'
14
14
  gem 'simplecov', '~> 0.22'
15
15
  gem 'simplecov-lcov', '~> 0.8'
Binary file
@@ -77,6 +77,10 @@ module PdfOxide
77
77
  # bytes but never calls free_string → leaks one full-document buffer
78
78
  # per call.
79
79
  attach_function :pdf_document_extract_text, %i[pointer int32 pointer], :pointer
80
+ # char *pdf_document_extract_structured_to_json(void *handle, int32_t page_index, int32_t *error_code)
81
+ # Returns owned char* (serialized StructuredPage JSON) — bind as
82
+ # :pointer so the caller frees via StringMarshaller/free_string (#536).
83
+ attach_function :pdf_document_extract_structured_to_json, %i[pointer int32 pointer], :pointer
80
84
  attach_function :pdf_document_to_markdown, %i[pointer int32 pointer], :pointer
81
85
  attach_function :pdf_document_to_markdown_all, %i[pointer pointer], :pointer
82
86
  attach_function :pdf_document_to_html, %i[pointer int32 pointer], :pointer
@@ -138,6 +138,23 @@ module PdfOxide
138
138
  StringMarshaller.from_c_string(ptr) || ''
139
139
  end
140
140
 
141
+ # Extract a structured representation of a single page (#536).
142
+ # Returns the parsed `StructuredPage` JSON as a Hash:
143
+ # `{ "page_index", "page_width", "page_height",
144
+ # "regions" => [ { "kind", "text", "bbox", "spans", "column_index" } ] }`.
145
+ # @param page [Integer] 0-based page index.
146
+ # @return [Hash] parsed structured page.
147
+ def extract_structured(page)
148
+ validate_page_index(page)
149
+ err = ::FFI::MemoryPointer.new(:int32)
150
+ ptr = Bindings.pdf_document_extract_structured_to_json(handle, page, err)
151
+ raise_for_code(err.read_int32, 'extract_structured')
152
+ json = StringMarshaller.from_c_string(ptr) || ''
153
+
154
+ require 'json'
155
+ JSON.parse(json)
156
+ end
157
+
141
158
  # Auto-routed extraction for a single page (v0.3.51 #517).
142
159
  # Returns native text where present, OCR'd text for scanned regions
143
160
  # when the `ocr` feature is available, and gracefully falls back to
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PdfOxide
4
- VERSION = '0.3.57'
4
+ VERSION = '0.3.58'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf_oxide
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.57
4
+ version: 0.3.58
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - PDF Oxide Contributors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-05-30 00:00:00.000000000 Z
11
+ date: 2026-05-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '2.20'
89
+ version: '3.9'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '2.20'
96
+ version: '3.9'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: simplecov-lcov
99
99
  requirement: !ruby/object:Gem::Requirement