pdf_oxide 0.3.55-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ffi'
4
+ require 'rbconfig'
5
+
6
module PdfOxide
  module FFI
    # Locates the native PDF Oxide shared library for the running platform.
    #
    # Lookup order implemented by {.find_library_path}:
    # 1. the copy bundled inside a platform-tagged gem (ext/pdf_oxide/),
    #    checked for every candidate name before anything else;
    # 2. per candidate name: system locations, then cargo build output of a
    #    development checkout, then the current working directory;
    # 3. the bare library name, leaving the search to the dynamic loader.
    module Library
      # Candidate file names of the native library, chosen from RbConfig's
      # host_os.
      # @return [Array<String>] Library names to try loading, in order
      # @raise [UnsupportedPlatformError] when host_os matches no known platform
      def self.find_library
        case RbConfig::CONFIG['host_os']
        when /darwin/
          %w[libpdf_oxide.dylib libpdf_oxide.0.dylib]
        when /linux/
          %w[libpdf_oxide.so libpdf_oxide.so.0]
        when /mswin|mingw/
          %w[pdf_oxide.dll libpdf_oxide.dll]
        else
          raise UnsupportedPlatformError, "Unsupported OS: #{RbConfig::CONFIG['host_os']}"
        end
      end

      # Memoized wrapper around {.find_library_path}.
      # @return [String] Path to native library
      def self.library_path
        @library_path ||= find_library_path
      end

      # Resolves the library location without caching.
      # @return [String] an existing path, or the first candidate name when
      #   nothing was found (the dynamic loader then does the search)
      def self.find_library_path
        names = find_library

        # Native-gem layout: the cdylib is staged inside the gem at
        # ext/pdf_oxide/ during platform-specific gem packaging. It must win
        # over any system or dev copy, so every candidate name is checked
        # here before the other locations are consulted at all.
        names.each do |lib_name|
          bundled = File.expand_path("../../../ext/pdf_oxide/#{lib_name}", __dir__)
          return bundled if File.exist?(bundled)
        end

        names.each do |lib_name|
          # System paths (ldconfig cache, Homebrew prefix).
          from_system = system_find_library(lib_name)
          return from_system if from_system

          # Dev-checkout layouts, then the bare name relative to the
          # current working directory.
          dev_candidates = [
            File.expand_path("../../target/release/#{lib_name}", __dir__),
            File.expand_path("../../target/debug/#{lib_name}", __dir__),
            File.expand_path("../../../target/release/#{lib_name}", __dir__),
            File.expand_path("../../../target/debug/#{lib_name}", __dir__),
            lib_name
          ]
          from_dev = dev_candidates.find { |path| File.exist?(path) }
          return from_dev if from_dev
        end

        # Fallback to library name (system will search)
        names.first
      end

      # Platform dispatch for the system-wide lookup.
      # @param lib_name [String] one of the names from {.find_library}
      # @return [String, nil] resolved path, or nil when not found or when
      #   host_os matches none of the branches
      def self.system_find_library(lib_name)
        case RbConfig::CONFIG['host_os']
        when /darwin/
          ldconfig_search(lib_name) || homebrew_find(lib_name)
        when /linux/
          ldconfig_search(lib_name)
        when /mswin|mingw/
          windows_find(lib_name)
        end
      end

      # Looks the library up in the output of `ldconfig -p`.
      #
      # Filtering happens in Ruby rather than through `grep`, so the library
      # name is never interpolated into a shell command and its dots are not
      # treated as regex wildcards. An entry matches when its soname column
      # equals lib_name or is a version-suffixed form of it ("<name>.<n>").
      # @param lib_name [String]
      # @return [String, nil] path from the first matching entry, or nil
      def self.ldconfig_search(lib_name)
        `ldconfig -p 2>/dev/null`.each_line do |line|
          soname_part, path = line.split('=>', 2)
          next if path.nil? # header line or anything that is not an entry

          # First token of the left-hand side is the soname; the
          # parenthesised ABI tag that follows is ignored.
          soname = soname_part[/\S+/]
          next unless soname
          next unless soname == lib_name || soname.start_with?("#{lib_name}.")

          return path.strip
        end
        nil
      rescue StandardError
        # Best-effort probe: any failure means "not found here".
        nil
      end

      # Looks for the library under "<brew --prefix>/lib".
      # @param lib_name [String]
      # @return [String, nil] existing path, or nil when brew prints nothing
      #   or the file is absent
      def self.homebrew_find(lib_name)
        output = `brew --prefix 2>/dev/null`.strip
        return nil if output.empty?

        path = File.join(output, 'lib', lib_name)
        File.exist?(path) ? path : nil
      rescue StandardError
        nil
      end

      # Windows DLL search path is handled by the system, so nothing is
      # resolved here.
      # @return [nil]
      def self.windows_find(_lib_name)
        nil
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module PdfOxide
  module FFI
    # Moves strings across the Ruby <-> C ABI boundary as UTF-8.
    #
    # `*char` values returned by the cdylib are heap-allocated by Rust and
    # have to be released through `free_string`. Handing them to `pdf_free`
    # (the handle deallocator) corrupts the heap. This module keeps that
    # distinction away from callers.
    module StringMarshaller
      class << self
        # Produce a UTF-8 copy of a Ruby value for the C ABI. Invalid and
        # unconvertible sequences are replaced instead of raising.
        # @param ruby_string [String, nil] nil is passed through untouched.
        # @return [String, nil]
        def to_utf8(ruby_string)
          return nil if ruby_string.nil?

          text = ruby_string.to_s
          text.encode('UTF-8', invalid: :replace, undef: :replace)
        end

        # Copy a C string into Ruby and, by default, release the native
        # buffer afterwards (also when reading raises).
        # @param ptr [FFI::Pointer]
        # @param free_after [Boolean] release via `free_string` once read.
        # @return [String, nil] UTF-8 tagged string, or nil for a nil/null pointer.
        def from_c_string(ptr, free_after: true)
          return nil if ptr.nil? || ptr.null?

          begin
            raw = ptr.read_string
            raw.force_encoding('UTF-8')
          ensure
            should_free = free_after && !ptr.null?
            free_c_string(ptr) if should_free
          end
        end

        # Release a `*char` handed out by the cdylib. Nil and null pointers
        # are ignored, as is the case where Bindings exposes no `free_string`.
        def free_c_string(ptr)
          return if ptr.nil? || ptr.null?

          Bindings.free_string(ptr) if Bindings.respond_to?(:free_string)
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module PdfOxide
  # Stateless converters that render a {PdfDocument} as Markdown or HTML.
  #
  # Mirrors `fyi.oxide.pdf.MarkdownConverter`. Nothing is stored between
  # calls: the document is always passed in. Each format can be produced
  # for a single page or for the whole document.
  module MarkdownConverter
    module_function

    # Render one page, or the entire document, as Markdown.
    # @param doc [PdfDocument]
    # @param page_index [Integer, nil] nil selects the whole document.
    # @return [String] Markdown ('' when the native call yields no string).
    def to_markdown(doc, page_index = nil)
      raise ::PdfOxide::ArgumentError, 'doc cannot be nil' if doc.nil?

      status = ::FFI::MemoryPointer.new(:int32)
      whole_document = page_index.nil?
      native_text = whole_document ? Bindings.pdf_document_to_markdown_all(doc.handle, status)
                                   : Bindings.pdf_document_to_markdown(doc.handle, page_index, status)

      # The native call reports failure through the out-parameter.
      failure = status.read_int32
      raise InternalError, "to_markdown failed (#{failure})" unless failure == 0

      markdown = StringMarshaller.from_c_string(native_text)
      markdown || ''
    end

    # Render one page, or the entire document, as HTML.
    # @param doc [PdfDocument]
    # @param page_index [Integer, nil] nil selects the whole document.
    # @return [String] HTML ('' when the native call yields no string).
    def to_html(doc, page_index = nil)
      raise ::PdfOxide::ArgumentError, 'doc cannot be nil' if doc.nil?

      status = ::FFI::MemoryPointer.new(:int32)
      whole_document = page_index.nil?
      native_text = whole_document ? Bindings.pdf_document_to_html_all(doc.handle, status)
                                   : Bindings.pdf_document_to_html(doc.handle, page_index, status)

      # The native call reports failure through the out-parameter.
      failure = status.read_int32
      raise InternalError, "to_html failed (#{failure})" unless failure == 0

      html = StringMarshaller.from_c_string(native_text)
      html || ''
    end
  end
end
@@ -0,0 +1,218 @@
1
+ # frozen_string_literal: true
2
+
3
module PdfOxide
  # Create / edit / save PDFs. Read concerns live on {PdfDocument};
  # mutate concerns on {DocumentEditor}; creation + transformation
  # (markdown→PDF, html→PDF) live here.
  #
  # Mirrors `fyi.oxide.pdf.Pdf`. Lifecycle: instances own a native
  # handle and **must be closed** via {#close} or the block-form
  # `Pdf.from_markdown(...) { |pdf| ... }`. Close is idempotent. A GC
  # finalizer frees handles that were never closed.
  class Pdf
    # ────────────────────── factories ──────────────────────

    # Build a PDF from a Markdown source.
    # @param markdown [String]
    # @yield [Pdf] when a block is given the Pdf is closed after the block
    #   and the block's value is returned.
    # @return [Pdf] (without a block)
    # @raise [PdfOxide::ArgumentError] when markdown is nil or empty
    # @raise [ParseError] when the native call fails or returns null
    def self.from_markdown(markdown, &block)
      raise ::PdfOxide::ArgumentError, 'markdown cannot be empty' if markdown.nil? || markdown.empty?

      build_from(:pdf_from_markdown, markdown, &block)
    end

    # Build a PDF from an HTML source. CSS is honored per pdf_oxide's
    # html_css pipeline. Block semantics as for {.from_markdown}.
    # @param html [String]
    # @return [Pdf]
    # @raise [PdfOxide::ArgumentError] when html is nil or empty
    def self.from_html(html, &block)
      raise ::PdfOxide::ArgumentError, 'html cannot be empty' if html.nil? || html.empty?

      build_from(:pdf_from_html, html, &block)
    end

    # Build a PDF from plain text. Block semantics as for {.from_markdown}.
    # @param text [String]
    # @return [Pdf]
    # @raise [PdfOxide::ArgumentError] when text is nil or empty
    def self.from_text(text, &block)
      raise ::PdfOxide::ArgumentError, 'text cannot be empty' if text.nil? || text.empty?

      build_from(:pdf_from_text, text, &block)
    end

    # Build a PDF from JPEG/PNG image bytes. Format is auto-detected from
    # magic bytes.
    #
    # Only the FIRST image is converted: the cdylib exposes
    # pdf_from_image_bytes per single image and multi-image support needs
    # per-binding plumbing it doesn't yet expose. Any further images are
    # ignored, and a warning is emitted so the data loss is not silent.
    # @param images [Array<String>] one or more image byte blobs.
    # @yield [Pdf] closed after the block; the block's value is returned.
    # @return [Pdf] (without a block)
    # @raise [PdfOxide::ArgumentError] when images, or its first entry, is nil or empty
    # @raise [ParseError] when the native call fails or returns null
    def self.from_images(images, &block)
      raise ::PdfOxide::ArgumentError, 'images cannot be empty' if images.nil? || images.empty?

      first = images.first
      raise ::PdfOxide::ArgumentError, 'image cannot be empty' if first.nil? || first.empty?

      if images.size > 1
        warn "PdfOxide::Pdf.from_images: only the first of #{images.size} images is used; " \
             'the remaining images are ignored'
      end

      # Binary copy so bytesize and the bytes written agree regardless of
      # the encoding tag on the caller's string.
      binary = first.b
      buf = ::FFI::MemoryPointer.new(:uint8, binary.bytesize)
      buf.write_bytes(binary, 0, binary.bytesize)
      err = ::FFI::MemoryPointer.new(:int32)
      handle = Bindings.pdf_from_image_bytes(buf, binary.bytesize, err)
      code = err.read_int32
      raise ParseError, "pdf_from_image_bytes failed (#{code})" if code != 0
      raise ParseError, 'pdf_from_image_bytes returned null' if handle.nil? || handle.null?

      wrap_handle(handle, &block)
    end

    # Convenience for tests / toolchain bring-up: builds a PDF from a
    # single space via {.from_text}, intended as a blank document.
    # @return [Pdf]
    def self.create_empty(&block)
      from_text(' ', &block)
    end

    # @return [String] library version.
    def self.version
      PdfOxide::VERSION
    end

    # Prefetch OCR models for the given languages.
    # @param languages [Array<String>, String] BCP-47 / ISO tags; joined
    #   with commas before being handed to the native call.
    # @return [String] cache directory path (may be empty on no-OCR builds).
    # @raise [InternalError] when the native call reports a non-zero code
    def self.prefetch_models(languages)
      csv = Array(languages).join(',')
      err = ::FFI::MemoryPointer.new(:int32)
      ptr = Bindings.pdf_oxide_prefetch_models(csv, err)
      code = err.read_int32
      raise InternalError, "prefetch_models failed (#{code})" if code != 0

      StringMarshaller.from_c_string(ptr) || ''
    end

    # @return [Boolean] whether the build supports OCR model provisioning.
    def self.prefetch_available?
      Bindings.pdf_oxide_prefetch_available != 0
    end

    # @api private (factory helper)
    # Calls the named Bindings function with (content, err) and wraps the
    # returned handle.
    # @param symbol [Symbol] name of the Bindings function to invoke
    # @param content [String] source passed to the native call
    # @raise [ParseError] on a non-zero error code or a null handle
    def self.build_from(symbol, content, &block)
      err = ::FFI::MemoryPointer.new(:int32)
      handle = Bindings.send(symbol, content, err)
      code = err.read_int32
      raise ParseError, "#{symbol} failed (#{code})" if code != 0
      raise ParseError, "#{symbol} returned null" if handle.nil? || handle.null?

      wrap_handle(handle, &block)
    end

    # Shared tail of every factory: wrap the native handle and either
    # return the Pdf or yield it and close it afterwards (also when the
    # block raises).
    def self.wrap_handle(handle)
      pdf = new(handle)
      return pdf unless block_given?

      begin
        yield pdf
      ensure
        pdf.close
      end
    end
    private_class_method :wrap_handle

    # @api private (use one of the factory methods)
    def initialize(handle)
      @handle = handle
      @closed = false
      # One-slot array shared with the finalizer. The finalizer proc holds
      # only this array, never the Pdf itself, and {#close} clears the
      # slot so the handle is not freed a second time.
      @tracker = [@handle]
      ObjectSpace.define_finalizer(self, self.class.finalizer(@tracker))
    end

    # @api private
    # nil once {#close} has run.
    attr_reader :handle

    # Serialize the document.
    # @return [String] BINARY-encoded PDF bytes.
    # @raise [InvalidStateError] when the Pdf has been closed
    # @raise [InternalError] when the native call fails or returns null
    def to_bytes
      raise InvalidStateError, 'Pdf has been closed' if @closed

      # NOTE(review): the length is read back as int32 — confirm this
      # matches the out-parameter type in the native signature.
      len_ptr = ::FFI::MemoryPointer.new(:int32)
      err = ::FFI::MemoryPointer.new(:int32)
      buf = Bindings.pdf_save_to_bytes(@handle, len_ptr, err)
      code = err.read_int32
      raise InternalError, "pdf_save_to_bytes failed (#{code})" if code != 0
      raise InternalError, 'pdf_save_to_bytes returned null' if buf.nil? || buf.null?

      begin
        buf.read_string(len_ptr.read_int32).force_encoding(Encoding::BINARY)
      ensure
        # Release the native buffer even when copying it out raised.
        Bindings.free_bytes(buf) if Bindings.respond_to?(:free_bytes)
      end
    end

    # Write the PDF bytes to `path`.
    # @param path [String]
    # @return [String] absolute path written.
    # @raise [InvalidStateError] when the Pdf has been closed
    # @raise [PdfOxide::ArgumentError] when path is nil or empty
    # @raise [IoError] when the error code or the return code is non-zero
    def save(path)
      raise InvalidStateError, 'Pdf has been closed' if @closed
      raise ::PdfOxide::ArgumentError, 'path cannot be empty' if path.nil? || path.empty?

      err = ::FFI::MemoryPointer.new(:int32)
      rc = Bindings.pdf_save(@handle, path, err)
      code = err.read_int32
      raise IoError, "pdf_save failed (#{code})" if code != 0 || rc != 0

      File.absolute_path(path)
    end

    # Idempotent free.
    def close
      return if @closed

      h = @handle
      @handle = nil
      @closed = true
      # Disarm the finalizer before freeing so it cannot free again.
      @tracker[0] = nil if @tracker
      Bindings.pdf_free(h) if h && !h.null?
    end

    # @return [Boolean] true once {#close} runs.
    def closed?
      @closed
    end

    # ─────────── static convenience: split-by-bookmarks ───────────

    # Count the bookmark-split segments that would result from splitting
    # `source_pdf` at `level` (1 = top-level only; 0 = all). Useful
    # for previewing without producing output.
    #
    # The native plan is returned as JSON. Output that cannot be parsed is
    # treated as an empty plan, so the count is 0 in that case.
    # @param source_pdf [String] raw PDF bytes.
    # @param level [Integer] bookmark depth.
    # @return [Integer] number of segments.
    # @raise [PdfOxide::ArgumentError] when source_pdf is nil
    # @raise [InternalError] when the native call reports a non-zero code
    def self.plan_split_by_bookmarks_count(source_pdf, level)
      raise ::PdfOxide::ArgumentError, 'source_pdf cannot be nil' if source_pdf.nil?

      require 'json' # loaded lazily; only this method needs it

      PdfOxide::PdfDocument.open(source_pdf) do |doc|
        err = ::FFI::MemoryPointer.new(:int32)
        opts = JSON.generate(level: level)
        ptr = Bindings.pdf_document_plan_split_by_bookmarks(doc.handle, opts, err)
        code = err.read_int32
        raise InternalError, "plan_split_by_bookmarks failed (#{code})" if code != 0

        json = StringMarshaller.from_c_string(ptr) || '[]'
        arr = begin
          JSON.parse(json)
        rescue JSON::ParserError
          []
        end
        Array(arr).length
      end
    end

    # @api private
    # Builds the GC finalizer. The proc closes over `tracker` only; it
    # frees the handle still stored in slot 0, if any, and clears the slot.
    # @param tracker [Array] one-slot array holding the native handle
    # @return [Proc]
    def self.finalizer(tracker)
      proc do
        h = tracker[0]
        if h && !h.null?
          Bindings.pdf_free(h)
          tracker[0] = nil
        end
      end
    end
  end
end