pdf_oxide 0.3.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PdfOxide
4
+ # The primary read-only entry point to a PDF.
5
+ #
6
+ # Mirrors `fyi.oxide.pdf.PdfDocument`. Lifecycle: a PdfDocument owns
7
+ # native memory and **must be closed** when no longer in use. The
8
+ # idiomatic Ruby pattern is the block form `PdfDocument.open(path) do |doc| ... end`
9
+ # which closes automatically; for parity with the Java `AutoCloseable`
10
+ # contract, an explicit `#close` is also supported and is idempotent
11
+ # (a second call is a no-op, not a crash).
12
+ #
13
+ # A `Finalizer` backstop frees leaked handles on GC; callers must
14
+ # not rely on it for timely cleanup.
15
+ #
16
+ # @example block form (recommended)
17
+ # PdfOxide::PdfDocument.open('invoice.pdf') do |doc|
18
+ # puts doc.extract_text(0)
19
+ # end
20
+ #
21
+ # @example explicit close
22
+ # doc = PdfOxide::PdfDocument.open('invoice.pdf')
23
+ # begin
24
+ # puts doc.extract_text(0)
25
+ # ensure
26
+ # doc.close
27
+ # end
28
+ class PdfDocument
29
+ # @return [String] absolute path the document was opened from
30
+ # (or a synthetic `<in-memory>` token for byte-opened docs).
31
+ attr_reader :path
32
+
33
# Factory for a document backed by a file on disk or by in-memory bytes.
#
# Without a block the new document is returned and the caller owns it
# (and must call `#close`). With a block the document is yielded and is
# closed when the block exits — normally or through an exception — and
# the block's value becomes the return value.
#
# @param source [String] a filesystem path or raw PDF bytes.
# @param password [String, nil] optional password, forwarded to `.new`.
# @yield [PdfDocument] the open document.
# @return [PdfDocument, Object] the document, or the block's return value.
# @raise [FileNotFoundError] `source` is neither an existing path nor PDF bytes.
# @raise [ParseError] the native open call failed or returned a null handle.
# NOTE(review): earlier docs also listed EncryptedError, but no code path
# visible in this class raises it — confirm before relying on it.
def self.open(source, password: nil, &block)
  document = new(source, password: password)
  if block_given?
    begin
      yield document
    ensure
      # Runs on normal exit and on exceptions alike.
      document.close
    end
  else
    document
  end
end
53
+
54
# One-shot convenience: open `source`, extract one page's text, close.
#
# @param source [String] path or bytes (same rules as `.open`).
# @param page [Integer] 0-based page index (default 0).
# @param password [String, nil] optional password, forwarded to `.open`
#   so the one-shot form accepts the same inputs as the block form.
# @return [String] extracted text.
def self.extract_text(source, page: 0, password: nil)
  # rubocop:disable Security/Open — PdfDocument.open opens a PDF, not a process.
  open(source, password: password) { |doc| doc.extract_text(page) }
  # rubocop:enable Security/Open
end
63
+
64
# Construct a document and register its GC safety net.
# See {.open} for the block-form factory.
#
# @param source [String] path or raw PDF bytes (resolved by `open_native`).
# @param password [String, nil] when truthy, `#authenticate` is invoked.
# @raise [PdfOxide::ArgumentError] `source` is nil.
def initialize(source, password: nil)
  raise ::PdfOxide::ArgumentError, 'source cannot be nil' if source.nil?

  @path, @handle = open_native(source)
  @closed = false

  # One-element array shared with the finalizer proc. `#close` blanks
  # slot 0 so a later GC pass finds nothing left to free.
  tracker = [@handle]
  @tracker = tracker
  ObjectSpace.define_finalizer(self, self.class.finalizer(tracker))

  # NOTE(review): the Boolean returned by #authenticate is discarded, so
  # a failed authentication does not abort construction — confirm that
  # this is the intended contract.
  authenticate(password) if password
end
77
+
78
# Raw native handle, exposed for sibling classes (MarkdownConverter,
# AutoExtractor, PdfValidator, PdfSigner) that pass it to their own
# FFI calls.
#
# @return [FFI::Pointer] the live handle.
# @raise [InvalidStateError] the document was closed (or holds no handle).
def handle
  usable = !@closed && !@handle.nil?
  raise InvalidStateError, 'PdfDocument has been closed' unless usable

  @handle
end
87
+
88
# Authenticate against this document's encryption.
#
# In this version no native unlock call is made: per the original
# author's note, the v0.3.55 cdylib exposes no stable 3-arg unlock entry
# (`pdf_document_unlock_with_password` was removed and
# `pdf_document_authenticate` only has an 8-pointer placeholder shape).
# The method therefore fails closed instead of crashing.
#
# @param password [String] must not be nil (its value is otherwise unused).
# @return [Boolean] true when the document is not encrypted; false for
#   every encrypted document, regardless of the password supplied.
# @raise [PdfOxide::ArgumentError] `password` is nil.
def authenticate(password)
  raise ::PdfOxide::ArgumentError, 'password cannot be nil' if password.nil?

  # Unencrypted => nothing to unlock => success; encrypted => fail closed.
  !encrypted?
end
103
+
104
# Number of pages reported by the native library.
#
# @return [Integer] page count.
# @raise [InvalidStateError] document has been closed (via `#handle`).
# @raise [PdfOxide::Error subclass] non-zero native status, mapped by
#   `raise_for_code`.
def page_count
  status = ::FFI::MemoryPointer.new(:int32)
  Bindings.pdf_document_get_page_count(handle, status).tap do
    # Inspect the out-parameter before handing the count to the caller.
    raise_for_code(status.read_int32, 'page_count')
  end
end
111
+
112
# PDF version of the document, formatted as "<major>.<minor>".
#
# @return [String] e.g. "1.7"; the literal 'unknown' when the call raises
#   FFI::NotFoundError (presumably a build lacking the accessor).
def pdf_version
  major_ptr = ::FFI::MemoryPointer.new(:uint8)
  minor_ptr = ::FFI::MemoryPointer.new(:uint8)
  Bindings.pdf_document_get_version(handle, major_ptr, minor_ptr)
  [major_ptr, minor_ptr].map(&:read_uint8).join('.')
rescue ::FFI::NotFoundError
  'unknown'
end
121
+
122
# Whether this PDF carries an encryption dictionary.
#
# The native entry point is called with the handle only. Per the
# original author's note its C signature is
# `bool pdf_document_is_encrypted(const PdfDocument *handle)` — there is
# no error out-parameter, and builds before v0.3.55 silently ignored the
# extra pointer that used to be passed, so detection failures were never
# surfaced.
#
# @return [Boolean]
# @raise [InvalidStateError] document has been closed (via `#handle`).
def encrypted?
  native = handle
  Bindings.pdf_document_is_encrypted(native)
end
129
+
130
# Extract plain text from a single page.
#
# @param page_index [Integer] 0-based page index.
# @return [String] extracted text; '' when the marshaller yields nothing.
# @raise [PdfOxide::ArgumentError] negative index.
# @raise [InvalidStateError] document has been closed.
def extract_text(page_index)
  validate_page_index(page_index)

  status = ::FFI::MemoryPointer.new(:int32)
  c_string = Bindings.pdf_document_extract_text(handle, page_index, status)
  raise_for_code(status.read_int32, 'extract_text')

  text = StringMarshaller.from_c_string(c_string)
  text || ''
end
140
+
141
# Auto-routed extraction for a single page (v0.3.51 #517).
#
# Per the original author's description, the native side returns native
# text where present, OCR'd text for scanned regions when the `ocr`
# feature is available, and falls back to native/partial text otherwise
# rather than reporting "OCR unavailable". From Ruby's side this is a
# single call to `pdf_document_extract_text_auto`.
#
# @param page_index [Integer] 0-based page index.
# @return [String] extracted text; '' when the marshaller yields nothing.
# @raise [PdfOxide::ArgumentError] negative index.
def extract_text_auto(page_index)
  validate_page_index(page_index)

  status = ::FFI::MemoryPointer.new(:int32)
  c_string = Bindings.pdf_document_extract_text_auto(handle, page_index, status)
  raise_for_code(status.read_int32, 'extract_text_auto')

  text = StringMarshaller.from_c_string(c_string)
  text || ''
end
155
+
156
# Convert to Markdown through MarkdownConverter.
#
# @param page_index [Integer, nil] page to convert. When omitted (nil)
#   the converter is called without a page argument — presumably a
#   whole-document conversion; verify against MarkdownConverter.
# @return [String] Markdown.
def to_markdown(page_index = nil)
  return MarkdownConverter.to_markdown(self) if page_index.nil?

  MarkdownConverter.to_markdown(self, page_index)
end
162
+
163
# Convert to HTML through MarkdownConverter.
#
# @param page_index [Integer, nil] page to convert. When omitted (nil)
#   the converter is called without a page argument — presumably a
#   whole-document conversion; verify against MarkdownConverter.
# @return [String] HTML.
def to_html(page_index = nil)
  return MarkdownConverter.to_html(self) if page_index.nil?

  MarkdownConverter.to_html(self, page_index)
end
169
+
170
# Search this document for literal text or a regular expression.
#
# @param query [String] literal text (or a regex source when `regex: true`).
# @param case_sensitive [Boolean]
# @param regex [Boolean] interpret `query` as a regex.
# @return [Array<Hash>] each match has keys :page, :text, :bbox
#   (where :bbox is a Hash with :x, :y, :width, :height).
# @raise [PdfOxide::ArgumentError] `query` is nil.
# @raise [UnsupportedFeatureError] regex requested but the bindings lack
#   `pdf_document_search_regex`.
def search(query, case_sensitive: false, regex: false)
  raise ::PdfOxide::ArgumentError, 'query cannot be nil' if query.nil?
  if regex && !Bindings.respond_to?(:pdf_document_search_regex)
    raise UnsupportedFeatureError, 'regex search not supported by this cdylib build'
  end

  status = ::FFI::MemoryPointer.new(:int32)
  needle = StringMarshaller.to_utf8(query)
  # Both entry points take the same (handle, query, case_sensitive, err) arguments.
  entry_point = regex ? :pdf_document_search_regex : :pdf_document_search_all
  matches = Bindings.public_send(entry_point, handle, needle, case_sensitive, status)
  raise_for_code(status.read_int32, 'search')
  parse_search_results(matches)
end
191
+
192
# AcroForm fields of this document.
#
# v0.3.55 limitation (per the original author's note): per-field `page`
# is -1 because the form extractor does not yet surface page placement;
# a field is identified by `name`.
#
# Returns `[]` instead of raising when: the bindings lack
# `pdf_document_get_form_fields`, the call raises ::ArgumentError (the
# placeholder signature), the native pointer is null, or the payload is
# empty, is not valid JSON, or is not a JSON array.
#
# @return [Array<Hash>] `{name:, value:, type:, page:}` hashes.
# @raise [InvalidStateError] document has been closed.
# @raise [PdfOxide::Error subclass] non-zero native status.
def form_fields
  return [] unless Bindings.respond_to?(:pdf_document_get_form_fields)

  err = ::FFI::MemoryPointer.new(:int32)
  ptr = begin
    Bindings.pdf_document_get_form_fields(handle, err)
  rescue ::ArgumentError
    # Phantom 8-pointer skeleton — graceful empty.
    return []
  end
  raise_for_code(err.read_int32, 'form_fields')
  return [] if ptr.nil? || ptr.null?

  json = StringMarshaller.from_c_string(ptr) || ''
  return [] if json.empty?

  # Lazy-load JSON, and keep the rescue scoped to the parse itself. A
  # method-level `rescue JSON::ParserError` is evaluated for *any*
  # exception raised above, and would turn it into a NameError whenever
  # `json` has not been loaded yet.
  require 'json'
  parsed = begin
    JSON.parse(json)
  rescue JSON::ParserError
    return []
  end
  # Anything but an array of objects cannot describe fields.
  return [] unless parsed.is_a?(Array)

  parsed.grep(Hash).map do |field|
    {
      name: field['name'],
      value: field['value'],
      type: field['type'],
      page: field.fetch('page', -1)
    }
  end
end
227
+
228
# Render a single page to image bytes at the supplied DPI.
#
# @param page_index [Integer] 0-based page index.
# @param dpi [Integer] resolution (default 150); passed to the native
#   renderer as a zoom factor of `dpi / 72.0`.
# @return [String] BINARY-encoded image bytes (PNG per the original
#   docs); empty when the byte accessor yields nothing.
# @raise [PdfOxide::ArgumentError] negative index.
# @raise [InternalError] the renderer returned a null image handle.
def render(page_index, dpi: 150)
  validate_page_index(page_index)
  err = ::FFI::MemoryPointer.new(:int32)
  img_ptr = Bindings.pdf_render_page_zoom(handle, page_index, dpi.to_f / 72.0, 0, err)
  raise_for_code(err.read_int32, 'render')
  raise InternalError, 'render returned null' if img_ptr.nil? || img_ptr.null?

  begin
    # String#b yields a BINARY-tagged copy and never mutates the receiver,
    # so a frozen '' fallback from the reader cannot raise FrozenError the
    # way `force_encoding` would under `frozen_string_literal: true`.
    read_rendered_image_bytes(img_ptr).b
  ensure
    # Release the native image even when reading its bytes raised.
    Bindings.pdf_rendered_image_free(img_ptr) if Bindings.respond_to?(:pdf_rendered_image_free)
  end
end
246
+
247
# Lightweight view of the page at `index`.
#
# The page borrows from this document: it is constructed with `self`
# and the index only. The index is checked by `validate_page_index`
# before the view is built.
#
# @param index [Integer] 0-based page index.
# @return [PdfPage]
# @raise [PdfOxide::ArgumentError] negative index.
def page(index)
  validate_page_index(index)
  view = PdfPage.new(self, index)
  view
end
254
+
255
# Every page of the document, built eagerly.
#
# @return [Array<PdfPage>] one view per index in `0...page_count`.
def pages
  total = page_count
  Array.new(total) { |position| PdfPage.new(self, position) }
end
260
+
261
# Convenience accessor for this document's {AutoExtractor}.
# The instance is created on first use and then reused.
#
# @return [AutoExtractor]
def auto_extractor
  return @auto_extractor if @auto_extractor

  @auto_extractor = AutoExtractor.new(self)
end
266
+
267
# Release the native handle.
#
# Idempotent: once `@closed` is set, further calls return immediately,
# so it is safe to call from an `ensure` block. State is cleared and
# the finalizer defused *before* the native free is issued.
def close
  return if @closed

  native = @handle
  @handle = nil
  @closed = true
  # Blank the slot shared with the finalizer so GC won't free it again.
  @tracker[0] = nil if @tracker

  releasable = native && !native.null?
  Bindings.pdf_document_free(native) if releasable
end
279
+
280
# Whether the document is still usable.
#
# @return [Boolean] true until {#close} has been called.
def open?
  @closed ? false : true
end
284
+
285
# Whether {#close} has already run.
#
# @return [Boolean] the `@closed` flag (true after {#close}).
def closed?
  @closed
end
289
+
290
# Build the GC finalizer for a document.
#
# The proc closes over `tracker` (a one-element array) rather than the
# document itself. `#close` sets `tracker[0]` to nil, so a later GC pass
# finds nothing to free — per the original author's note, the cdylib's
# `pdf_document_free` is not idempotent on the same pointer.
#
# @param tracker [Array] single-slot holder of the native handle.
# @return [Proc] finalizer that frees the tracked handle at most once.
# @api private
def self.finalizer(tracker)
  proc do
    native = tracker[0]
    next unless native && !native.null?

    Bindings.pdf_document_free(native)
    tracker[0] = nil
  end
end
304
+
305
+ private
306
+
307
# Resolve `source` to a native document handle.
#
# Resolution order:
#   1. a String naming an existing file -> opened by absolute path;
#   2. a String starting with the `%PDF` magic -> opened from memory;
#   3. anything else -> FileNotFoundError.
#
# @param source [String] filesystem path or raw PDF bytes.
# @return [Array(String, FFI::Pointer)] `[path, handle]`; `path` is the
#   absolute path, or '<in-memory>' for byte input.
# @raise [FileNotFoundError] neither an existing path nor PDF bytes.
# @raise [ParseError] the native call returned a null handle.
def open_native(source)
  err = ::FFI::MemoryPointer.new(:int32)
  # File.exist? raises ArgumentError for strings containing NUL bytes,
  # which binary PDF data almost always does — so only probe the
  # filesystem with strings that can be a path at all.
  path_candidate = source.is_a?(String) && !source.include?("\0")
  handle, path =
    if path_candidate && File.exist?(source)
      absolute = File.absolute_path(source)
      [Bindings.pdf_document_open(absolute, err), absolute]
    elsif source.is_a?(String) && source.start_with?('%PDF')
      # in-memory PDF bytes
      buf = source.dup.force_encoding(Encoding::BINARY)
      mem = ::FFI::MemoryPointer.new(:uint8, buf.bytesize)
      mem.write_bytes(buf, 0, buf.bytesize)
      # NOTE(review): `mem` is only referenced for the duration of this
      # call; this assumes the native side copies the bytes — confirm.
      [Bindings.pdf_document_open_from_bytes(mem, buf.bytesize, err), '<in-memory>']
    else
      raise FileNotFoundError, "file not found: #{source}"
    end

  code = err.read_int32
  raise_for_code(code, 'open') if code != 0
  raise ParseError, 'pdf_document_open returned null' if handle.nil? || handle.null?

  [path, handle]
end
328
+
329
# Pre-flight check on a page index before it crosses the FFI boundary.
#
# Only the type and the lower bound are checked. The upper bound is
# deliberately not compared with `page_count`; an out-of-range index is
# left for the native call to report.
#
# @param idx [Integer] 0-based page index.
# @return [nil]
# @raise [PdfOxide::ArgumentError] `idx` is not an Integer or is negative.
def validate_page_index(idx)
  # nil or non-numeric input would otherwise surface as NoMethodError.
  raise ::PdfOxide::ArgumentError, 'page_index must be an Integer' unless idx.is_a?(Integer)
  raise ::PdfOxide::ArgumentError, 'page_index must be >= 0' if idx.negative?

  nil
end
336
+
337
# Convert a native search-result set into Ruby hashes and free it.
#
# @param results_handle [FFI::Pointer, nil] native result set.
# @return [Array<Hash>] `{page:, text:, bbox: {x:, y:, width:, height:}}`
#   per match; `[]` for a nil or null handle.
def parse_search_results(results_handle)
  return [] if results_handle.nil? || results_handle.null?

  begin
    # NOTE(review): the per-accessor status written to `err` is not
    # inspected (unchanged best-effort behavior) — confirm that is intended.
    err = ::FFI::MemoryPointer.new(:int32)
    count = Bindings.pdf_oxide_search_result_count(results_handle)
    Array.new(count) do |i|
      page = Bindings.pdf_oxide_search_result_get_page(results_handle, i, err)
      text_ptr = Bindings.pdf_oxide_search_result_get_text(results_handle, i, err)
      text = StringMarshaller.from_c_string(text_ptr) || ''
      # Fresh out-parameters for every match.
      x = ::FFI::MemoryPointer.new(:float)
      y = ::FFI::MemoryPointer.new(:float)
      w = ::FFI::MemoryPointer.new(:float)
      h = ::FFI::MemoryPointer.new(:float)
      Bindings.pdf_oxide_search_result_get_bbox(results_handle, i, x, y, w, h, err)
      { page: page,
        text: text,
        bbox: { x: x.read_float, y: y.read_float, width: w.read_float, height: h.read_float } }
    end
  ensure
    # Free the native result set even if an accessor raised part-way.
    Bindings.pdf_oxide_search_result_free(results_handle)
  end
end
358
+
359
# Copy the encoded bytes out of a native "rendered image" handle.
#
# Uses the byte-buffer accessor `pdf_oxide_rendered_image_get_bytes`
# when the bindings expose it. When they do not, or when the accessor
# yields a null buffer, an empty string is returned so the caller sees
# empty output rather than a crash.
#
# Every return value is a mutable BINARY string: the caller may re-tag
# it, and a frozen `''` literal (this file sets
# `frozen_string_literal: true`) would raise FrozenError there.
#
# @param img_ptr [FFI::Pointer] rendered-image handle (not freed here).
# @return [String] image bytes, possibly empty.
def read_rendered_image_bytes(img_ptr)
  return ''.b unless Bindings.respond_to?(:pdf_oxide_rendered_image_get_bytes)

  len_ptr = ::FFI::MemoryPointer.new(:size_t)
  err = ::FFI::MemoryPointer.new(:int32)
  buf = Bindings.pdf_oxide_rendered_image_get_bytes(img_ptr, len_ptr, err)
  raise_for_code(err.read_int32, 'render_bytes')
  return ''.b if buf.nil? || buf.null?

  begin
    buf.read_string(len_ptr.read(:size_t))
  ensure
    # Hand the native buffer back even if the copy above raised.
    Bindings.free_bytes(buf) if Bindings.respond_to?(:free_bytes)
  end
end
381
+
382
# Translate a cdylib status code (`int32_t *err`) into a Ruby exception.
#
# Per the original author's note this table must mirror the cdylib's
# wire format (src/ffi.rs) — the same code surface the PHP, C# and Go
# bindings use. Earlier releases used a different numbering, which made
# e.g. 4 (ERR_EXTRACTION) raise StateError and 8 (ERR_UNSUPPORTED) raise
# SignatureError.
#
#   0 success                 -> returns nil
#   1 ERR_INVALID_ARG         -> ArgumentError
#   2 ERR_IO                  -> IoError
#   3 ERR_PARSE               -> ParseError
#   4 ERR_EXTRACTION          -> ParseError
#   5 ERR_INTERNAL            -> InternalError
#   6 ERR_INVALID_PAGE        -> ArgumentError
#   7 ERR_SEARCH              -> SearchError
#   8 ERR_UNSUPPORTED         -> UnsupportedFeatureError
#   anything else             -> InternalError
#
# @param code [Integer] status read from the native out-parameter.
# @param op [String] operation name used in the message.
# @return [nil] when `code` is zero.
def raise_for_code(code, op)
  return if code.zero?

  error_class =
    case code
    when 1, 6 then ::PdfOxide::ArgumentError
    when 2    then ::PdfOxide::IoError
    when 3, 4 then ::PdfOxide::ParseError
    when 7    then ::PdfOxide::SearchError
    when 8    then ::PdfOxide::UnsupportedFeatureError
    else ::PdfOxide::InternalError # 5 (ERR_INTERNAL) and any unknown code
    end
  raise error_class, "#{op} failed (error code #{code})"
end
410
+ end
411
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module PdfOxide
  # One page of a {PdfDocument}, addressed by its 0-based index.
  #
  # Mirrors `fyi.oxide.pdf.PdfPage`. The object is a lightweight view:
  # it stores only the owning document and the index, and holds no
  # native handle of its own. Text extraction is delegated to the parent,
  # so it fails once the parent has been closed.
  #
  # Construct via {PdfDocument#page} or {PdfDocument#pages}.
  class PdfPage
    # @return [PdfDocument] the owning document.
    attr_reader :parent

    # @return [Integer] 0-based page index.
    attr_reader :index

    # @api private (use {PdfDocument#page})
    # @param parent [PdfDocument] owning document; must not be nil.
    # @param index [Integer] 0-based page index (stored as given).
    # @raise [PdfOxide::ArgumentError] `parent` is nil.
    def initialize(parent, index)
      raise ::PdfOxide::ArgumentError, 'parent cannot be nil' if parent.nil?

      @parent = parent
      @index = index
    end

    # @return [Float] page width in PDF user-space units (taken from
    #   {#media_box}, so currently 0.0).
    def width
      box = media_box
      box[:width]
    end

    # @return [Float] page height in PDF user-space units (taken from
    #   {#media_box}, so currently 0.0).
    def height
      box = media_box
      box[:height]
    end

    # Media box of the page.
    #
    # v0.3.55 limitation (per the original author's note): the C ABI has
    # no public per-page media-box accessor, so a zero rectangle is
    # returned as a placeholder. A fresh Hash is built on every call.
    #
    # @return [Hash] `{ x:, y:, width:, height: }` in PDF user-space.
    def media_box
      { x: 0.0, y: 0.0, width: 0.0, height: 0.0 }
    end

    # Crop box of the page; currently always the {#media_box} value.
    #
    # @return [Hash] `{ x:, y:, width:, height: }`.
    def crop_box
      media_box
    end

    # Page rotation in degrees. v0.3.55: no per-page rotation accessor
    # is available through the C ABI, so this is always 0.
    #
    # @return [Integer]
    def rotation
      0
    end

    # Text of this page; equivalent to `parent.extract_text(index)`.
    #
    # @return [String]
    def text
      @parent.extract_text(@index)
    end

    # Short inspection-style label (`#<PdfOxide::PdfPage index=N>`).
    # Use {#text} for the extracted page text.
    #
    # @return [String]
    def to_s
      format('#<PdfOxide::PdfPage index=%s>', @index)
    end
    alias_method :inspect, :to_s
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module PdfOxide
  # Process-global crypto-governance policy (v0.3.50 #230).
  #
  # Mirrors `fyi.oxide.pdf.PdfPolicy`. Selects which cryptographic
  # algorithms are accepted for reads and writes. Per the original
  # author's note it composes with the build-time feature flags
  # (`legacy-crypto`, `fips`): a build without `legacy-crypto` cannot
  # enable RC4/MD5-KDF whatever the policy says.
  #
  # **Set-once semantics.** The policy is installed at most once per
  # process: call {.set} **before** any other pdf_oxide operation. Any
  # non-zero status from the native setter is reported as
  # "policy already set".
  module PdfPolicy
    # Policy modes and their native ordinals (mirrors Java's `PolicyMode`).
    MODES = { compat: 0, strict: 1, fips_strict: 2 }.freeze
    # Reverse lookup: native ordinal -> mode symbol.
    ORDINAL_TO_MODE = MODES.invert.freeze

    module_function

    # Current process policy mode.
    #
    # Falls back to :compat when the build does not expose the accessor,
    # when the accessor yields nil, when the ordinal is unknown, or when
    # the call raises FFI::NotFoundError.
    #
    # @return [Symbol] :compat, :strict or :fips_strict.
    def current
      ordinal = 0
      if Bindings.respond_to?(:pdf_oxide_policy_current_ordinal)
        ordinal = Bindings.pdf_oxide_policy_current_ordinal || 0
      end
      ORDINAL_TO_MODE.fetch(ordinal, :compat)
    rescue ::FFI::NotFoundError
      :compat
    end

    # Install the process-global policy mode. Call before any other
    # pdf_oxide operation.
    #
    # @param mode [Symbol] one of `MODES.keys`.
    # @return [Symbol] `mode`, on success.
    # @raise [PdfOxide::ArgumentError] unknown mode.
    # @raise [UnsupportedFeatureError] the build lacks the policy setter.
    # @raise [InternalError] the native setter returned non-zero.
    def set(mode)
      unless MODES.key?(mode)
        raise ::PdfOxide::ArgumentError, "mode must be one of #{MODES.keys.inspect}, got #{mode.inspect}"
      end
      unless Bindings.respond_to?(:pdf_oxide_policy_set_by_ordinal)
        raise UnsupportedFeatureError, 'policy not supported by this cdylib build'
      end

      status = Bindings.pdf_oxide_policy_set_by_ordinal(MODES[mode])
      raise InternalError, 'policy already set' unless status == 0

      mode
    end

    # @return [Symbol] :compat preset (accept all algorithms).
    def compat
      :compat
    end

    # @return [Symbol] :strict preset (reject legacy algorithms).
    def strict
      :strict
    end

    # @return [Symbol] :fips_strict preset (FIPS 140-3 only).
    def fips_strict
      :fips_strict
    end
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
module PdfOxide
  # PAdES B-B / B-T / B-LT / B-LTA digital-signature signer
  # (v0.3.50 #235 + v0.3.51 5-arg shim).
  #
  # Mirrors `fyi.oxide.pdf.PdfSigner`. Every signing request goes through
  # the 5-arg shim `pdf_sign_bytes_pades_opts`; per the original author's
  # note the 18-arg legacy entry exists but is not exercised here.
  #
  # Signing is a **security operation**: every non-zero status and every
  # null result raises (fails closed).
  class PdfSigner
    # PAdES baseline level codes (mirrors Java's `SignatureLevel` enum).
    LEVELS = { b: 0, t: 1, lt: 2, lta: 3 }.freeze

    # FFI struct mirroring `PadesSignOptionsC`. Field order and types
    # MUST match the C header exactly (the Rust side is `#[repr(C)]`).
    # NOTE(review): FFI::Struct uses natural alignment; no `pack`
    # directive is applied here despite the earlier "packed" wording.
    class PadesSignOptions < ::FFI::Struct
      layout :certificate_handle, :pointer,
             :certs,              :pointer,
             :cert_lens,          :pointer,
             :n_certs,            :size_t,
             :crls,               :pointer,
             :crl_lens,           :pointer,
             :n_crls,             :size_t,
             :ocsps,              :pointer,
             :ocsp_lens,          :pointer,
             :n_ocsps,            :size_t,
             :tsa_url,            :pointer,
             :reason,             :pointer,
             :location,           :pointer,
             :level,              :int32
    end

    # @param certificate_handle [FFI::Pointer] PKCS#12 or PEM-loaded
    #   credentials handle (opaque pointer from the credentials API).
    # @raise [PdfOxide::ArgumentError] handle is nil or null.
    def initialize(certificate_handle)
      missing = certificate_handle.nil? || certificate_handle.null?
      raise ::PdfOxide::ArgumentError, 'certificate_handle required' if missing

      @certificate_handle = certificate_handle
    end

    # Sign a PDF (bytes) at the requested PAdES level.
    #
    # @param pdf [String] raw PDF (BINARY).
    # @param level [Symbol] :b, :t, :lt, or :lta.
    # @param tsa_url [String, nil] RFC 3161 TSA URL (required above :b).
    # @param reason [String, nil]
    # @param location [String, nil]
    # @return [String] BINARY-encoded signed PDF bytes.
    # @raise [PdfOxide::ArgumentError] empty pdf, unknown level, or a
    #   missing `tsa_url` for a level other than :b.
    # @raise [SignatureError] the native signer failed.
    def sign(pdf, level:, tsa_url: nil, reason: nil, location: nil)
      raise ::PdfOxide::ArgumentError, 'pdf cannot be empty' if pdf.nil? || pdf.empty?

      unless LEVELS.key?(level)
        raise ::PdfOxide::ArgumentError, "level must be one of #{LEVELS.keys.inspect}, got #{level.inspect}"
      end

      tsa_missing = tsa_url.nil? || tsa_url.empty?
      raise ::PdfOxide::ArgumentError, "PAdES #{level} requires tsa_url" if level != :b && tsa_missing

      self.class.sign_with_handle(
        pdf,
        certificate_handle: @certificate_handle,
        level_code: LEVELS[level],
        tsa_url: tsa_url,
        reason: reason,
        location: location
      )
    end

    # Static convenience — sign without keeping a signer instance around.
    #
    # @return [String] BINARY-encoded signed PDF bytes.
    def self.sign(pdf:, certificate_handle:, level:, tsa_url: nil, reason: nil, location: nil)
      signer = new(certificate_handle)
      signer.sign(pdf, level: level, tsa_url: tsa_url, reason: reason, location: location)
    end

    # PAdES level of an existing signature handle.
    #
    # @param signature_handle [FFI::Pointer]
    # @return [Integer] the ordinal reported by the native accessor
    #   (presumably one of `LEVELS.values`; this method never returns nil).
    # @raise [PdfOxide::ArgumentError] handle is nil or null.
    # @raise [SignatureError] non-zero native status.
    def self.pades_level(signature_handle)
      raise ::PdfOxide::ArgumentError, 'signature_handle required' if signature_handle.nil? || signature_handle.null?

      status = ::FFI::MemoryPointer.new(:int32)
      level_ordinal = Bindings.pdf_signature_get_pades_level(signature_handle, status)
      failure = status.read_int32
      raise SignatureError, "pdf_signature_get_pades_level failed (#{failure})" if failure != 0

      level_ordinal
    end

    # Whether the document carries a document-scoped /DocTimeStamp.
    #
    # @param document_handle [FFI::Pointer]
    # @return [Boolean]
    # @raise [PdfOxide::ArgumentError] handle is nil or null.
    # @raise [SignatureError] non-zero native status.
    def self.document_has_timestamp?(document_handle)
      raise ::PdfOxide::ArgumentError, 'document_handle required' if document_handle.nil? || document_handle.null?

      status = ::FFI::MemoryPointer.new(:int32)
      flag = Bindings.pdf_document_has_timestamp(document_handle, status)
      failure = status.read_int32
      raise SignatureError, "pdf_document_has_timestamp failed (#{failure})" if failure != 0

      # NOTE(review): assumes the binding returns an integer; with a
      # :bool return type this comparison would always be true — confirm.
      flag != 0
    end

    # @api private — fills PadesSignOptions and invokes the 5-arg shim.
    #
    # @param pdf [String] PDF bytes.
    # @param certificate_handle [FFI::Pointer]
    # @param level_code [Integer] value from `LEVELS`.
    # @param tsa_url [String, nil]
    # @param reason [String, nil]
    # @param location [String, nil]
    # @return [String] BINARY-encoded signed PDF bytes.
    # @raise [SignatureError] non-zero status or null output pointer.
    def self.sign_with_handle(pdf, certificate_handle:, level_code:, tsa_url:, reason:, location:)
      input = pdf.dup.force_encoding(Encoding::BINARY)
      input_ptr = ::FFI::MemoryPointer.new(:uint8, input.bytesize)
      input_ptr.write_bytes(input, 0, input.bytesize)

      # Keep the C-string buffers referenced from locals for the whole
      # native call so GC cannot reclaim them while it is in flight.
      tsa_ptr = string_ptr(tsa_url)
      reason_ptr = string_ptr(reason)
      location_ptr = string_ptr(location)

      options = PadesSignOptions.new
      options[:certificate_handle] = certificate_handle
      # No extra certificates, CRLs or OCSP responses are supplied.
      %i[certs cert_lens crls crl_lens ocsps ocsp_lens].each do |field|
        options[field] = ::FFI::Pointer::NULL
      end
      %i[n_certs n_crls n_ocsps].each { |field| options[field] = 0 }
      options[:tsa_url] = tsa_ptr || ::FFI::Pointer::NULL
      options[:reason] = reason_ptr || ::FFI::Pointer::NULL
      options[:location] = location_ptr || ::FFI::Pointer::NULL
      options[:level] = level_code

      signed_len = ::FFI::MemoryPointer.new(:size_t)
      status = ::FFI::MemoryPointer.new(:int32)
      signed_ptr = Bindings.pdf_sign_bytes_pades_opts(input_ptr, input.bytesize, options.to_ptr, signed_len, status)
      code = status.read_int32

      raise SignatureError, "pdf_sign_bytes_pades_opts failed (#{code}); security op fails closed" if code != 0
      if signed_ptr.nil? || signed_ptr.null?
        raise SignatureError, 'pdf_sign_bytes_pades_opts returned null; security op fails closed'
      end

      # Copy into Ruby memory first, then return the native buffer.
      signed = signed_ptr.read_string(signed_len.read(:size_t))
      Bindings.free_bytes(signed_ptr) if Bindings.respond_to?(:free_bytes)
      signed.force_encoding(Encoding::BINARY)
    end

    # Allocate a NUL-terminated UTF-8 copy of `str` in native memory.
    #
    # @param str [#to_s, nil]
    # @return [FFI::MemoryPointer, nil] nil when `str` is nil.
    def self.string_ptr(str)
      return if str.nil?

      ::FFI::MemoryPointer.from_string(str.to_s.encode('UTF-8'))
    end
    private_class_method :string_ptr
  end
end