rpdfium 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,982 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ffi"
4
+ require "rbconfig"
5
+
6
+ module Rpdfium
7
+ # Layer 1: bindings FFI grezzi alle API C di PDFium.
8
+ # Mappa 1:1 con i nomi originali. Usare le classi wrapper per il codice
9
+ # applicativo. Le API "Experimental" di PDFium sono marcate nei commenti:
10
+ # in teoria potrebbero cambiare, in pratica sono stabili da anni.
11
+ module Raw
12
+ extend FFI::Library
13
+
14
# Builds the ordered list of library candidates that `ffi_lib` will try.
#
# FFI appends the platform's native extension (.dylib / .so / .dll) when the
# given name does not already end in a known one, so passing `libpdfium.so`
# on macOS would make it look for `libpdfium.so.dylib`. System names are
# therefore filtered per host OS.
#
# ENV["PDFIUM_LIBRARY_PATH"] and Rpdfium::Binary.library_path are explicit
# paths: when either is set it is returned alone, with no fallback to the
# system names, because the user asked for that exact file.
#
# @return [Array<String>] one explicit path, or the system library names
def self.candidate_paths
  env_path = ENV["PDFIUM_LIBRARY_PATH"].to_s
  return [env_path] unless env_path.empty?

  binary_gem_present =
    defined?(Rpdfium::Binary) && Rpdfium::Binary.respond_to?(:library_path)
  if binary_gem_present
    bundled = Rpdfium::Binary.library_path
    return [bundled] unless !bundled || bundled.empty?
  end

  system_library_names
end
39
+
40
# System-wide library names, filtered for the host OS.
#
# The extension-less `pdfium` / `libpdfium` come first (FFI appends the right
# extension itself). Names that already carry an extension are added only
# when they match the host OS, which avoids the double-extension problem.
#
# @return [Array<String>]
def self.system_library_names
  suffixed_by_os = {
    macos: %w[libpdfium.dylib],
    linux: %w[libpdfium.so],
    windows: %w[pdfium.dll libpdfium.dll]
  }
  %w[pdfium libpdfium] + suffixed_by_os.fetch(host_os, [])
end
55
+
56
# Classifies RbConfig's "host_os" string.
#
# @return [Symbol, nil] :macos, :linux or :windows; nil when nothing matches
def self.host_os
  os_name = RbConfig::CONFIG["host_os"]
  families = {
    macos: /darwin/,
    linux: /linux/,
    windows: /mswin|mingw|cygwin/
  }
  # Patterns are tried in declaration order, first match wins.
  family, = families.find { |_, pattern| pattern.match?(os_name) }
  family
end
63
+
64
# Load state, filled in by the load attempt that follows.
@native_loaded = false
@load_error = nil

class << self
  # @return [Boolean] whether the native library was loaded
  def native_loaded?
    @native_loaded
  end

  # @return [Exception, nil] the error captured while loading, if any
  attr_reader :load_error
end
69
+
70
# Try to load the native library once, while the module body is evaluated.
# On failure the error is kept and the module falls back to "stub" mode:
# attach_function then generates methods that raise Rpdfium::LoadError on
# first call, so the pure-Ruby modules (Edges, Cells, PNG) can be used
# without PDFium installed.
begin
  library_candidates = candidate_paths
  ffi_lib(*library_candidates)
  ffi_convention(:default) # cdecl everywhere, Win64 included (bblanchon builds)
  @native_loaded = true
rescue ::LoadError, ::RuntimeError => load_failure
  @load_error = load_failure
  ffi_lib_flags(:now) # per the original note: no-op without ffi_lib, kept to document intent
end
82
+
83
# Tolerant wrapper around FFI's attach_function. When binding fails (library
# not loaded, or the symbol is absent from this PDFium build) it still
# defines a same-named singleton method that raises a clear error at the
# call site, so the `require` itself does not blow up.
#
# @param name [Symbol] name of the method to define
# @param args forwarded unchanged to FFI's attach_function
def self.attach_function(name, *args)
  super
rescue FFI::NotFoundError, RuntimeError => binding_failure
  unavailable = lambda do |*_ignored|
    raise Rpdfium::LoadError,
          "PDFium symbol #{name} not available: #{binding_failure.message}"
  end
  define_singleton_method(name, &unavailable)
end
95
+
96
unless @native_loaded
  # Stub-mode override used when the library did not load: `super` is not
  # called (it would raise); the failing stub is defined directly.
  def self.attach_function(name, *_args)
    cause = @load_error
    define_singleton_method(name) do |*_ignored|
      message = <<~MSG.strip
        PDFium native library not loaded.
        Set ENV["PDFIUM_LIBRARY_PATH"] to a valid libpdfium.{so,dylib,dll},
        or install the rpdfium-binary gem (when released).
        Original error: #{cause.message}
      MSG
      raise Rpdfium::LoadError, message
    end
  end
end
111
+
112
+ # =========================================================================
113
+ # Tipi opachi
114
+ # =========================================================================
115
# Opaque PDFium handles: each one is a plain pointer on the Ruby side.
%i[
  FPDF_DOCUMENT FPDF_PAGE FPDF_TEXTPAGE FPDF_BITMAP FPDF_PAGEOBJECT
  FPDF_PAGEOBJECTMARK FPDF_PATHSEGMENT FPDF_FONT FPDF_ANNOTATION
  FPDF_FORMHANDLE FPDF_BOOKMARK FPDF_DEST FPDF_ACTION FPDF_LINK
  FPDF_GLYPHPATH FPDF_SCHHANDLE FPDF_ATTACHMENT FPDF_STRUCTTREE
  FPDF_STRUCTELEMENT
].each { |handle| typedef :pointer, handle }

# Scalar aliases.
typedef :int, :FPDF_BOOL
typedef :ushort, :FPDF_WCHAR
136
+
137
+ # =========================================================================
138
+ # Strutture C
139
+ # =========================================================================
140
# Rectangle made of four consecutive floats: left, top, right, bottom.
class FS_RECTF < FFI::Struct
  layout(*%i[left top right bottom].flat_map { |field| [field, :float] })
end
146
+
147
# PDF matrix [a b 0; c d 0; e f 1] (row-major in PDF); the struct stores the
# six coefficients as floats in field order a..f.
class FS_MATRIX < FFI::Struct
  layout(*%i[a b c d e f].flat_map { |coefficient| [coefficient, :float] })
end
153
+
154
# Point stored as two floats: x, y.
class FS_POINTF < FFI::Struct
  layout(*%i[x y].flat_map { |axis| [axis, :float] })
end
157
+
158
# Size stored as two floats: width, height.
class FS_SIZEF < FFI::Struct
  layout(*%i[width height].flat_map { |dimension| [dimension, :float] })
end
161
+
162
# Four (x, y) float pairs, stored as x1, y1, ..., x4, y4.
class FS_QUADPOINTSF < FFI::Struct
  layout(*%i[x1 y1 x2 y2 x3 y3 x4 y4].flat_map { |coord| [coord, :float] })
end
168
+
169
# Image metadata record; fields are laid out in the order listed below.
class FPDF_IMAGEOBJ_METADATA < FFI::Struct
  fields = {
    width: :uint,
    height: :uint,
    horizontal_dpi: :float,
    vertical_dpi: :float,
    bits_per_pixel: :uint,
    colorspace: :int,
    marked_content_id: :int
  }
  # Hash#flatten yields the name, type, name, type... list that layout expects.
  layout(*fields.flatten)
end
178
+
179
+ # =========================================================================
180
+ # Costanti
181
+ # =========================================================================
182
# Bitmap formats
FPDFBitmap_Unknown, FPDFBitmap_Gray, FPDFBitmap_BGR,
  FPDFBitmap_BGRx, FPDFBitmap_BGRA = *(0..4)

# Render flags (bit fields)
FPDF_ANNOT                 = 1 << 0
FPDF_LCD_TEXT              = 1 << 1
FPDF_NO_NATIVETEXT         = 1 << 2
FPDF_GRAYSCALE             = 1 << 3
FPDF_REVERSE_BYTE_ORDER    = 1 << 4 # RGBA instead of BGRA
FPDF_NO_GDIPLUS            = 1 << 6
FPDF_PRINTING              = 1 << 11
FPDF_RENDER_NO_SMOOTHTEXT  = 1 << 12
FPDF_RENDER_NO_SMOOTHIMAGE = 1 << 13
FPDF_RENDER_NO_SMOOTHPATH  = 1 << 14

# Page object types
PAGEOBJ_UNKNOWN, PAGEOBJ_TEXT, PAGEOBJ_PATH,
  PAGEOBJ_IMAGE, PAGEOBJ_SHADING, PAGEOBJ_FORM = *(0..5)

# Path segment types
SEGMENT_UNKNOWN, SEGMENT_LINETO, SEGMENT_BEZIERTO, SEGMENT_MOVETO = *(-1..2)

# Path fill mode
FILLMODE_NONE, FILLMODE_ALTERNATE, FILLMODE_WINDING = *(0..2)

# Text render modes
TEXT_RENDERMODE_FILL, TEXT_RENDERMODE_STROKE,
  TEXT_RENDERMODE_FILL_STROKE, TEXT_RENDERMODE_INVISIBLE = *(0..3)
225
+
226
# Annotation subtypes (PDF spec 12.5.6). The numeric values follow the
# FPDF_ANNOT_* defines of PDFium's public fpdf_annot.h header.
#
# Fix: REDACT is 28 upstream; 27 is XFAWIDGET. With the previous value (27)
# redaction annotations were never matched and XFA widgets were labelled
# "Redact". The subtypes that were missing from the table are listed too.
FPDF_ANNOT_UNKNOWN        = 0
FPDF_ANNOT_TEXT           = 1
FPDF_ANNOT_LINK           = 2
FPDF_ANNOT_FREETEXT       = 3
FPDF_ANNOT_LINE           = 4
FPDF_ANNOT_SQUARE         = 5
FPDF_ANNOT_CIRCLE         = 6
FPDF_ANNOT_POLYGON        = 7
FPDF_ANNOT_POLYLINE       = 8
FPDF_ANNOT_HIGHLIGHT      = 9
FPDF_ANNOT_UNDERLINE      = 10
FPDF_ANNOT_SQUIGGLY       = 11
FPDF_ANNOT_STRIKEOUT      = 12
FPDF_ANNOT_STAMP          = 13
FPDF_ANNOT_CARET          = 14
FPDF_ANNOT_INK            = 15
FPDF_ANNOT_POPUP          = 16
FPDF_ANNOT_FILEATTACHMENT = 17
FPDF_ANNOT_SOUND          = 18
FPDF_ANNOT_MOVIE          = 19
FPDF_ANNOT_WIDGET         = 20
FPDF_ANNOT_SCREEN         = 21
FPDF_ANNOT_PRINTERMARK    = 22
FPDF_ANNOT_TRAPNET        = 23
FPDF_ANNOT_WATERMARK      = 24
FPDF_ANNOT_THREED         = 25
FPDF_ANNOT_RICHMEDIA      = 26
FPDF_ANNOT_XFAWIDGET      = 27
FPDF_ANNOT_REDACT         = 28

# Subtype value => PDF subtype name. FPDF_ANNOT_UNKNOWN deliberately has no
# entry, so looking it up returns nil as before.
ANNOT_SUBTYPE_NAMES = {
  FPDF_ANNOT_TEXT => "Text", FPDF_ANNOT_LINK => "Link",
  FPDF_ANNOT_FREETEXT => "FreeText", FPDF_ANNOT_LINE => "Line",
  FPDF_ANNOT_SQUARE => "Square", FPDF_ANNOT_CIRCLE => "Circle",
  FPDF_ANNOT_POLYGON => "Polygon", FPDF_ANNOT_POLYLINE => "PolyLine",
  FPDF_ANNOT_HIGHLIGHT => "Highlight", FPDF_ANNOT_UNDERLINE => "Underline",
  FPDF_ANNOT_SQUIGGLY => "Squiggly", FPDF_ANNOT_STRIKEOUT => "StrikeOut",
  FPDF_ANNOT_STAMP => "Stamp", FPDF_ANNOT_CARET => "Caret",
  FPDF_ANNOT_INK => "Ink", FPDF_ANNOT_POPUP => "Popup",
  FPDF_ANNOT_FILEATTACHMENT => "FileAttachment",
  FPDF_ANNOT_SOUND => "Sound", FPDF_ANNOT_MOVIE => "Movie",
  FPDF_ANNOT_WIDGET => "Widget", FPDF_ANNOT_SCREEN => "Screen",
  FPDF_ANNOT_PRINTERMARK => "PrinterMark", FPDF_ANNOT_TRAPNET => "TrapNet",
  FPDF_ANNOT_WATERMARK => "Watermark", FPDF_ANNOT_THREED => "3D",
  FPDF_ANNOT_RICHMEDIA => "RichMedia", FPDF_ANNOT_XFAWIDGET => "XFAWidget",
  FPDF_ANNOT_REDACT => "Redact"
}.freeze
256
+
257
# Form field types (for widget annotations)
FPDF_FORMFIELD_UNKNOWN, FPDF_FORMFIELD_PUSHBUTTON, FPDF_FORMFIELD_CHECKBOX,
  FPDF_FORMFIELD_RADIOBUTTON, FPDF_FORMFIELD_COMBOBOX, FPDF_FORMFIELD_LISTBOX,
  FPDF_FORMFIELD_TEXTFIELD, FPDF_FORMFIELD_SIGNATURE = *(0..7)

# Search flags (bit fields)
FPDF_MATCHCASE      = 1 << 0
FPDF_MATCHWHOLEWORD = 1 << 1
FPDF_CONSECUTIVE    = 1 << 2

# Form types (FPDF_GetFormType)
FORMTYPE_NONE, FORMTYPE_ACRO_FORM,
  FORMTYPE_XFA_FULL, FORMTYPE_XFA_FOREGROUND = *(0..3)
277
+
278
+ # =========================================================================
279
+ # Library lifecycle
280
+ # =========================================================================
281
# Each row is [function, parameter types, return type].
[
  [:FPDF_InitLibrary,    [], :void],
  [:FPDF_DestroyLibrary, [], :void],
  [:FPDF_GetLastError,   [], :ulong]
].each { |fn, params, ret| attach_function fn, params, ret }
284
+
285
+ # =========================================================================
286
+ # Document
287
+ # =========================================================================
288
# Each row is [function, parameter types, return type].
[
  [:FPDF_LoadDocument,               %i[string string],              :FPDF_DOCUMENT],
  [:FPDF_LoadMemDocument64,          %i[pointer size_t string],      :FPDF_DOCUMENT],
  [:FPDF_CloseDocument,              %i[FPDF_DOCUMENT],              :void],
  [:FPDF_GetPageCount,               %i[FPDF_DOCUMENT],              :int],
  [:FPDF_GetDocPermissions,          %i[FPDF_DOCUMENT],              :ulong],
  [:FPDF_GetSecurityHandlerRevision, %i[FPDF_DOCUMENT],              :int],
  [:FPDF_GetFileVersion,             %i[FPDF_DOCUMENT pointer],      :FPDF_BOOL],
  [:FPDF_GetFormType,                %i[FPDF_DOCUMENT],              :int],
  # Metadata: FPDF_GetMetaText(doc, "Title"|"Author"|"Subject"|"Keywords"|
  #           "Creator"|"Producer"|"CreationDate"|"ModDate")
  [:FPDF_GetMetaText,  %i[FPDF_DOCUMENT string pointer ulong], :ulong],
  # Page label (PDF spec: roman/letter labelling)
  [:FPDF_GetPageLabel, %i[FPDF_DOCUMENT int pointer ulong],    :ulong]
].each { |fn, params, ret| attach_function fn, params, ret }
308
+
309
+ # =========================================================================
310
+ # Pages
311
+ # =========================================================================
312
# Each row is [function, parameter types, return type].
[
  [:FPDF_LoadPage,            %i[FPDF_DOCUMENT int], :FPDF_PAGE],
  [:FPDF_ClosePage,           %i[FPDF_PAGE],         :void],
  [:FPDF_GetPageWidthF,       %i[FPDF_PAGE],         :float],
  [:FPDF_GetPageHeightF,      %i[FPDF_PAGE],         :float],
  [:FPDF_GetPageBoundingBox,  %i[FPDF_PAGE pointer], :FPDF_BOOL],
  [:FPDFPage_GetRotation,     %i[FPDF_PAGE],         :int],
  [:FPDFPage_HasTransparency, %i[FPDF_PAGE],         :FPDF_BOOL]
].each { |fn, params, ret| attach_function fn, params, ret }

# MediaBox / CropBox / BleedBox / TrimBox / ArtBox getters share one
# signature: the page plus four output pointers.
%w[Media Crop Bleed Trim Art].each do |box|
  attach_function :"FPDFPage_Get#{box}Box",
                  %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
end
326
+
327
+ # =========================================================================
328
+ # Text extraction
329
+ # =========================================================================
330
# Each row is [function, parameter types, return type].
[
  [:FPDFText_LoadPage,      %i[FPDF_PAGE],         :FPDF_TEXTPAGE],
  [:FPDFText_ClosePage,     %i[FPDF_TEXTPAGE],     :void],
  [:FPDFText_CountChars,    %i[FPDF_TEXTPAGE],     :int],
  [:FPDFText_GetUnicode,    %i[FPDF_TEXTPAGE int], :uint],
  [:FPDFText_GetFontSize,   %i[FPDF_TEXTPAGE int], :double],
  [:FPDFText_GetFontWeight, %i[FPDF_TEXTPAGE int], :int],
  [:FPDFText_GetFontInfo,   %i[FPDF_TEXTPAGE int pointer ulong pointer], :ulong],
  # NOTE: FPDFText_GetTextRenderMode(text_page, char_index) was REMOVED from
  # PDFium in chromium/6611 (July 2024). The replacement takes two steps:
  #   1. FPDFText_GetTextObject(text_page, char_index) -> FPDF_PAGEOBJECT
  #   2. FPDFTextObj_GetTextRenderMode(page_object)    -> int
  # High-level wrapper: see Page#chars (field :render_mode).
  # Reference: pypdfium2 issue #335, pdfium-render issue #151.
  [:FPDFText_GetTextObject, %i[FPDF_TEXTPAGE int], :FPDF_PAGEOBJECT],
  [:FPDFText_GetCharBox,
   %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :FPDF_BOOL],
  # "Loose" char box: bbox proportional to the font size, steadier for layout.
  [:FPDFText_GetLooseCharBox,    %i[FPDF_TEXTPAGE int pointer],         :FPDF_BOOL],
  [:FPDFText_GetMatrix,          %i[FPDF_TEXTPAGE int pointer],         :FPDF_BOOL],
  [:FPDFText_GetCharOrigin,      %i[FPDF_TEXTPAGE int pointer pointer], :FPDF_BOOL],
  [:FPDFText_GetCharAngle,       %i[FPDF_TEXTPAGE int],                 :float],
  [:FPDFText_HasUnicodeMapError, %i[FPDF_TEXTPAGE int],                 :int],
  [:FPDFText_IsGenerated,        %i[FPDF_TEXTPAGE int],                 :int],
  [:FPDFText_IsHyphen,           %i[FPDF_TEXTPAGE int],                 :int],
  [:FPDFText_GetText,            %i[FPDF_TEXTPAGE int int pointer],     :int],
  [:FPDFText_GetBoundedText,
   %i[FPDF_TEXTPAGE double double double double pointer int], :int],
  [:FPDFText_CountRects, %i[FPDF_TEXTPAGE int int], :int],
  [:FPDFText_GetRect,
   %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :FPDF_BOOL]
].each { |fn, params, ret| attach_function fn, params, ret }
372
+
373
+ # =========================================================================
374
+ # Search
375
+ # =========================================================================
376
# Each row is [function, parameter types, return type].
[
  [:FPDFText_FindStart,         %i[FPDF_TEXTPAGE pointer ulong int], :FPDF_SCHHANDLE],
  [:FPDFText_FindNext,          %i[FPDF_SCHHANDLE],                  :FPDF_BOOL],
  [:FPDFText_FindPrev,          %i[FPDF_SCHHANDLE],                  :FPDF_BOOL],
  [:FPDFText_GetSchResultIndex, %i[FPDF_SCHHANDLE],                  :int],
  [:FPDFText_GetSchCount,       %i[FPDF_SCHHANDLE],                  :int],
  [:FPDFText_FindClose,         %i[FPDF_SCHHANDLE],                  :void]
].each { |fn, params, ret| attach_function fn, params, ret }
383
+
384
+ # =========================================================================
385
+ # Bitmap & rendering
386
+ # =========================================================================
387
# Each row is [function, parameter types, return type].
[
  [:FPDFBitmap_Create,    %i[int int int],                       :FPDF_BITMAP],
  [:FPDFBitmap_CreateEx,  %i[int int int pointer int],           :FPDF_BITMAP],
  [:FPDFBitmap_Destroy,   %i[FPDF_BITMAP],                       :void],
  [:FPDFBitmap_FillRect,  %i[FPDF_BITMAP int int int int ulong], :void],
  [:FPDFBitmap_GetBuffer, %i[FPDF_BITMAP],                       :pointer],
  [:FPDFBitmap_GetWidth,  %i[FPDF_BITMAP],                       :int],
  [:FPDFBitmap_GetHeight, %i[FPDF_BITMAP],                       :int],
  [:FPDFBitmap_GetStride, %i[FPDF_BITMAP],                       :int],
  [:FPDFBitmap_GetFormat, %i[FPDF_BITMAP],                       :int],
  [:FPDF_RenderPageBitmap,
   %i[FPDF_BITMAP FPDF_PAGE int int int int int int], :void],
  # Rendering with a 2x3 matrix + clipping (arbitrary scaling/rotation).
  [:FPDF_RenderPageBitmapWithMatrix,
   %i[FPDF_BITMAP FPDF_PAGE pointer pointer int], :void]
].each { |fn, params, ret| attach_function fn, params, ret }
405
+
406
+ # =========================================================================
407
+ # Page objects (generic)
408
+ # =========================================================================
409
# Shared shape of the getters that take an object plus four output pointers.
four_outputs = %i[FPDF_PAGEOBJECT pointer pointer pointer pointer]

# Each row is [function, parameter types, return type].
[
  [:FPDFPage_CountObjects,      %i[FPDF_PAGE],               :int],
  [:FPDFPage_GetObject,         %i[FPDF_PAGE int],           :FPDF_PAGEOBJECT],
  [:FPDFPageObj_GetType,        %i[FPDF_PAGEOBJECT],         :int],
  [:FPDFPageObj_GetBounds,      four_outputs,                :FPDF_BOOL],
  [:FPDFPageObj_GetMatrix,      %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL],
  [:FPDFPageObj_GetFillColor,   four_outputs,                :FPDF_BOOL],
  [:FPDFPageObj_GetStrokeColor, four_outputs,                :FPDF_BOOL],
  [:FPDFPageObj_GetStrokeWidth, %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL],
  [:FPDFPageObj_GetLineCap,     %i[FPDF_PAGEOBJECT],         :int],
  [:FPDFPageObj_GetLineJoin,    %i[FPDF_PAGEOBJECT],         :int]
].each { |fn, params, ret| attach_function fn, params, ret }
427
+
428
+ # =========================================================================
429
+ # Form XObjects: contenitori che incapsulano grafica (linee, rect, testo)
430
+ # come "subroutine grafica" riutilizzabile. Nei PDF generati da gestionali
431
+ # (TeamSystem, Zucchetti, ...) e da molti template Word/Excel, l'INTERA
432
+ # pagina è un singolo Form XObject. Senza discendervi dentro, non si
433
+ # vedono linee/rect/chars. Cf. PDF Spec 1.7 §8.10.
434
+ #
435
+ # Dopo FPDFFormObj_GetObject(form, i) si ottiene un FPDF_PAGEOBJECT child
436
+ # le cui coordinate sono nel sistema del form. La trasformazione al
437
+ # sistema-pagina si ottiene da FPDFPageObj_GetMatrix(form_obj, &matrix).
438
+ # =========================================================================
439
# Form XObject traversal. Each row is [function, parameter types, return type].
[
  [:FPDFFormObj_CountObjects, %i[FPDF_PAGEOBJECT],       :int],
  [:FPDFFormObj_GetObject,    %i[FPDF_PAGEOBJECT ulong], :FPDF_PAGEOBJECT]
].each { |fn, params, ret| attach_function fn, params, ret }
442
+
443
+ # =========================================================================
444
+ # Path segments — fondamentali per detection linee tabella
445
+ # =========================================================================
446
# Each row is [function, parameter types, return type].
[
  [:FPDFPath_CountSegments,    %i[FPDF_PAGEOBJECT],                  :int],
  [:FPDFPath_GetPathSegment,   %i[FPDF_PAGEOBJECT int],              :FPDF_PATHSEGMENT],
  [:FPDFPath_GetDrawMode,      %i[FPDF_PAGEOBJECT pointer pointer],  :FPDF_BOOL],
  [:FPDFPathSegment_GetPoint,  %i[FPDF_PATHSEGMENT pointer pointer], :FPDF_BOOL],
  [:FPDFPathSegment_GetType,   %i[FPDF_PATHSEGMENT],                 :int],
  [:FPDFPathSegment_GetClose,  %i[FPDF_PATHSEGMENT],                 :FPDF_BOOL]
].each { |fn, params, ret| attach_function fn, params, ret }
455
+
456
+ # =========================================================================
457
+ # Image objects
458
+ # =========================================================================
459
# Each row is [function, parameter types, return type].
[
  [:FPDFImageObj_GetImageMetadata,
   %i[FPDF_PAGEOBJECT FPDF_PAGE pointer], :FPDF_BOOL],
  [:FPDFImageObj_GetImagePixelSize,
   %i[FPDF_PAGEOBJECT pointer pointer], :FPDF_BOOL],
  [:FPDFImageObj_GetBitmap, %i[FPDF_PAGEOBJECT], :FPDF_BITMAP],
  [:FPDFImageObj_GetRenderedBitmap,
   %i[FPDF_DOCUMENT FPDF_PAGE FPDF_PAGEOBJECT], :FPDF_BITMAP],
  [:FPDFImageObj_GetImageDataDecoded, %i[FPDF_PAGEOBJECT pointer ulong],     :ulong],
  [:FPDFImageObj_GetImageDataRaw,     %i[FPDF_PAGEOBJECT pointer ulong],     :ulong],
  [:FPDFImageObj_GetImageFilterCount, %i[FPDF_PAGEOBJECT],                   :int],
  [:FPDFImageObj_GetImageFilter,      %i[FPDF_PAGEOBJECT int pointer ulong], :ulong]
].each { |fn, params, ret| attach_function fn, params, ret }
475
+
476
+ # =========================================================================
477
+ # Text page-objects (font name di un text object, glifi)
478
+ # =========================================================================
479
attach_function :FPDFTextObj_GetFontSize,
                %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
attach_function :FPDFTextObj_GetText,
                %i[FPDF_PAGEOBJECT FPDF_TEXTPAGE pointer ulong], :ulong
attach_function :FPDFTextObj_GetFont, %i[FPDF_PAGEOBJECT], :FPDF_FONT
# FPDFTextObj_GetTextRenderMode replaces the former
# FPDFText_GetTextRenderMode (removed upstream in chromium/6611).
# It takes a text PAGEOBJECT, not (textpage, char_index).
attach_function :FPDFTextObj_GetTextRenderMode, %i[FPDF_PAGEOBJECT], :int
# NOTE: FPDFFont_GetFontName is marked as legacy in recent PDFium.
# The newer model has two separate APIs:
#   - FPDFFont_GetBaseFontName -> BaseFont entry of the PDF dict (may carry
#                                 subset prefixes such as "ABCDEF+Helvetica")
#   - FPDFFont_GetFamilyName   -> "clean" family name (e.g. "Helvetica")
# These use size_t for the length and return type instead of unsigned long.
# They are absent from PDFium builds <= chromium/6533: in that case the
# tolerant attach_function makes the call fail with a clear LoadError at the
# call site, not at require time.
attach_function :FPDFFont_GetBaseFontName,
                %i[FPDF_FONT pointer size_t], :size_t
attach_function :FPDFFont_GetFamilyName,
                %i[FPDF_FONT pointer size_t], :size_t
# Kept for compatibility with older PDFium builds. It may be missing from
# newer builds: same stub mechanism applies.
attach_function :FPDFFont_GetFontName,
                %i[FPDF_FONT pointer ulong], :ulong
# NOTE(review): upstream fpdf_edit.h appears to declare this as
# `int FPDFFont_GetFlags(FPDF_FONT font)` (flags returned directly, -1 on
# failure), i.e. without an output pointer. The declaration is left unchanged
# here because fixing it changes the Ruby call arity; confirm against the
# header and the wrapper that calls it.
attach_function :FPDFFont_GetFlags, %i[FPDF_FONT pointer], :FPDF_BOOL
attach_function :FPDFFont_GetWeight, %i[FPDF_FONT], :int
attach_function :FPDFFont_GetIsEmbedded, %i[FPDF_FONT], :int
attach_function :FPDFFont_GetItalicAngle,
                %i[FPDF_FONT pointer], :FPDF_BOOL

# Font ascent / descent, useful for baseline detection and line leading.
#
# Fix: the second parameter is `float font_size` in upstream fpdf_edit.h, so
# it is declared :float here (it was :int, which passes the value in the
# wrong register class on common 64-bit ABIs). Integer arguments are still
# accepted by FFI for a :float parameter, so existing callers keep working.
# The result is written through the float pointer, expressed for the
# font_size that was passed in.
attach_function :FPDFFont_GetAscent, %i[FPDF_FONT float pointer], :FPDF_BOOL
attach_function :FPDFFont_GetDescent, %i[FPDF_FONT float pointer], :FPDF_BOOL

# Nominal width of a glyph in the font program ("advance width"): the width
# the PDF declares for that glyph before the kerning applied by `TJ`
# operators. Combined with FPDFText_GetMatrix (for the CTM scale) it gives
# the real advance in page coordinates. Conceptually equivalent to the
# advance pdfminer.six reads from the font program.
#
# CAUTION (author's note): the returned value is scaled by the font_size
# passed as parameter. For most PDFs produced by business software the
# font_size is 1.0 and the CTM does the scaling (typically 5x-10x).
attach_function :FPDFFont_GetGlyphWidth,
                %i[FPDF_FONT uint float pointer], :FPDF_BOOL

# NOTE: FPDFText_GetMatrix is attached in the text-page bindings. Together
# with FPDFFont_GetGlyphWidth it yields the glyph advance in page
# coordinates as `glyph_width x |FPDFText_GetMatrix.a|`.
537
+
538
+ # =========================================================================
539
+ # Annotations
540
+ # =========================================================================
541
attach_function :FPDFPage_GetAnnotCount, %i[FPDF_PAGE], :int
attach_function :FPDFPage_GetAnnot,
                %i[FPDF_PAGE int], :FPDF_ANNOTATION
attach_function :FPDFPage_CloseAnnot, %i[FPDF_ANNOTATION], :void
attach_function :FPDFAnnot_GetSubtype,
                %i[FPDF_ANNOTATION], :int
attach_function :FPDFAnnot_GetRect,
                %i[FPDF_ANNOTATION pointer], :FPDF_BOOL
attach_function :FPDFAnnot_GetStringValue,
                %i[FPDF_ANNOTATION string pointer ulong], :ulong
attach_function :FPDFAnnot_HasKey,
                %i[FPDF_ANNOTATION string], :FPDF_BOOL
attach_function :FPDFAnnot_GetLink,
                %i[FPDF_ANNOTATION], :FPDF_LINK
# NOTE(review): upstream fpdf_text.h appears to declare FPDFLink_GetURL for
# the web-link API as (FPDF_PAGELINK link_page, int link_index,
# unsigned short* buffer, int buflen) -> int, which does not match the
# declaration below. It is left unchanged because correcting it changes the
# Ruby call arity; confirm before calling. For an FPDF_LINK, the URI can be
# read through FPDFLink_GetAction + FPDFAction_GetURIPath, both bound here.
attach_function :FPDFLink_GetURL,
                %i[FPDF_LINK pointer ulong], :ulong
# Fix: upstream returns `unsigned long`, so the return type is :ulong
# (it was :uint).
attach_function :FPDFAction_GetType, %i[FPDF_ACTION], :ulong
attach_function :FPDFAction_GetURIPath,
                %i[FPDF_DOCUMENT FPDF_ACTION pointer ulong], :ulong
attach_function :FPDFLink_GetAction, %i[FPDF_LINK], :FPDF_ACTION
attach_function :FPDFLink_GetDest, %i[FPDF_DOCUMENT FPDF_LINK], :FPDF_DEST
562
+
563
+ # =========================================================================
564
+ # Forms
565
+ # =========================================================================
566
# FPDF_FORMFILLINFO is a large struct (about 70 fields in recent builds).
# For EXTRACTION only, a minimal instance with version=2 and every callback
# null is enough: per the author's note, PDFium tolerates NULL for the
# callbacks it does not invoke in read-only mode (no JavaScript, no XFA).
class FPDF_FORMFILLINFO < FFI::Struct
  # Keep aligned with the public fpdf_formfill.h header. The critical field
  # is `version`: per the author's note, a wrong value makes init fail
  # silently. The callback area is deliberately oversized (256 pointers) to
  # stay robust against future extensions of the header.
  callback_slots = 256
  layout(
    :version, :int,
    :_callbacks, [:pointer, callback_slots]
  )
end
579
+
580
# Parameter prefix shared by the per-widget form getters.
field_args = %i[FPDF_FORMHANDLE FPDF_ANNOTATION]

# Each row is [function, parameter types, return type].
[
  [:FPDFDOC_InitFormFillEnvironment, %i[FPDF_DOCUMENT pointer], :FPDF_FORMHANDLE],
  [:FPDFDOC_ExitFormFillEnvironment, %i[FPDF_FORMHANDLE],       :void],
  [:FPDF_FFLDraw,
   %i[FPDF_FORMHANDLE FPDF_BITMAP FPDF_PAGE int int int int int int], :void],
  [:FPDFAnnot_GetFormFieldType,  field_args,                          :int],
  [:FPDFAnnot_GetFormFieldName,  field_args + %i[pointer ulong],      :ulong],
  [:FPDFAnnot_GetFormFieldValue, field_args + %i[pointer ulong],      :ulong],
  [:FPDFAnnot_GetFormFieldFlags, field_args,                          :int],
  [:FPDFAnnot_IsChecked,         field_args,                          :FPDF_BOOL],
  [:FPDFAnnot_GetOptionCount,    field_args,                          :int],
  [:FPDFAnnot_GetOptionLabel,    field_args + %i[int pointer ulong],  :ulong]
].each { |fn, params, ret| attach_function fn, params, ret }
601
+
602
+ # =========================================================================
603
+ # Bookmarks (outline)
604
+ # =========================================================================
605
# Each row is [function, parameter types, return type].
[
  [:FPDFBookmark_GetFirstChild,  %i[FPDF_DOCUMENT FPDF_BOOKMARK], :FPDF_BOOKMARK],
  [:FPDFBookmark_GetNextSibling, %i[FPDF_DOCUMENT FPDF_BOOKMARK], :FPDF_BOOKMARK],
  [:FPDFBookmark_GetTitle,       %i[FPDF_BOOKMARK pointer ulong], :ulong],
  [:FPDFBookmark_GetDest,        %i[FPDF_DOCUMENT FPDF_BOOKMARK], :FPDF_DEST],
  [:FPDFDest_GetDestPageIndex,   %i[FPDF_DOCUMENT FPDF_DEST],     :int]
].each { |fn, params, ret| attach_function fn, params, ret }
615
+
616
+ # =========================================================================
617
+ # Attachments
618
+ # =========================================================================
619
# Each row is [function, parameter types, return type].
[
  [:FPDFDoc_GetAttachmentCount, %i[FPDF_DOCUMENT],                         :int],
  [:FPDFDoc_GetAttachment,      %i[FPDF_DOCUMENT int],                     :FPDF_ATTACHMENT],
  [:FPDFAttachment_GetName,     %i[FPDF_ATTACHMENT pointer ulong],         :ulong],
  [:FPDFAttachment_GetFile,     %i[FPDF_ATTACHMENT pointer ulong pointer], :FPDF_BOOL]
].each { |fn, params, ret| attach_function fn, params, ret }
626
+
627
+ # =========================================================================
628
+ # Structure tree (per PDF tagged → estrazione semantica robusta)
629
+ # =========================================================================
630
+ #
631
+ # Per PDF "tagged" (PDF/UA, esport da Word/LibreOffice/InDesign), il
632
+ # `StructTreeRoot` espone una struttura logica del documento (Document
633
+ # → P, H1, Table, TR, TH, TD, Figure...) indipendente dal layout grafico.
634
+ # Ogni element può essere collegato al testo della pagina tramite
635
+ # `MarkedContentID`: i page objects con lo stesso MCID appartengono
636
+ # semanticamente a quell'element.
637
+ #
638
+ # Su PDF NON tagged (la maggior parte dei gestionali italiani):
639
+ # FPDF_StructTree_GetForPage ritorna NULL.
640
+ #
641
+ # Su PDF "tagged ma vuoto" (es. CR Banca d'Italia, dove il
642
+ # StructTreeRoot esiste con 700+ entries ma tutti gli elementi sono
643
+ # placeholder senza type/MCID): il tree è present ma walk produce
644
+ # output vuoto. Vedi `Rpdfium::Structure::Tree#empty?`.
645
# Extra opaque handles used by the structure-tree attribute API.
%i[FPDF_STRUCTELEMENT_ATTR FPDF_STRUCTELEMENT_ATTR_VALUE].each do |handle|
  typedef :pointer, handle
end

# Tree handle for a page. Each row is [function, parameter types, return type].
[
  [:FPDF_StructTree_GetForPage,      %i[FPDF_PAGE],           :FPDF_STRUCTTREE],
  [:FPDF_StructTree_Close,           %i[FPDF_STRUCTTREE],     :void],
  [:FPDF_StructTree_CountChildren,   %i[FPDF_STRUCTTREE],     :int],
  [:FPDF_StructTree_GetChildAtIndex, %i[FPDF_STRUCTTREE int], :FPDF_STRUCTELEMENT],
  # Tree navigation
  [:FPDF_StructElement_CountChildren,   %i[FPDF_STRUCTELEMENT],     :int],
  [:FPDF_StructElement_GetChildAtIndex, %i[FPDF_STRUCTELEMENT int], :FPDF_STRUCTELEMENT],
  [:FPDF_StructElement_GetParent,       %i[FPDF_STRUCTELEMENT],     :FPDF_STRUCTELEMENT]
].each { |fn, params, ret| attach_function fn, params, ret }

# Element identification (Type, ObjType, Title, ID, Lang) followed by the
# "logical" text overrides for accessibility / ligature resolution
# (ActualText, AltText, Expansion). All share the buffer-getter signature.
%w[Type ObjType Title ID Lang ActualText AltText Expansion].each do |property|
  attach_function :"FPDF_StructElement_Get#{property}",
                  %i[FPDF_STRUCTELEMENT pointer ulong], :ulong
end

[
  # Marked content IDs (link elements to page objects with the same MCID).
  # GetMarkedContentID returns the first MCID (for back-compat).
  # GetMarkedContentIdCount + IdAtIndex serve elements with several MCIDs.
  # GetChildMarkedContentID: MCID of the child when it is a direct MCR.
  [:FPDF_StructElement_GetMarkedContentID,        %i[FPDF_STRUCTELEMENT],     :int],
  [:FPDF_StructElement_GetMarkedContentIdCount,   %i[FPDF_STRUCTELEMENT],     :int],
  [:FPDF_StructElement_GetMarkedContentIdAtIndex, %i[FPDF_STRUCTELEMENT int], :int],
  [:FPDF_StructElement_GetChildMarkedContentID,   %i[FPDF_STRUCTELEMENT int], :int],
  # Structural PDF attributes (RowSpan, ColSpan, Scope, Headers, ...).
  # They live in a sub-API: each element has 0+ attribute objects, each
  # holding 0+ key/value pairs.
  [:FPDF_StructElement_GetAttributeCount,   %i[FPDF_STRUCTELEMENT],     :int],
  [:FPDF_StructElement_GetAttributeAtIndex, %i[FPDF_STRUCTELEMENT int], :FPDF_STRUCTELEMENT_ATTR],
  [:FPDF_StructElement_GetStringAttribute,
   %i[FPDF_STRUCTELEMENT string pointer ulong], :ulong],
  # Attribute getters: key/value enumeration
  [:FPDF_StructElement_Attr_GetCount, %i[FPDF_STRUCTELEMENT_ATTR], :int],
  [:FPDF_StructElement_Attr_GetName,
   %i[FPDF_STRUCTELEMENT_ATTR int pointer ulong pointer], :FPDF_BOOL],
  [:FPDF_StructElement_Attr_GetValue,
   %i[FPDF_STRUCTELEMENT_ATTR string], :FPDF_STRUCTELEMENT_ATTR_VALUE],
  [:FPDF_StructElement_Attr_GetType,
   %i[FPDF_STRUCTELEMENT_ATTR_VALUE], :int],
  [:FPDF_StructElement_Attr_GetBooleanValue,
   %i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer], :FPDF_BOOL],
  [:FPDF_StructElement_Attr_GetNumberValue,
   %i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer], :FPDF_BOOL],
  [:FPDF_StructElement_Attr_GetStringValue,
   %i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer ulong pointer], :FPDF_BOOL],
  [:FPDF_StructElement_Attr_GetBlobValue,
   %i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer ulong pointer], :FPDF_BOOL],
  # Attribute whose value is itself an array (e.g. Headers, an array of IDs)
  [:FPDF_StructElement_Attr_CountChildren,
   %i[FPDF_STRUCTELEMENT_ATTR_VALUE], :int]
].each { |fn, params, ret| attach_function fn, params, ret }
731
+ attach_function :FPDF_StructElement_Attr_GetChildAtIndex,
732
+ %i[FPDF_STRUCTELEMENT_ATTR_VALUE int],
733
+ :FPDF_STRUCTELEMENT_ATTR_VALUE
734
+
735
+ # =========================================================================
736
+ # Page box geometry — media/crop/bleed/trim/art box
737
+ # =========================================================================
738
+ # Each PDF page has up to 5 rectangular boxes, in bottom-up coordinates:
739
+ # - media: the full physical area of the page (always present)
740
+ # - crop: the visible sub-area (defaults to media when not specified)
741
+ # - bleed: area used for printing with bleed margins (rare)
742
+ # - trim: final cut area (rare, for prepress)
743
+ # - art: area of meaningful content (rare)
744
+ #
745
+ # pdfplumber exposes them as `page.mediabox`, `page.cropbox`, etc.
746
+ # Without access to the cropbox, a PDF extraction library cannot tell
747
+ # the page's "visible" area apart from its "physical" one.
748
+ # All of them return FPDF_BOOL: 0 when the box is not defined.
749
+ attach_function :FPDFPage_GetMediaBox,
750
+ %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
751
+ attach_function :FPDFPage_GetCropBox,
752
+ %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
753
+ attach_function :FPDFPage_GetBleedBox,
754
+ %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
755
+ attach_function :FPDFPage_GetTrimBox,
756
+ %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
757
+ attach_function :FPDFPage_GetArtBox,
758
+ %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
759
+
760
+ # =========================================================================
761
+ # Page object: state, rotated bounds, dash pattern, marked content
762
+ # =========================================================================
763
+ # `FPDFPageObj_GetIsActive`: some page objects can be "inactive"
764
+ # (e.g. hidden by Optional Content / disabled layers). Without
765
+ # this check, extraction would include non-visible content.
766
+ # Writes 0/1 into *out_active.
767
+ attach_function :FPDFPageObj_GetIsActive,
768
+ %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
769
+
770
+ # `FPDFPageObj_GetRotatedBounds`: 4-point bbox (FS_QUADPOINTSF) for
771
+ # rotated objects. The standard GetBounds returns the AABB (Axis-Aligned
772
+ # Bounding Box), useless for objects at 45°/90°. For vertical or
773
+ # rotated text, this is the "true" bbox.
774
+ attach_function :FPDFPageObj_GetRotatedBounds,
775
+ %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
776
+
777
+ # Dash pattern: useful in `line_segments` to filter out dashed guide
778
+ # lines (often used as "non-printing" hints in templates).
779
+ # Dashed lines can confuse table-cell detection.
780
+ attach_function :FPDFPageObj_GetDashCount,
781
+ %i[FPDF_PAGEOBJECT], :int
782
+ attach_function :FPDFPageObj_GetDashArray,
783
+ %i[FPDF_PAGEOBJECT pointer size_t], :FPDF_BOOL
784
+ attach_function :FPDFPageObj_GetDashPhase,
785
+ %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
786
+
787
+ # Marked content (Tagged PDF) — BMC/BDC operators of the content stream.
788
+ # In structured PDFs (PDF/UA, Word→PDF, InDesign export), the operators
789
+ # `/Span BMC ... EMC` or `/Span <</MCID 12>> BDC ... EMC` group
790
+ # chars semantically. PDFs generated by Italian business software lack
791
+ # these tags; for "tagged" PDFs they are the most reliable way
792
+ # to group tokens.
793
+ attach_function :FPDFPageObj_CountMarks,
794
+ %i[FPDF_PAGEOBJECT], :int
795
+ attach_function :FPDFPageObj_GetMark,
796
+ %i[FPDF_PAGEOBJECT ulong], :FPDF_PAGEOBJECTMARK
797
+ attach_function :FPDFPageObj_GetMarkedContentID,
798
+ %i[FPDF_PAGEOBJECT], :int
799
+ attach_function :FPDFPageObjMark_GetName,
800
+ %i[FPDF_PAGEOBJECTMARK pointer ulong pointer], :FPDF_BOOL
801
+ attach_function :FPDFPageObjMark_CountParams,
802
+ %i[FPDF_PAGEOBJECTMARK], :int
803
+ attach_function :FPDFPageObjMark_GetParamKey,
804
+ %i[FPDF_PAGEOBJECTMARK ulong pointer ulong pointer],
805
+ :FPDF_BOOL
806
+ attach_function :FPDFPageObjMark_GetParamValueType,
807
+ %i[FPDF_PAGEOBJECTMARK string], :int
808
+ attach_function :FPDFPageObjMark_GetParamIntValue,
809
+ %i[FPDF_PAGEOBJECTMARK string pointer], :FPDF_BOOL
810
+ attach_function :FPDFPageObjMark_GetParamStringValue,
811
+ %i[FPDF_PAGEOBJECTMARK string pointer ulong pointer],
812
+ :FPDF_BOOL
813
+
814
+ # =========================================================================
815
+ # Catalog / Document metadata
816
+ # =========================================================================
817
+ # FPDFCatalog_GetLanguage: language declared by the document (e.g. "it-IT").
818
+ # Useful for extraction pipelines that want to switch
819
+ # language-specific rules (e.g. word tokenizer, hyphen lookup).
820
+ attach_function :FPDFCatalog_GetLanguage,
821
+ %i[FPDF_DOCUMENT pointer ulong], :ulong
822
+
823
+ # FPDFDoc_GetPageMode: how the PDF should open (e.g. PageMode.UseOutlines,
824
+ # PageMode.FullScreen). Numeric. Useful when building PDF editors/viewers.
825
+ attach_function :FPDFDoc_GetPageMode, %i[FPDF_DOCUMENT], :int
826
+
827
+ # =========================================================================
828
+ # Links (Link annotations and LinkAtPoint for lookup by coordinate)
829
+ # =========================================================================
830
+ # `FPDFLink_GetLinkAtPoint`: given (x, y) in page coordinates, returns
831
+ # the link annotation containing it. Core of "click handling"
832
+ # in viewers / OCR-style "extract links". pdfplumber exposes something similar via
833
+ # `page.hyperlinks`.
834
+ attach_function :FPDFLink_GetLinkAtPoint,
835
+ %i[FPDF_PAGE double double], :FPDF_LINK
836
+ attach_function :FPDFLink_GetLinkZOrderAtPoint,
837
+ %i[FPDF_PAGE double double], :int
838
+ attach_function :FPDFLink_GetAnnot,
839
+ %i[FPDF_PAGE FPDF_LINK], :FPDF_ANNOTATION
840
+ attach_function :FPDFLink_GetAnnotRect,
841
+ %i[FPDF_LINK pointer], :FPDF_BOOL
842
+ # FPDFLink_GetTextRange: char_index range in the text page matching the link (hyperlink → page text).
843
+ # NOTE(review): PDFium's fpdf_text.h appears to declare (FPDF_PAGELINK, int, int*, int*) — confirm arity.
844
+ attach_function :FPDFLink_GetTextRange,
845
+ %i[FPDF_LINK pointer pointer], :FPDF_BOOL
846
+ # Rect and QuadPoints: link geometry (rectangle, or quadrilateral for links spanning several lines).
847
+ # NOTE(review): FPDFLink_GetRect in fpdf_text.h appears to take (FPDF_PAGELINK, int, int, double* x4) — confirm.
848
+ attach_function :FPDFLink_GetRect,
849
+ %i[FPDF_LINK int pointer], :FPDF_BOOL
850
+ attach_function :FPDFLink_GetQuadPoints,
851
+ %i[FPDF_LINK int pointer], :FPDF_BOOL
852
+
853
+ # =========================================================================
854
+ # Action / Destination (outline + link extensions)
855
+ # =========================================================================
856
+ # FPDFAction_GetDest: for "GoTo" actions, returns the FPDF_DEST.
857
+ # FPDFAction_GetFilePath: for "Launch" or "RemoteGoTo" actions, path of the
858
+ # external target file.
859
+ attach_function :FPDFAction_GetDest,
860
+ %i[FPDF_DOCUMENT FPDF_ACTION], :FPDF_DEST
861
+ attach_function :FPDFAction_GetFilePath,
862
+ %i[FPDF_ACTION pointer ulong], :ulong
863
+ # FPDFBookmark_GetAction: action attached to a bookmark (alternative to
864
+ # GetDest when the bookmark is an action rather than a destination).
865
+ attach_function :FPDFBookmark_GetAction,
866
+ %i[FPDF_BOOKMARK], :FPDF_ACTION
867
+ # FPDFBookmark_GetCount: number of sub-bookmarks (positive = expanded,
868
+ # negative = collapsed, 0 = leaf).
869
+ attach_function :FPDFBookmark_GetCount,
870
+ %i[FPDF_BOOKMARK], :int
871
+ # FPDFDest_GetView: view type (Fit, FitH, XYZ, etc.) + parameters.
872
+ # FPDFDest_GetLocationInPage: x/y/zoom extracted from the dest.
873
+ attach_function :FPDFDest_GetView,
874
+ %i[FPDF_DEST pointer pointer], :ulong
875
+ attach_function :FPDFDest_GetLocationInPage,
876
+ %i[FPDF_DEST pointer pointer pointer pointer pointer pointer],
877
+ :FPDF_BOOL
878
+
879
+ # =========================================================================
880
+ # Font extras: GetFontData, GetAscent, GetDescent
881
+ # =========================================================================
882
+ # Already attached above: FPDFFont_GetGlyphWidth.
883
+ # Added here: FontData (raw font program bytes — useful for inspection,
884
+ # debugging embedding, font substitution) and GetGlyphPath (vector path of
885
+ # a glyph, an alternative to GlyphWidth for exotic fonts).
886
+ # GetFontData follows the bool convention: it reports `out_buflen` when buf is NULL.
887
+ attach_function :FPDFFont_GetFontData,
888
+ %i[FPDF_FONT pointer size_t pointer], :FPDF_BOOL
889
+ attach_function :FPDFFont_GetGlyphPath,
890
+ %i[FPDF_FONT uint float], :FPDF_GLYPHPATH
891
+ # FPDF_GLYPHPATH: handle to a path. Added as a typedef.
892
+ # Its GlyphPath_* APIs are niche, but they are exposed for symmetry.
893
+ attach_function :FPDFGlyphPath_CountGlyphSegments,
894
+ %i[FPDF_GLYPHPATH], :int
895
+ attach_function :FPDFGlyphPath_GetGlyphPathSegment,
896
+ %i[FPDF_GLYPHPATH int], :FPDF_PATHSEGMENT
897
+
898
+ # =========================================================================
899
+ # Text page: char index at position
900
+ # =========================================================================
901
+ # FPDFText_GetCharIndexAtPos: given a point (x, y) in page coords,
902
+ # returns the index of the nearest char (within tolerance). Useful for
903
+ # "hit tests" in viewers and for coord → text index mapping in search.
904
+ attach_function :FPDFText_GetCharIndexAtPos,
905
+ %i[FPDF_TEXTPAGE double double double double], :int
906
+ # FPDFText_GetTextIndexFromCharIndex / GetCharIndexFromTextIndex:
907
+ # map the "char" index (per glyph) to the "text" index (per logical
908
+ # codepoint). The two indices differ for ligatures/substitutions.
909
+ attach_function :FPDFText_GetTextIndexFromCharIndex,
910
+ %i[FPDF_TEXTPAGE int], :int
911
+ attach_function :FPDFText_GetCharIndexFromTextIndex,
912
+ %i[FPDF_TEXTPAGE int], :int
913
+
914
+ # =========================================================================
915
+ # Annotation extras: GetFlags, GetColor, GetBorder, AP, attachment points
916
+ # =========================================================================
917
+ # FPDFAnnot_GetFlags: bitmask of Flags (Hidden, Print, NoZoom, etc.).
918
+ # Without it, a visible annotation cannot be told apart from one
919
+ # with the Hidden flag.
920
+ attach_function :FPDFAnnot_GetFlags, %i[FPDF_ANNOTATION], :int
921
+ # Color: stroke (BORDER_COLOR) and fill (INTERIOR_COLOR).
922
+ attach_function :FPDFAnnot_GetColor,
923
+ %i[FPDF_ANNOTATION int pointer pointer pointer pointer],
924
+ :FPDF_BOOL
925
+ # Border: width and horizontal/vertical radius, written through three out-pointers.
926
+ attach_function :FPDFAnnot_GetBorder,
927
+ %i[FPDF_ANNOTATION pointer pointer pointer], :FPDF_BOOL
928
+ # AP (Appearance Stream): rendered form of the annotation in the various
929
+ # modes (Normal/Rollover/Down).
930
+ attach_function :FPDFAnnot_GetAP,
931
+ %i[FPDF_ANNOTATION int pointer ulong], :ulong
932
+ # FileAttachment: for annotations of subtype FileAttachment, gets
933
+ # the FPDF_ATTACHMENT.
934
+ attach_function :FPDFAnnot_GetFileAttachment,
935
+ %i[FPDF_ANNOTATION], :FPDF_ATTACHMENT
936
+ # AttachmentPoints: for highlights/markup spanning several lines,
937
+ # the 4 points of each quadrilateral.
938
+ attach_function :FPDFAnnot_CountAttachmentPoints,
939
+ %i[FPDF_ANNOTATION], :size_t
940
+ attach_function :FPDFAnnot_GetAttachmentPoints,
941
+ %i[FPDF_ANNOTATION size_t pointer], :FPDF_BOOL
942
+
943
+ # =========================================================================
944
+ # Attachment extras
945
+ # =========================================================================
946
+ # FPDFAttachment_GetSubtype: MIME-like subtype of the attached file.
947
+ attach_function :FPDFAttachment_GetSubtype,
948
+ %i[FPDF_ATTACHMENT pointer ulong], :ulong
949
+ # FPDFAttachment_GetStringValue/HasKey: to read the custom metadata
950
+ # of the file attachment (Description, CreationDate, etc.).
951
+ attach_function :FPDFAttachment_HasKey,
952
+ %i[FPDF_ATTACHMENT string], :FPDF_BOOL
953
+ attach_function :FPDFAttachment_GetValueType,
954
+ %i[FPDF_ATTACHMENT string], :int
955
+ attach_function :FPDFAttachment_GetStringValue,
956
+ %i[FPDF_ATTACHMENT string pointer ulong], :ulong
957
+
958
+ # =========================================================================
959
+ # Helper: leggere stringhe UTF-16LE che PDFium ritorna in bytes
960
+ # =========================================================================
961
+ # Convenzione PDFium: la maggior parte delle Get*Text/Get*Name ritornano
962
+ # `unsigned long` (numero BYTES, terminatore incluso). Si chiama prima con
963
+ # buffer NULL/0 per ottenere la dimensione, poi con buffer allocato.
964
# Reads a string through a binding that follows the two-call buffer
# convention: the bound function is first invoked with a NULL buffer of
# length 0 to learn how many bytes it needs, then invoked again with a
# buffer of that size.
#
# @param method_name [Symbol] name of the bound function to dispatch to
# @param args [Array] leading arguments placed before the buffer/length pair
# @return [String] decoded UTF-8 text; "" when the probe reports 2 bytes or
#   fewer (terminator only, or an error)
def self.read_utf16_string(method_name, *args)
  # Probe call: no buffer, so only the required byte count comes back.
  required = send(method_name, *args, FFI::Pointer::NULL, 0)

  if required > 2
    storage = FFI::MemoryPointer.new(:uchar, required)
    # Second call fills the buffer; its return value is not used.
    send(method_name, *args, storage, required)
    utf16_bytes_to_utf8(storage.read_bytes(required))
  else
    ""
  end
end
974
+
975
+ # PDFium ritorna UTF-16LE little-endian con terminatore null.
976
# Decodes a UTF-16LE byte string into UTF-8.
#
# The input is copied before it is re-tagged as UTF-16LE, so the caller's
# string keeps its encoding and frozen input is accepted. Invalid or
# unmappable sequences are substituted by the encoder's replacement
# character, and every NUL character (terminator included) is removed
# from the result.
#
# @param bytes [String] raw UTF-16LE bytes
# @return [String] UTF-8 text with all NUL characters removed
def self.utf16_bytes_to_utf8(bytes)
  bytes.dup # work on a copy: force_encoding mutates its receiver
       .force_encoding("UTF-16LE")
       .encode("UTF-8", invalid: :replace, undef: :replace)
       .delete("\x00")
end
981
+ end
982
+ end