rpdfium 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +1870 -0
- data/LICENSE +19 -0
- data/README.md +599 -0
- data/lib/rpdfium/annotation/annotation.rb +114 -0
- data/lib/rpdfium/document.rb +226 -0
- data/lib/rpdfium/errors.rb +55 -0
- data/lib/rpdfium/form/form.rb +121 -0
- data/lib/rpdfium/image/embedded.rb +145 -0
- data/lib/rpdfium/io/png.rb +65 -0
- data/lib/rpdfium/page.rb +1623 -0
- data/lib/rpdfium/raw.rb +982 -0
- data/lib/rpdfium/search/search.rb +101 -0
- data/lib/rpdfium/structure/attachment.rb +40 -0
- data/lib/rpdfium/structure/element.rb +330 -0
- data/lib/rpdfium/structure/outline.rb +48 -0
- data/lib/rpdfium/structure/tree.rb +202 -0
- data/lib/rpdfium/table/cells.rb +137 -0
- data/lib/rpdfium/table/debugger.rb +122 -0
- data/lib/rpdfium/table/edges.rb +225 -0
- data/lib/rpdfium/table/extractor.rb +246 -0
- data/lib/rpdfium/table/table.rb +184 -0
- data/lib/rpdfium/util/cluster.rb +143 -0
- data/lib/rpdfium/util/column_inference.rb +139 -0
- data/lib/rpdfium/util/label_matcher.rb +214 -0
- data/lib/rpdfium/util/text_extraction.rb +49 -0
- data/lib/rpdfium/util/word_extractor.rb +151 -0
- data/lib/rpdfium/util/word_merger.rb +102 -0
- data/lib/rpdfium/version.rb +5 -0
- data/lib/rpdfium.rb +92 -0
- metadata +134 -0
data/lib/rpdfium/raw.rb
ADDED
|
@@ -0,0 +1,982 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ffi"
|
|
4
|
+
require "rbconfig"
|
|
5
|
+
|
|
6
|
+
module Rpdfium
|
|
7
|
+
# Layer 1: bindings FFI grezzi alle API C di PDFium.
|
|
8
|
+
# Mappa 1:1 con i nomi originali. Usare le classi wrapper per il codice
|
|
9
|
+
# applicativo. Le API "Experimental" di PDFium sono marcate nei commenti:
|
|
10
|
+
# in teoria potrebbero cambiare, in pratica sono stabili da anni.
|
|
11
|
+
module Raw
|
|
12
|
+
extend FFI::Library
|
|
13
|
+
|
|
14
|
+
# Builds the list of candidates that `ffi_lib` will try, in order.
#
# The original author notes that FFI appends the platform's "natural"
# extension (.dylib on macOS, .so on Linux, .dll on Windows) when the given
# name does not already end with a known one, so passing `libpdfium.so` on
# macOS would make FFI look for `libpdfium.so.dylib`. That is why the
# fallback names come from `system_library_names`, which filters by host OS.
#
# ENV["PDFIUM_LIBRARY_PATH"] and Rpdfium::Binary.library_path are treated as
# explicit paths: when either is set, it is returned as the only candidate
# and no system names are added, so `ffi_lib` either loads that exact path
# or fails.
#
# @return [Array] a single explicit path, or the system library names
def self.candidate_paths
  usable = ->(candidate) { candidate && !candidate.empty? }

  from_env = ENV["PDFIUM_LIBRARY_PATH"]
  return [from_env] if usable.call(from_env)

  # The optional binary companion is consulted only when it is loaded and
  # actually exposes `library_path`.
  if defined?(Rpdfium::Binary) && Rpdfium::Binary.respond_to?(:library_path)
    bundled = Rpdfium::Binary.library_path
    return [bundled] if usable.call(bundled)
  end

  system_library_names
end
|
|
39
|
+
|
|
40
|
+
# "System" library names, filtered by host OS.
#
# The extension-less names (`pdfium`, `libpdfium`) always come first; the
# original author relies on FFI appending the right extension to them.
# Names that carry an extension are added only when they match the host OS
# reported by `host_os`, to avoid the double-extension problem.
#
# @return [Array<String>] library names to hand to `ffi_lib`
def self.system_library_names
  with_extension = {
    macos: %w[libpdfium.dylib],
    linux: %w[libpdfium.so],
    windows: %w[pdfium.dll libpdfium.dll]
  }.fetch(host_os, []) # unrecognised host: extension-less names only

  %w[pdfium libpdfium] + with_extension
end
|
|
55
|
+
|
|
56
|
+
# Classifies RbConfig's "host_os" string into a coarse platform symbol.
#
# @return [Symbol, nil] :macos, :linux or :windows; nil when none matches
def self.host_os
  platform = RbConfig::CONFIG["host_os"]

  return :macos if /darwin/.match?(platform)
  return :linux if /linux/.match?(platform)
  return :windows if /mswin|mingw|cygwin/.match?(platform)

  nil
end
|
|
63
|
+
|
|
64
|
+
# Load state, filled in by the load attempt further down in this file.
@native_loaded = false
@load_error = nil

# @return [Boolean] whether the load attempt marked the library as loaded
def self.native_loaded?
  @native_loaded
end

# @return [Exception, nil] the error recorded by a failed load attempt
def self.load_error
  @load_error
end
|
|
69
|
+
|
|
70
|
+
begin
  library_candidates = candidate_paths
  ffi_lib(*library_candidates)
  # Original author's note: cdecl everywhere, including Win64 (bblanchon builds).
  ffi_convention :default
  @native_loaded = true
rescue ::LoadError, ::RuntimeError => load_failure
  # Fall back to "stub" mode: the failure is recorded and, further down,
  # attach_function is replaced by a version that generates stubs raising
  # Rpdfium::LoadError when invoked. Per the original author, this lets the
  # gem be required for its pure-Ruby modules (Edges, Cells, PNG) without
  # PDFium installed.
  @load_error = load_failure
  ffi_lib_flags :now # described by the author as a no-op here; documents intent
end
|
|
82
|
+
|
|
83
|
+
# Tolerant wrapper around FFI's attach_function.
#
# When binding fails with FFI::NotFoundError or RuntimeError (the original
# author cites "library not loaded" and "symbol missing in this PDFium
# version"), a singleton method with the same name is defined instead. That
# method raises Rpdfium::LoadError when called, so the failure surfaces at
# the call site rather than during `require`.
#
# @param name [Symbol] name of the function / generated method
# @param args remaining arguments, forwarded unchanged to FFI
def self.attach_function(name, *args)
  begin
    super
  rescue FFI::NotFoundError, RuntimeError => binding_error
    define_singleton_method(name) do |*_ignored|
      message = "PDFium symbol #{name} not available: #{binding_error.message}"
      raise Rpdfium::LoadError, message
    end
  end
end
|
|
95
|
+
|
|
96
|
+
unless @native_loaded
  # Replacement for attach_function used when the native library did not
  # load: it never calls super and directly defines a stub that raises
  # Rpdfium::LoadError, quoting the load error recorded at attach time.
  #
  # @param name [Symbol] name of the stub method to define
  def self.attach_function(name, *_args)
    cause = @load_error
    define_singleton_method(name) do |*_ignored|
      explanation = [
        "PDFium native library not loaded.",
        "Set ENV[\"PDFIUM_LIBRARY_PATH\"] to a valid libpdfium.{so,dylib,dll},",
        "or install the rpdfium-binary gem (when released).",
        "Original error: #{cause.message}"
      ].join("\n").strip
      raise Rpdfium::LoadError, explanation
    end
  end
end
|
|
111
|
+
|
|
112
|
+
# =========================================================================
|
|
113
|
+
# Tipi opachi
|
|
114
|
+
# =========================================================================
|
|
115
|
+
# Opaque PDFium handles: every one is declared as a plain pointer.
%i[
  FPDF_DOCUMENT
  FPDF_PAGE
  FPDF_TEXTPAGE
  FPDF_BITMAP
  FPDF_PAGEOBJECT
  FPDF_PAGEOBJECTMARK
  FPDF_PATHSEGMENT
  FPDF_FONT
  FPDF_ANNOTATION
  FPDF_FORMHANDLE
  FPDF_BOOKMARK
  FPDF_DEST
  FPDF_ACTION
  FPDF_LINK
  FPDF_GLYPHPATH
  FPDF_SCHHANDLE
  FPDF_ATTACHMENT
  FPDF_STRUCTTREE
  FPDF_STRUCTELEMENT
].each { |handle| typedef :pointer, handle }

# Scalar aliases.
typedef :int, :FPDF_BOOL
typedef :ushort, :FPDF_WCHAR
|
|
136
|
+
|
|
137
|
+
# =========================================================================
|
|
138
|
+
# Strutture C
|
|
139
|
+
# =========================================================================
|
|
140
|
+
# Rectangle of four floats, in field order left, top, right, bottom.
class FS_RECTF < FFI::Struct
  layout(*%i[left top right bottom].flat_map { |edge| [edge, :float] })
end
|
|
146
|
+
|
|
147
|
+
# PDF transformation matrix [a b 0; c d 0; e f 1]; the six float fields are
# laid out in the order a, b, c, d, e, f.
class FS_MATRIX < FFI::Struct
  layout(*%i[a b c d e f].flat_map { |coefficient| [coefficient, :float] })
end
|
|
153
|
+
|
|
154
|
+
# Point made of two floats: x, then y.
class FS_POINTF < FFI::Struct
  layout(
    :x, :float,
    :y, :float
  )
end
|
|
157
|
+
|
|
158
|
+
# Size made of two floats: width, then height.
class FS_SIZEF < FFI::Struct
  layout(
    :width, :float,
    :height, :float
  )
end
|
|
161
|
+
|
|
162
|
+
# Four (x, y) float pairs, laid out as x1, y1, x2, y2, x3, y3, x4, y4.
class FS_QUADPOINTSF < FFI::Struct
  layout(*(1..4).flat_map { |i| [:"x#{i}", :float, :"y#{i}", :float] })
end
|
|
168
|
+
|
|
169
|
+
# Image metadata record; the binding for FPDFImageObj_GetImageMetadata in
# this file takes a pointer as its last argument, presumably to this struct.
class FPDF_IMAGEOBJ_METADATA < FFI::Struct
  layout(*[
    [:width, :uint],
    [:height, :uint],
    [:horizontal_dpi, :float],
    [:vertical_dpi, :float],
    [:bits_per_pixel, :uint],
    [:colorspace, :int],
    [:marked_content_id, :int]
  ].flatten(1))
end
|
|
178
|
+
|
|
179
|
+
# =========================================================================
|
|
180
|
+
# Costanti
|
|
181
|
+
# =========================================================================
|
|
182
|
+
# Bitmap pixel formats.
FPDFBitmap_Unknown = 0
FPDFBitmap_Gray    = 1
FPDFBitmap_BGR     = 2
FPDFBitmap_BGRx    = 3
FPDFBitmap_BGRA    = 4

# Render flags. Each is a single bit, so they can be OR-ed together.
FPDF_ANNOT                 = 1 << 0
FPDF_LCD_TEXT              = 1 << 1
FPDF_NO_NATIVETEXT         = 1 << 2
FPDF_GRAYSCALE             = 1 << 3
FPDF_REVERSE_BYTE_ORDER    = 1 << 4 # RGBA instead of BGRA
FPDF_NO_GDIPLUS            = 1 << 6
FPDF_PRINTING              = 1 << 11
FPDF_RENDER_NO_SMOOTHTEXT  = 1 << 12
FPDF_RENDER_NO_SMOOTHIMAGE = 1 << 13
FPDF_RENDER_NO_SMOOTHPATH  = 1 << 14

# Page object types.
PAGEOBJ_UNKNOWN = 0
PAGEOBJ_TEXT    = 1
PAGEOBJ_PATH    = 2
PAGEOBJ_IMAGE   = 3
PAGEOBJ_SHADING = 4
PAGEOBJ_FORM    = 5

# Path segment types.
SEGMENT_UNKNOWN  = -1
SEGMENT_LINETO   = 0
SEGMENT_BEZIERTO = 1
SEGMENT_MOVETO   = 2

# Path fill modes.
FILLMODE_NONE      = 0
FILLMODE_ALTERNATE = 1
FILLMODE_WINDING   = 2

# Text render modes.
TEXT_RENDERMODE_FILL        = 0
TEXT_RENDERMODE_STROKE      = 1
TEXT_RENDERMODE_FILL_STROKE = 2
TEXT_RENDERMODE_INVISIBLE   = 3
|
|
225
|
+
|
|
226
|
+
# Annotation subtypes (PDF spec 12.5.6), numbered as in PDFium's public
# fpdf_annot.h. REDACT is 28 there; 27 is XFAWIDGET, so the previous value
# of 27 for REDACT labelled XFA widgets as redactions and never matched
# real redaction annotations. The subtypes that were missing from the list
# are included so every value PDFium can report has a constant and a name.
FPDF_ANNOT_UNKNOWN        = 0
FPDF_ANNOT_TEXT           = 1
FPDF_ANNOT_LINK           = 2
FPDF_ANNOT_FREETEXT       = 3
FPDF_ANNOT_LINE           = 4
FPDF_ANNOT_SQUARE         = 5
FPDF_ANNOT_CIRCLE         = 6
FPDF_ANNOT_POLYGON        = 7
FPDF_ANNOT_POLYLINE       = 8
FPDF_ANNOT_HIGHLIGHT      = 9
FPDF_ANNOT_UNDERLINE      = 10
FPDF_ANNOT_SQUIGGLY       = 11
FPDF_ANNOT_STRIKEOUT      = 12
FPDF_ANNOT_STAMP          = 13
FPDF_ANNOT_CARET          = 14
FPDF_ANNOT_INK            = 15
FPDF_ANNOT_POPUP          = 16
FPDF_ANNOT_FILEATTACHMENT = 17
FPDF_ANNOT_SOUND          = 18
FPDF_ANNOT_MOVIE          = 19
FPDF_ANNOT_WIDGET         = 20
FPDF_ANNOT_SCREEN         = 21
FPDF_ANNOT_PRINTERMARK    = 22
FPDF_ANNOT_TRAPNET        = 23
FPDF_ANNOT_WATERMARK      = 24
FPDF_ANNOT_THREED         = 25
FPDF_ANNOT_RICHMEDIA      = 26
FPDF_ANNOT_XFAWIDGET      = 27
FPDF_ANNOT_REDACT         = 28

# Subtype value => subtype name. FPDF_ANNOT_UNKNOWN deliberately has no
# entry, as before.
ANNOT_SUBTYPE_NAMES = {
  FPDF_ANNOT_TEXT => "Text",
  FPDF_ANNOT_LINK => "Link",
  FPDF_ANNOT_FREETEXT => "FreeText",
  FPDF_ANNOT_LINE => "Line",
  FPDF_ANNOT_SQUARE => "Square",
  FPDF_ANNOT_CIRCLE => "Circle",
  FPDF_ANNOT_POLYGON => "Polygon",
  FPDF_ANNOT_POLYLINE => "PolyLine",
  FPDF_ANNOT_HIGHLIGHT => "Highlight",
  FPDF_ANNOT_UNDERLINE => "Underline",
  FPDF_ANNOT_SQUIGGLY => "Squiggly",
  FPDF_ANNOT_STRIKEOUT => "StrikeOut",
  FPDF_ANNOT_STAMP => "Stamp",
  FPDF_ANNOT_CARET => "Caret",
  FPDF_ANNOT_INK => "Ink",
  FPDF_ANNOT_POPUP => "Popup",
  FPDF_ANNOT_FILEATTACHMENT => "FileAttachment",
  FPDF_ANNOT_SOUND => "Sound",
  FPDF_ANNOT_MOVIE => "Movie",
  FPDF_ANNOT_WIDGET => "Widget",
  FPDF_ANNOT_SCREEN => "Screen",
  FPDF_ANNOT_PRINTERMARK => "PrinterMark",
  FPDF_ANNOT_TRAPNET => "TrapNet",
  FPDF_ANNOT_WATERMARK => "Watermark",
  FPDF_ANNOT_THREED => "3D",
  FPDF_ANNOT_RICHMEDIA => "RichMedia",
  FPDF_ANNOT_XFAWIDGET => "XFAWidget",
  FPDF_ANNOT_REDACT => "Redact"
}.freeze
|
|
256
|
+
|
|
257
|
+
# Form field types (for widget annotations).
FPDF_FORMFIELD_UNKNOWN     = 0
FPDF_FORMFIELD_PUSHBUTTON  = 1
FPDF_FORMFIELD_CHECKBOX    = 2
FPDF_FORMFIELD_RADIOBUTTON = 3
FPDF_FORMFIELD_COMBOBOX    = 4
FPDF_FORMFIELD_LISTBOX     = 5
FPDF_FORMFIELD_TEXTFIELD   = 6
FPDF_FORMFIELD_SIGNATURE   = 7

# Text search flags. Each is a single bit, so they can be OR-ed together.
FPDF_MATCHCASE      = 1 << 0
FPDF_MATCHWHOLEWORD = 1 << 1
FPDF_CONSECUTIVE    = 1 << 2

# Form types, as returned by FPDF_GetFormType.
FORMTYPE_NONE           = 0
FORMTYPE_ACRO_FORM      = 1
FORMTYPE_XFA_FULL       = 2
FORMTYPE_XFA_FOREGROUND = 3
|
|
277
|
+
|
|
278
|
+
# =========================================================================
|
|
279
|
+
# Library lifecycle
|
|
280
|
+
# =========================================================================
|
|
281
|
+
# Library set-up / tear-down and last-error query; none takes arguments.
no_arguments = []
attach_function :FPDF_InitLibrary, no_arguments, :void
attach_function :FPDF_DestroyLibrary, no_arguments, :void
attach_function :FPDF_GetLastError, no_arguments, :ulong
|
|
284
|
+
|
|
285
|
+
# =========================================================================
|
|
286
|
+
# Document
|
|
287
|
+
# =========================================================================
|
|
288
|
+
# Opening and closing documents.
attach_function :FPDF_LoadDocument, [:string, :string], :FPDF_DOCUMENT
attach_function :FPDF_LoadMemDocument64, [:pointer, :size_t, :string], :FPDF_DOCUMENT
attach_function :FPDF_CloseDocument, [:FPDF_DOCUMENT], :void

# Document-level queries.
attach_function :FPDF_GetPageCount, [:FPDF_DOCUMENT], :int
attach_function :FPDF_GetDocPermissions, [:FPDF_DOCUMENT], :ulong
attach_function :FPDF_GetSecurityHandlerRevision, [:FPDF_DOCUMENT], :int
attach_function :FPDF_GetFileVersion, [:FPDF_DOCUMENT, :pointer], :FPDF_BOOL
attach_function :FPDF_GetFormType, [:FPDF_DOCUMENT], :int

# Metadata: FPDF_GetMetaText(doc, tag, buffer, buflen), where tag is one of
# "Title", "Author", "Subject", "Keywords", "Creator", "Producer",
# "CreationDate", "ModDate".
attach_function :FPDF_GetMetaText, [:FPDF_DOCUMENT, :string, :pointer, :ulong], :ulong

# Page label (PDF spec: roman/letter labelling).
attach_function :FPDF_GetPageLabel, [:FPDF_DOCUMENT, :int, :pointer, :ulong], :ulong
|
|
308
|
+
|
|
309
|
+
# =========================================================================
|
|
310
|
+
# Pages
|
|
311
|
+
# =========================================================================
|
|
312
|
+
# Page lifecycle and geometry.
attach_function :FPDF_LoadPage, [:FPDF_DOCUMENT, :int], :FPDF_PAGE
attach_function :FPDF_ClosePage, [:FPDF_PAGE], :void
attach_function :FPDF_GetPageWidthF, [:FPDF_PAGE], :float
attach_function :FPDF_GetPageHeightF, [:FPDF_PAGE], :float
attach_function :FPDF_GetPageBoundingBox, [:FPDF_PAGE, :pointer], :FPDF_BOOL
attach_function :FPDFPage_GetRotation, [:FPDF_PAGE], :int
attach_function :FPDFPage_HasTransparency, [:FPDF_PAGE], :FPDF_BOOL

# MediaBox / CropBox / BleedBox / TrimBox / ArtBox getters all share one
# signature: the page plus four out-pointers.
[
  :FPDFPage_GetMediaBox,
  :FPDFPage_GetCropBox,
  :FPDFPage_GetBleedBox,
  :FPDFPage_GetTrimBox,
  :FPDFPage_GetArtBox
].each do |box_getter|
  attach_function box_getter, [:FPDF_PAGE, :pointer, :pointer, :pointer, :pointer], :FPDF_BOOL
end
|
|
326
|
+
|
|
327
|
+
# =========================================================================
|
|
328
|
+
# Text extraction
|
|
329
|
+
# =========================================================================
|
|
330
|
+
# Text page lifecycle.
attach_function :FPDFText_LoadPage, [:FPDF_PAGE], :FPDF_TEXTPAGE
attach_function :FPDFText_ClosePage, [:FPDF_TEXTPAGE], :void

# Per-character queries, addressed by (text page, character index).
attach_function :FPDFText_CountChars, [:FPDF_TEXTPAGE], :int
attach_function :FPDFText_GetUnicode, [:FPDF_TEXTPAGE, :int], :uint
attach_function :FPDFText_GetFontSize, [:FPDF_TEXTPAGE, :int], :double
attach_function :FPDFText_GetFontWeight, [:FPDF_TEXTPAGE, :int], :int
attach_function :FPDFText_GetFontInfo, [:FPDF_TEXTPAGE, :int, :pointer, :ulong, :pointer], :ulong

# NOTE (from the original author): FPDFText_GetTextRenderMode(text_page,
# char_index) was REMOVED from PDFium in chromium/6611 (July 2024). The
# replacement takes two steps:
#   1. FPDFText_GetTextObject(text_page, char_index) -> FPDF_PAGEOBJECT
#   2. FPDFTextObj_GetTextRenderMode(page_object)    -> int
# High-level wrapper: see Page#chars (field :render_mode).
# Reference: pypdfium2 issue #335, pdfium-render issue #151.
attach_function :FPDFText_GetTextObject, [:FPDF_TEXTPAGE, :int], :FPDF_PAGEOBJECT

attach_function :FPDFText_GetCharBox,
                [:FPDF_TEXTPAGE, :int, :pointer, :pointer, :pointer, :pointer], :FPDF_BOOL
# "Loose" char box: a bbox proportional to the font size, described by the
# original author as more stable for layout work.
attach_function :FPDFText_GetLooseCharBox, [:FPDF_TEXTPAGE, :int, :pointer], :FPDF_BOOL
attach_function :FPDFText_GetMatrix, [:FPDF_TEXTPAGE, :int, :pointer], :FPDF_BOOL
attach_function :FPDFText_GetCharOrigin, [:FPDF_TEXTPAGE, :int, :pointer, :pointer], :FPDF_BOOL
attach_function :FPDFText_GetCharAngle, [:FPDF_TEXTPAGE, :int], :float
attach_function :FPDFText_HasUnicodeMapError, [:FPDF_TEXTPAGE, :int], :int
attach_function :FPDFText_IsGenerated, [:FPDF_TEXTPAGE, :int], :int
attach_function :FPDFText_IsHyphen, [:FPDF_TEXTPAGE, :int], :int

# Text retrieval by character range or by bounding rectangle.
attach_function :FPDFText_GetText, [:FPDF_TEXTPAGE, :int, :int, :pointer], :int
attach_function :FPDFText_GetBoundedText,
                [:FPDF_TEXTPAGE, :double, :double, :double, :double, :pointer, :int], :int

# Rectangles covering a character range.
attach_function :FPDFText_CountRects, [:FPDF_TEXTPAGE, :int, :int], :int
attach_function :FPDFText_GetRect,
                [:FPDF_TEXTPAGE, :int, :pointer, :pointer, :pointer, :pointer], :FPDF_BOOL
|
|
372
|
+
|
|
373
|
+
# =========================================================================
|
|
374
|
+
# Search
|
|
375
|
+
# =========================================================================
|
|
376
|
+
# Search session: start, step forwards/backwards, read the match, close.
attach_function :FPDFText_FindStart, [:FPDF_TEXTPAGE, :pointer, :ulong, :int], :FPDF_SCHHANDLE
attach_function :FPDFText_FindNext, [:FPDF_SCHHANDLE], :FPDF_BOOL
attach_function :FPDFText_FindPrev, [:FPDF_SCHHANDLE], :FPDF_BOOL
attach_function :FPDFText_GetSchResultIndex, [:FPDF_SCHHANDLE], :int
attach_function :FPDFText_GetSchCount, [:FPDF_SCHHANDLE], :int
attach_function :FPDFText_FindClose, [:FPDF_SCHHANDLE], :void
|
|
383
|
+
|
|
384
|
+
# =========================================================================
|
|
385
|
+
# Bitmap & rendering
|
|
386
|
+
# =========================================================================
|
|
387
|
+
# Bitmap allocation and release.
attach_function :FPDFBitmap_Create, [:int, :int, :int], :FPDF_BITMAP
attach_function :FPDFBitmap_CreateEx, [:int, :int, :int, :pointer, :int], :FPDF_BITMAP
attach_function :FPDFBitmap_Destroy, [:FPDF_BITMAP], :void

# Bitmap contents and properties.
attach_function :FPDFBitmap_FillRect, [:FPDF_BITMAP, :int, :int, :int, :int, :ulong], :void
attach_function :FPDFBitmap_GetBuffer, [:FPDF_BITMAP], :pointer
attach_function :FPDFBitmap_GetWidth, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_GetHeight, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_GetStride, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_GetFormat, [:FPDF_BITMAP], :int

# Page rendering into a bitmap.
attach_function :FPDF_RenderPageBitmap,
                [:FPDF_BITMAP, :FPDF_PAGE, :int, :int, :int, :int, :int, :int], :void
# Rendering with a 2x3 matrix plus clipping (for arbitrary scaling/rotation).
attach_function :FPDF_RenderPageBitmapWithMatrix,
                [:FPDF_BITMAP, :FPDF_PAGE, :pointer, :pointer, :int], :void
|
|
405
|
+
|
|
406
|
+
# =========================================================================
|
|
407
|
+
# Page objects (generic)
|
|
408
|
+
# =========================================================================
|
|
409
|
+
# Enumerating the objects of a page.
attach_function :FPDFPage_CountObjects, [:FPDF_PAGE], :int
attach_function :FPDFPage_GetObject, [:FPDF_PAGE, :int], :FPDF_PAGEOBJECT

# Generic page-object properties.
attach_function :FPDFPageObj_GetType, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFPageObj_GetBounds,
                [:FPDF_PAGEOBJECT, :pointer, :pointer, :pointer, :pointer], :FPDF_BOOL
attach_function :FPDFPageObj_GetMatrix, [:FPDF_PAGEOBJECT, :pointer], :FPDF_BOOL
attach_function :FPDFPageObj_GetFillColor,
                [:FPDF_PAGEOBJECT, :pointer, :pointer, :pointer, :pointer], :FPDF_BOOL
attach_function :FPDFPageObj_GetStrokeColor,
                [:FPDF_PAGEOBJECT, :pointer, :pointer, :pointer, :pointer], :FPDF_BOOL
attach_function :FPDFPageObj_GetStrokeWidth, [:FPDF_PAGEOBJECT, :pointer], :FPDF_BOOL
attach_function :FPDFPageObj_GetLineCap, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFPageObj_GetLineJoin, [:FPDF_PAGEOBJECT], :int
|
|
427
|
+
|
|
428
|
+
# =========================================================================
|
|
429
|
+
# Form XObjects: contenitori che incapsulano grafica (linee, rect, testo)
|
|
430
|
+
# come "subroutine grafica" riutilizzabile. Nei PDF generati da gestionali
|
|
431
|
+
# (TeamSystem, Zucchetti, ...) e da molti template Word/Excel, l'INTERA
|
|
432
|
+
# pagina è un singolo Form XObject. Senza discendervi dentro, non si
|
|
433
|
+
# vedono linee/rect/chars. Cf. PDF Spec 1.7 §8.10.
|
|
434
|
+
#
|
|
435
|
+
# Dopo FPDFFormObj_GetObject(form, i) si ottiene un FPDF_PAGEOBJECT child
|
|
436
|
+
# le cui coordinate sono nel sistema del form. La trasformazione al
|
|
437
|
+
# sistema-pagina si ottiene da FPDFPageObj_GetMatrix(form_obj, &matrix).
|
|
438
|
+
# =========================================================================
|
|
439
|
+
# Children of a Form XObject; note the child index is declared as :ulong.
attach_function :FPDFFormObj_CountObjects, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFFormObj_GetObject, [:FPDF_PAGEOBJECT, :ulong], :FPDF_PAGEOBJECT
|
|
442
|
+
|
|
443
|
+
# =========================================================================
|
|
444
|
+
# Path segments — fondamentali per detection linee tabella
|
|
445
|
+
# =========================================================================
|
|
446
|
+
# Path-level queries.
attach_function :FPDFPath_CountSegments, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFPath_GetPathSegment, [:FPDF_PAGEOBJECT, :int], :FPDF_PATHSEGMENT
attach_function :FPDFPath_GetDrawMode, [:FPDF_PAGEOBJECT, :pointer, :pointer], :FPDF_BOOL

# Segment-level queries.
attach_function :FPDFPathSegment_GetPoint, [:FPDF_PATHSEGMENT, :pointer, :pointer], :FPDF_BOOL
attach_function :FPDFPathSegment_GetType, [:FPDF_PATHSEGMENT], :int
attach_function :FPDFPathSegment_GetClose, [:FPDF_PATHSEGMENT], :FPDF_BOOL
|
|
455
|
+
|
|
456
|
+
# =========================================================================
|
|
457
|
+
# Image objects
|
|
458
|
+
# =========================================================================
|
|
459
|
+
# Image metadata and pixel size.
attach_function :FPDFImageObj_GetImageMetadata, [:FPDF_PAGEOBJECT, :FPDF_PAGE, :pointer], :FPDF_BOOL
attach_function :FPDFImageObj_GetImagePixelSize, [:FPDF_PAGEOBJECT, :pointer, :pointer], :FPDF_BOOL

# Image as a bitmap.
attach_function :FPDFImageObj_GetBitmap, [:FPDF_PAGEOBJECT], :FPDF_BITMAP
attach_function :FPDFImageObj_GetRenderedBitmap,
                [:FPDF_DOCUMENT, :FPDF_PAGE, :FPDF_PAGEOBJECT], :FPDF_BITMAP

# Image stream data and filters (buffer + length in, :ulong out).
attach_function :FPDFImageObj_GetImageDataDecoded, [:FPDF_PAGEOBJECT, :pointer, :ulong], :ulong
attach_function :FPDFImageObj_GetImageDataRaw, [:FPDF_PAGEOBJECT, :pointer, :ulong], :ulong
attach_function :FPDFImageObj_GetImageFilterCount, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFImageObj_GetImageFilter, [:FPDF_PAGEOBJECT, :int, :pointer, :ulong], :ulong
|
|
475
|
+
|
|
476
|
+
# =========================================================================
|
|
477
|
+
# Text page-objects (font name di un text object, glifi)
|
|
478
|
+
# =========================================================================
|
|
479
|
+
# Text page-object queries.
attach_function :FPDFTextObj_GetFontSize, [:FPDF_PAGEOBJECT, :pointer], :FPDF_BOOL
attach_function :FPDFTextObj_GetText, [:FPDF_PAGEOBJECT, :FPDF_TEXTPAGE, :pointer, :ulong], :ulong
attach_function :FPDFTextObj_GetFont, [:FPDF_PAGEOBJECT], :FPDF_FONT

# FPDFTextObj_GetTextRenderMode replaces the former
# FPDFText_GetTextRenderMode (removed upstream in chromium/6611, per the
# original author). It takes a text PAGEOBJECT, not (textpage, char_index).
attach_function :FPDFTextObj_GetTextRenderMode, [:FPDF_PAGEOBJECT], :int

# NOTE (from the original author): FPDFFont_GetFontName is marked legacy in
# recent PDFium. The newer model has two separate APIs:
#   - FPDFFont_GetBaseFontName -> BaseFont entry of the PDF dict (may carry
#                                 a subset prefix such as "ABCDEF+Helvetica")
#   - FPDFFont_GetFamilyName   -> "clean" family name (e.g. "Helvetica")
# These use size_t for the length and return type instead of unsigned long.
# They are absent from PDFium builds <= chromium/6533; in that case the
# attach_function stub makes the call fail with a clear LoadError at the
# call site rather than at require time.
attach_function :FPDFFont_GetBaseFontName, [:FPDF_FONT, :pointer, :size_t], :size_t
attach_function :FPDFFont_GetFamilyName, [:FPDF_FONT, :pointer, :size_t], :size_t

# Kept for compatibility with older PDFium builds. It may be missing from
# newer ones: same stub mechanism.
attach_function :FPDFFont_GetFontName, [:FPDF_FONT, :pointer, :ulong], :ulong

# NOTE(review): upstream fpdf_edit.h appears to declare
# `int FPDFFont_GetFlags(FPDF_FONT font)` (flags returned directly, -1 on
# failure), with no out-pointer. This binding is kept as written so existing
# callers keep their arity — confirm against the header and the callers.
attach_function :FPDFFont_GetFlags, [:FPDF_FONT, :pointer], :FPDF_BOOL
attach_function :FPDFFont_GetWeight, [:FPDF_FONT], :int
attach_function :FPDFFont_GetIsEmbedded, [:FPDF_FONT], :int
attach_function :FPDFFont_GetItalicAngle, [:FPDF_FONT, :pointer], :FPDF_BOOL
|
|
511
|
+
|
|
512
|
+
# Font ascent / descent metrics, useful for baseline detection and line
# leading.
#
# Signature: (font, font_size, out_value) -> FPDF_BOOL. Upstream
# fpdf_edit.h declares the second parameter as `float font_size`, with the
# result written through the pointer already scaled to that size. It was
# previously bound as :int, which marshals the argument as an integer
# instead of a float, so the native side did not receive the font size that
# was passed. Integer arguments from existing callers are still accepted
# and are converted to float by FFI.
attach_function :FPDFFont_GetAscent, %i[FPDF_FONT float pointer], :FPDF_BOOL
attach_function :FPDFFont_GetDescent, %i[FPDF_FONT float pointer], :FPDF_BOOL
|
|
518
|
+
|
|
519
|
+
# Nominal width of a glyph in the font program ("advance width"): the width
# the PDF declares for that glyph before the kerning applied by `TJ`
# operators. Per the original author, combining it with FPDFText_GetMatrix
# (for the CTM scale) gives the real advance in page coordinates, which is
# conceptually the advance pdfminer.six reads from the font program.
#
# CAUTION (from the original author): the returned value is scaled by the
# font_size passed as a parameter. In most PDFs produced by accounting
# packages the font_size is 1.0 and the CTM does the scaling (typically
# 5x-10x for the final rendering).
attach_function :FPDFFont_GetGlyphWidth, [:FPDF_FONT, :uint, :float, :pointer], :FPDF_BOOL
|
|
532
|
+
|
|
533
|
+
# NOTA: FPDFText_GetMatrix è già attaccata sopra (sezione text page).
|
|
534
|
+
# In combinazione con FPDFFont_GetGlyphWidth, permette di calcolare
|
|
535
|
+
# l'advance del glifo in coordinate pagina come
|
|
536
|
+
# `glyph_width × |FPDFText_GetMatrix.a|`.
|
|
537
|
+
|
|
538
|
+
# =========================================================================
|
|
539
|
+
# Annotations
|
|
540
|
+
# =========================================================================
|
|
541
|
+
# Enumerating and releasing the annotations of a page.
attach_function :FPDFPage_GetAnnotCount, %i[FPDF_PAGE], :int
attach_function :FPDFPage_GetAnnot, %i[FPDF_PAGE int], :FPDF_ANNOTATION
attach_function :FPDFPage_CloseAnnot, %i[FPDF_ANNOTATION], :void

# Annotation properties.
attach_function :FPDFAnnot_GetSubtype, %i[FPDF_ANNOTATION], :int
attach_function :FPDFAnnot_GetRect, %i[FPDF_ANNOTATION pointer], :FPDF_BOOL
attach_function :FPDFAnnot_GetStringValue,
                %i[FPDF_ANNOTATION string pointer ulong], :ulong
attach_function :FPDFAnnot_HasKey, %i[FPDF_ANNOTATION string], :FPDF_BOOL
attach_function :FPDFAnnot_GetLink, %i[FPDF_ANNOTATION], :FPDF_LINK

# NOTE(review): upstream fpdf_text.h appears to declare FPDFLink_GetURL as
# (FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,
# int buflen) -> int, i.e. the web-link API, not a per-FPDF_LINK getter.
# The binding is kept as written so existing callers keep their arity —
# confirm against the header before relying on it; for link annotations
# FPDFLink_GetAction + FPDFAction_GetURIPath below are the likely route.
attach_function :FPDFLink_GetURL, %i[FPDF_LINK pointer ulong], :ulong

# FPDFAction_GetType returns `unsigned long` upstream; bound as :ulong for
# consistency with the other unsigned-long returns in this file (it was
# :uint).
attach_function :FPDFAction_GetType, %i[FPDF_ACTION], :ulong
attach_function :FPDFAction_GetURIPath,
                %i[FPDF_DOCUMENT FPDF_ACTION pointer ulong], :ulong
attach_function :FPDFLink_GetAction, %i[FPDF_LINK], :FPDF_ACTION
attach_function :FPDFLink_GetDest, %i[FPDF_DOCUMENT FPDF_LINK], :FPDF_DEST
|
|
562
|
+
|
|
563
|
+
# =========================================================================
|
|
564
|
+
# Forms
|
|
565
|
+
# =========================================================================
|
|
566
|
+
# FPDF_FORMFILLINFO è una struct ricca (~70 campi negli ultimi build).
|
|
567
|
+
# Per la sola ESTRAZIONE basta passare una versione minima con version=2
|
|
568
|
+
# e tutti i callback nulli — PDFium tollera NULL su quelli non chiamati
|
|
569
|
+
# in modalità read-only (no JavaScript, no XFA).
|
|
570
|
+
class FPDF_FORMFILLINFO < FFI::Struct
  # Keep this aligned with the public header fpdf_formfill.h. The critical
  # field is `version`: if it is wrong, initialisation fails silently. For
  # read-only use, version=2 with every other field left zero/NULL is enough.
  # The callback area is deliberately oversized (256 pointers) so the struct
  # stays large enough if the header gains more fields in the future.
  layout(
    :version,    :int,
    :_callbacks, [:pointer, 256]
  )
end
|
|
579
|
+
|
|
580
|
+
attach_function :FPDFDOC_InitFormFillEnvironment,
|
|
581
|
+
%i[FPDF_DOCUMENT pointer], :FPDF_FORMHANDLE
|
|
582
|
+
attach_function :FPDFDOC_ExitFormFillEnvironment,
|
|
583
|
+
%i[FPDF_FORMHANDLE], :void
|
|
584
|
+
attach_function :FPDF_FFLDraw,
|
|
585
|
+
%i[FPDF_FORMHANDLE FPDF_BITMAP FPDF_PAGE int int int int int int],
|
|
586
|
+
:void
|
|
587
|
+
attach_function :FPDFAnnot_GetFormFieldType,
|
|
588
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION], :int
|
|
589
|
+
attach_function :FPDFAnnot_GetFormFieldName,
|
|
590
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION pointer ulong], :ulong
|
|
591
|
+
attach_function :FPDFAnnot_GetFormFieldValue,
|
|
592
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION pointer ulong], :ulong
|
|
593
|
+
attach_function :FPDFAnnot_GetFormFieldFlags,
|
|
594
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION], :int
|
|
595
|
+
attach_function :FPDFAnnot_IsChecked,
|
|
596
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION], :FPDF_BOOL
|
|
597
|
+
attach_function :FPDFAnnot_GetOptionCount,
|
|
598
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION], :int
|
|
599
|
+
attach_function :FPDFAnnot_GetOptionLabel,
|
|
600
|
+
%i[FPDF_FORMHANDLE FPDF_ANNOTATION int pointer ulong], :ulong
|
|
601
|
+
|
|
602
|
+
# =========================================================================
|
|
603
|
+
# Bookmarks (outline)
|
|
604
|
+
# =========================================================================
|
|
605
|
+
attach_function :FPDFBookmark_GetFirstChild,
|
|
606
|
+
%i[FPDF_DOCUMENT FPDF_BOOKMARK], :FPDF_BOOKMARK
|
|
607
|
+
attach_function :FPDFBookmark_GetNextSibling,
|
|
608
|
+
%i[FPDF_DOCUMENT FPDF_BOOKMARK], :FPDF_BOOKMARK
|
|
609
|
+
attach_function :FPDFBookmark_GetTitle,
|
|
610
|
+
%i[FPDF_BOOKMARK pointer ulong], :ulong
|
|
611
|
+
attach_function :FPDFBookmark_GetDest,
|
|
612
|
+
%i[FPDF_DOCUMENT FPDF_BOOKMARK], :FPDF_DEST
|
|
613
|
+
attach_function :FPDFDest_GetDestPageIndex,
|
|
614
|
+
%i[FPDF_DOCUMENT FPDF_DEST], :int
|
|
615
|
+
|
|
616
|
+
# =========================================================================
|
|
617
|
+
# Attachments
|
|
618
|
+
# =========================================================================
|
|
619
|
+
attach_function :FPDFDoc_GetAttachmentCount, %i[FPDF_DOCUMENT], :int
|
|
620
|
+
attach_function :FPDFDoc_GetAttachment,
|
|
621
|
+
%i[FPDF_DOCUMENT int], :FPDF_ATTACHMENT
|
|
622
|
+
attach_function :FPDFAttachment_GetName,
|
|
623
|
+
%i[FPDF_ATTACHMENT pointer ulong], :ulong
|
|
624
|
+
attach_function :FPDFAttachment_GetFile,
|
|
625
|
+
%i[FPDF_ATTACHMENT pointer ulong pointer], :FPDF_BOOL
|
|
626
|
+
|
|
627
|
+
# =========================================================================
|
|
628
|
+
# Structure tree (per PDF tagged → estrazione semantica robusta)
|
|
629
|
+
# =========================================================================
|
|
630
|
+
#
|
|
631
|
+
# Per PDF "tagged" (PDF/UA, esport da Word/LibreOffice/InDesign), il
|
|
632
|
+
# `StructTreeRoot` espone una struttura logica del documento (Document
|
|
633
|
+
# → P, H1, Table, TR, TH, TD, Figure...) indipendente dal layout grafico.
|
|
634
|
+
# Ogni element può essere collegato al testo della pagina tramite
|
|
635
|
+
# `MarkedContentID`: i page objects con lo stesso MCID appartengono
|
|
636
|
+
# semanticamente a quell'element.
|
|
637
|
+
#
|
|
638
|
+
# Su PDF NON tagged (la maggior parte dei gestionali italiani):
|
|
639
|
+
# FPDF_StructTree_GetForPage ritorna NULL.
|
|
640
|
+
#
|
|
641
|
+
# Su PDF "tagged ma vuoto" (es. CR Banca d'Italia, dove il
|
|
642
|
+
# StructTreeRoot esiste con 700+ entries ma tutti gli elementi sono
|
|
643
|
+
# placeholder senza type/MCID): il tree è present ma walk produce
|
|
644
|
+
# output vuoto. Vedi `Rpdfium::Structure::Tree#empty?`.
|
|
645
|
+
typedef :pointer, :FPDF_STRUCTELEMENT_ATTR
|
|
646
|
+
typedef :pointer, :FPDF_STRUCTELEMENT_ATTR_VALUE
|
|
647
|
+
|
|
648
|
+
attach_function :FPDF_StructTree_GetForPage,
|
|
649
|
+
%i[FPDF_PAGE], :FPDF_STRUCTTREE
|
|
650
|
+
attach_function :FPDF_StructTree_Close, %i[FPDF_STRUCTTREE], :void
|
|
651
|
+
attach_function :FPDF_StructTree_CountChildren,
|
|
652
|
+
%i[FPDF_STRUCTTREE], :int
|
|
653
|
+
attach_function :FPDF_StructTree_GetChildAtIndex,
|
|
654
|
+
%i[FPDF_STRUCTTREE int], :FPDF_STRUCTELEMENT
|
|
655
|
+
|
|
656
|
+
# Navigazione del tree
|
|
657
|
+
attach_function :FPDF_StructElement_CountChildren,
|
|
658
|
+
%i[FPDF_STRUCTELEMENT], :int
|
|
659
|
+
attach_function :FPDF_StructElement_GetChildAtIndex,
|
|
660
|
+
%i[FPDF_STRUCTELEMENT int], :FPDF_STRUCTELEMENT
|
|
661
|
+
attach_function :FPDF_StructElement_GetParent,
|
|
662
|
+
%i[FPDF_STRUCTELEMENT], :FPDF_STRUCTELEMENT
|
|
663
|
+
|
|
664
|
+
# Identificazione element
|
|
665
|
+
attach_function :FPDF_StructElement_GetType,
|
|
666
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
667
|
+
attach_function :FPDF_StructElement_GetObjType,
|
|
668
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
669
|
+
attach_function :FPDF_StructElement_GetTitle,
|
|
670
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
671
|
+
attach_function :FPDF_StructElement_GetID,
|
|
672
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
673
|
+
attach_function :FPDF_StructElement_GetLang,
|
|
674
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
675
|
+
|
|
676
|
+
# Testo "logico" overrides (accessibility, ligature resolution)
|
|
677
|
+
attach_function :FPDF_StructElement_GetActualText,
|
|
678
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
679
|
+
attach_function :FPDF_StructElement_GetAltText,
|
|
680
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
681
|
+
attach_function :FPDF_StructElement_GetExpansion,
|
|
682
|
+
%i[FPDF_STRUCTELEMENT pointer ulong], :ulong
|
|
683
|
+
|
|
684
|
+
# Marked content IDs (collegano elementi → page objects con stesso MCID)
|
|
685
|
+
# GetMarkedContentID ritorna il primo MCID (per back-compat).
|
|
686
|
+
# GetMarkedContentIdCount + IdAtIndex per elementi con multiple MCID.
|
|
687
|
+
# GetChildMarkedContentID: MCID del figlio se è un MCR diretto.
|
|
688
|
+
attach_function :FPDF_StructElement_GetMarkedContentID,
|
|
689
|
+
%i[FPDF_STRUCTELEMENT], :int
|
|
690
|
+
attach_function :FPDF_StructElement_GetMarkedContentIdCount,
|
|
691
|
+
%i[FPDF_STRUCTELEMENT], :int
|
|
692
|
+
attach_function :FPDF_StructElement_GetMarkedContentIdAtIndex,
|
|
693
|
+
%i[FPDF_STRUCTELEMENT int], :int
|
|
694
|
+
attach_function :FPDF_StructElement_GetChildMarkedContentID,
|
|
695
|
+
%i[FPDF_STRUCTELEMENT int], :int
|
|
696
|
+
|
|
697
|
+
# Attributi PDF strutturali (RowSpan, ColSpan, Scope, Headers, ecc.)
|
|
698
|
+
# Sono in una sotto-API: ogni element ha 0+ attribute objects, ognuno
|
|
699
|
+
# con 0+ key/value pairs.
|
|
700
|
+
attach_function :FPDF_StructElement_GetAttributeCount,
|
|
701
|
+
%i[FPDF_STRUCTELEMENT], :int
|
|
702
|
+
attach_function :FPDF_StructElement_GetAttributeAtIndex,
|
|
703
|
+
%i[FPDF_STRUCTELEMENT int], :FPDF_STRUCTELEMENT_ATTR
|
|
704
|
+
attach_function :FPDF_StructElement_GetStringAttribute,
|
|
705
|
+
%i[FPDF_STRUCTELEMENT string pointer ulong], :ulong
|
|
706
|
+
|
|
707
|
+
# Attribute getters: enumerazione key/value
|
|
708
|
+
attach_function :FPDF_StructElement_Attr_GetCount,
|
|
709
|
+
%i[FPDF_STRUCTELEMENT_ATTR], :int
|
|
710
|
+
attach_function :FPDF_StructElement_Attr_GetName,
|
|
711
|
+
%i[FPDF_STRUCTELEMENT_ATTR int pointer ulong pointer],
|
|
712
|
+
:FPDF_BOOL
|
|
713
|
+
attach_function :FPDF_StructElement_Attr_GetValue,
|
|
714
|
+
%i[FPDF_STRUCTELEMENT_ATTR string],
|
|
715
|
+
:FPDF_STRUCTELEMENT_ATTR_VALUE
|
|
716
|
+
attach_function :FPDF_StructElement_Attr_GetType,
|
|
717
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE], :int
|
|
718
|
+
attach_function :FPDF_StructElement_Attr_GetBooleanValue,
|
|
719
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer], :FPDF_BOOL
|
|
720
|
+
attach_function :FPDF_StructElement_Attr_GetNumberValue,
|
|
721
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer], :FPDF_BOOL
|
|
722
|
+
attach_function :FPDF_StructElement_Attr_GetStringValue,
|
|
723
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer ulong pointer],
|
|
724
|
+
:FPDF_BOOL
|
|
725
|
+
attach_function :FPDF_StructElement_Attr_GetBlobValue,
|
|
726
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer ulong pointer],
|
|
727
|
+
:FPDF_BOOL
|
|
728
|
+
# Attribute con value che è un altro array (es. Headers che è array di IDs)
|
|
729
|
+
attach_function :FPDF_StructElement_Attr_CountChildren,
|
|
730
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE], :int
|
|
731
|
+
attach_function :FPDF_StructElement_Attr_GetChildAtIndex,
|
|
732
|
+
%i[FPDF_STRUCTELEMENT_ATTR_VALUE int],
|
|
733
|
+
:FPDF_STRUCTELEMENT_ATTR_VALUE
|
|
734
|
+
|
|
735
|
+
# =========================================================================
|
|
736
|
+
# Page box geometry — media/crop/bleed/trim/art box
|
|
737
|
+
# =========================================================================
|
|
738
|
+
# Ogni pagina PDF ha fino a 5 box rettangolari, in coordinate bottom-up:
|
|
739
|
+
# - media: l'area fisica completa della pagina (sempre presente)
|
|
740
|
+
# - crop: la sotto-area visibile (default = media se non specificata)
|
|
741
|
+
# - bleed: area utile per stampa con marginatura (rare)
|
|
742
|
+
# - trim: area finale di taglio (rare, per pre-stampa)
|
|
743
|
+
# - art: area di contenuto significativo (rare)
|
|
744
|
+
#
|
|
745
|
+
# In pdfplumber sono esposte come `page.mediabox`, `page.cropbox`, ecc.
|
|
746
|
+
# Senza accesso a cropbox, una libreria di estrazione PDF non può sapere
|
|
747
|
+
# qual è l'area "visibile" della pagina vs quella "fisica".
|
|
748
|
+
# Tutte ritornano FPDF_BOOL: 0 se il box non è definito.
|
|
749
|
+
attach_function :FPDFPage_GetMediaBox,
|
|
750
|
+
%i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
|
|
751
|
+
attach_function :FPDFPage_GetCropBox,
|
|
752
|
+
%i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
|
|
753
|
+
attach_function :FPDFPage_GetBleedBox,
|
|
754
|
+
%i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
|
|
755
|
+
attach_function :FPDFPage_GetTrimBox,
|
|
756
|
+
%i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
|
|
757
|
+
attach_function :FPDFPage_GetArtBox,
|
|
758
|
+
%i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
|
|
759
|
+
|
|
760
|
+
# =========================================================================
|
|
761
|
+
# Page object: stato, bounds rotati, dash pattern, marked content
|
|
762
|
+
# =========================================================================
|
|
763
|
+
# `FPDFPageObj_GetIsActive`: alcuni page object possono essere "inattivi"
|
|
764
|
+
# (es. nascosti da Optional Content / livelli disabilitati). Senza
|
|
765
|
+
# questo check, l'estrazione includerebbe contenuto non visibile.
|
|
766
|
+
# Restituisce 0/1 in *out_active.
|
|
767
|
+
attach_function :FPDFPageObj_GetIsActive,
|
|
768
|
+
%i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
|
|
769
|
+
|
|
770
|
+
# `FPDFPageObj_GetRotatedBounds`: bbox in 4 punti (FS_QUADPOINTSF) per
|
|
771
|
+
# oggetti ruotati. La GetBounds standard ritorna l'AABB (Axis-Aligned
|
|
772
|
+
# Bounding Box), inutile per oggetti a 45°/90°. Per testo verticale o
|
|
773
|
+
# ruotato, questo è il bbox "vero".
|
|
774
|
+
attach_function :FPDFPageObj_GetRotatedBounds,
|
|
775
|
+
%i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
|
|
776
|
+
|
|
777
|
+
# Dash pattern: utile in `line_segments` per filtrare linee guida
|
|
778
|
+
# tratteggiate (spesso usate come "non-printing" hints nei template).
|
|
779
|
+
# Le linee dashed possono confondere la detection cellule tabelle.
|
|
780
|
+
attach_function :FPDFPageObj_GetDashCount,
|
|
781
|
+
%i[FPDF_PAGEOBJECT], :int
|
|
782
|
+
attach_function :FPDFPageObj_GetDashArray,
|
|
783
|
+
%i[FPDF_PAGEOBJECT pointer size_t], :FPDF_BOOL
|
|
784
|
+
attach_function :FPDFPageObj_GetDashPhase,
|
|
785
|
+
%i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
|
|
786
|
+
|
|
787
|
+
# Marked content (Tagged PDF) — operatori BMC/BDC del content stream.
|
|
788
|
+
# In PDF strutturati (PDF/UA, Word→PDF, InDesign export), gli operatori
|
|
789
|
+
# `/Span BMC ... EMC` o `/Span <</MCID 12>> BDC ... EMC` raggruppano
|
|
790
|
+
# semanticamente i char. Per PDF generati da gestionali italiani questi
|
|
791
|
+
# tag NON sono presenti; per PDF "tagged" sono il modo più affidabile
|
|
792
|
+
# di raggruppare token.
|
|
793
|
+
attach_function :FPDFPageObj_CountMarks,
|
|
794
|
+
%i[FPDF_PAGEOBJECT], :int
|
|
795
|
+
attach_function :FPDFPageObj_GetMark,
|
|
796
|
+
%i[FPDF_PAGEOBJECT ulong], :FPDF_PAGEOBJECTMARK
|
|
797
|
+
attach_function :FPDFPageObj_GetMarkedContentID,
|
|
798
|
+
%i[FPDF_PAGEOBJECT], :int
|
|
799
|
+
attach_function :FPDFPageObjMark_GetName,
|
|
800
|
+
%i[FPDF_PAGEOBJECTMARK pointer ulong pointer], :FPDF_BOOL
|
|
801
|
+
attach_function :FPDFPageObjMark_CountParams,
|
|
802
|
+
%i[FPDF_PAGEOBJECTMARK], :int
|
|
803
|
+
attach_function :FPDFPageObjMark_GetParamKey,
|
|
804
|
+
%i[FPDF_PAGEOBJECTMARK ulong pointer ulong pointer],
|
|
805
|
+
:FPDF_BOOL
|
|
806
|
+
attach_function :FPDFPageObjMark_GetParamValueType,
|
|
807
|
+
%i[FPDF_PAGEOBJECTMARK string], :int
|
|
808
|
+
attach_function :FPDFPageObjMark_GetParamIntValue,
|
|
809
|
+
%i[FPDF_PAGEOBJECTMARK string pointer], :FPDF_BOOL
|
|
810
|
+
attach_function :FPDFPageObjMark_GetParamStringValue,
|
|
811
|
+
%i[FPDF_PAGEOBJECTMARK string pointer ulong pointer],
|
|
812
|
+
:FPDF_BOOL
|
|
813
|
+
|
|
814
|
+
# =========================================================================
|
|
815
|
+
# Catalog / Document metadata
|
|
816
|
+
# =========================================================================
|
|
817
|
+
# FPDFCatalog_GetLanguage: lingua dichiarata dal documento (es. "it-IT").
|
|
818
|
+
# Utile per pipeline di estrazione che vogliono switchare regole
|
|
819
|
+
# language-specific (es. tokenizer di parole, lookup hyphen).
|
|
820
|
+
attach_function :FPDFCatalog_GetLanguage,
|
|
821
|
+
%i[FPDF_DOCUMENT pointer ulong], :ulong
|
|
822
|
+
|
|
823
|
+
# FPDFDoc_GetPageMode: stato di apertura PDF (es. PageMode.UseOutlines,
|
|
824
|
+
# PageMode.FullScreen). Numeric. Utile per editor PDF/viewer building.
|
|
825
|
+
attach_function :FPDFDoc_GetPageMode, %i[FPDF_DOCUMENT], :int
|
|
826
|
+
|
|
827
|
+
# =========================================================================
|
|
828
|
+
# Links (annotation Link e LinkAtPoint per ricerca per coordinata)
|
|
829
|
+
# =========================================================================
|
|
830
|
+
# `FPDFLink_GetLinkAtPoint`: dato (x, y) in coordinate pagina, ritorna
|
|
831
|
+
# il link annotation che lo contiene. Cuore della funzione "click handling"
|
|
832
|
+
# in viewer / OCR-style "extract links". Pdfplumber espone simile via
|
|
833
|
+
# `page.hyperlinks`.
|
|
834
|
+
attach_function :FPDFLink_GetLinkAtPoint,
|
|
835
|
+
%i[FPDF_PAGE double double], :FPDF_LINK
|
|
836
|
+
attach_function :FPDFLink_GetLinkZOrderAtPoint,
|
|
837
|
+
%i[FPDF_PAGE double double], :int
|
|
838
|
+
attach_function :FPDFLink_GetAnnot,
|
|
839
|
+
%i[FPDF_PAGE FPDF_LINK], :FPDF_ANNOTATION
|
|
840
|
+
attach_function :FPDFLink_GetAnnotRect,
|
|
841
|
+
%i[FPDF_LINK pointer], :FPDF_BOOL
|
|
842
|
+
# FPDFLink_GetTextRange and FPDFLink_GetRect are part of the *web link* API
# of fpdf_text.h. Their first argument is an FPDF_PAGELINK handle (obtained
# from FPDFLink_LoadWebLinks on a text page), followed by the zero-based
# index of the web link. They do NOT accept an FPDF_LINK annotation handle.
#
#   FPDF_BOOL FPDFLink_GetTextRange(FPDF_PAGELINK link_page, int link_index,
#                                   int* start_char_index, int* char_count)
#
# Writes the first char index and the number of chars that make up the link
# text, which allows mapping a detected web link back to the page text.
attach_function :FPDFLink_GetTextRange,
                %i[pointer int pointer pointer], :FPDF_BOOL
#   FPDF_BOOL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index,
#                              int rect_index, double* left, double* top,
#                              double* right, double* bottom)
#
# A web link spanning several lines has one rectangle per line; rect_index
# selects which one. For link *annotations* (FPDF_LINK) the geometry comes
# from FPDFLink_GetAnnotRect / FPDFLink_GetQuadPoints instead.
attach_function :FPDFLink_GetRect,
                %i[pointer int int pointer pointer pointer pointer], :FPDF_BOOL
|
|
850
|
+
attach_function :FPDFLink_GetQuadPoints,
|
|
851
|
+
%i[FPDF_LINK int pointer], :FPDF_BOOL
|
|
852
|
+
|
|
853
|
+
# =========================================================================
|
|
854
|
+
# Action / Destination (estensioni outline + link)
|
|
855
|
+
# =========================================================================
|
|
856
|
+
# FPDFAction_GetDest: per action di tipo "GoTo", ritorna il FPDF_DEST.
|
|
857
|
+
# FPDFAction_GetFilePath: per action "Launch" o "RemoteGoTo", path del file
|
|
858
|
+
# esterno target.
|
|
859
|
+
attach_function :FPDFAction_GetDest,
|
|
860
|
+
%i[FPDF_DOCUMENT FPDF_ACTION], :FPDF_DEST
|
|
861
|
+
attach_function :FPDFAction_GetFilePath,
|
|
862
|
+
%i[FPDF_ACTION pointer ulong], :ulong
|
|
863
|
+
# FPDFBookmark_GetAction: action associata a un bookmark (alternativa a
|
|
864
|
+
# GetDest se il bookmark è un'action invece di una destinazione).
|
|
865
|
+
attach_function :FPDFBookmark_GetAction,
|
|
866
|
+
%i[FPDF_BOOKMARK], :FPDF_ACTION
|
|
867
|
+
# FPDFBookmark_GetCount: numero di sub-bookmark (positivo = espansi,
|
|
868
|
+
# negativo = collassati, 0 = leaf).
|
|
869
|
+
attach_function :FPDFBookmark_GetCount,
|
|
870
|
+
%i[FPDF_BOOKMARK], :int
|
|
871
|
+
# FPDFDest_GetView: tipo di view (Fit, FitH, XYZ ecc.) + parametri.
|
|
872
|
+
# FPDFDest_GetLocationInPage: x/y/zoom estratti dal dest.
|
|
873
|
+
attach_function :FPDFDest_GetView,
|
|
874
|
+
%i[FPDF_DEST pointer pointer], :ulong
|
|
875
|
+
attach_function :FPDFDest_GetLocationInPage,
|
|
876
|
+
%i[FPDF_DEST pointer pointer pointer pointer pointer pointer],
|
|
877
|
+
:FPDF_BOOL
|
|
878
|
+
|
|
879
|
+
# =========================================================================
|
|
880
|
+
# Font extras: GetFontData, GetGlyphPath, GlyphPath segments
|
|
881
|
+
# =========================================================================
|
|
882
|
+
# Già attaccate sopra: FPDFFont_GetGlyphWidth.
|
|
883
|
+
# Aggiungiamo: FontData (raw font program bytes — utile per inspection,
|
|
884
|
+
# debug embedding, font substitution) e GetGlyphPath (path vettoriale di
|
|
885
|
+
# un glifo, alternativa a GlyphWidth per font esotici).
|
|
886
|
+
# GetFontData follows the bool convention: it returns FPDF_BOOL and reports
# the required size through `*out_buflen`, also when buf is NULL.
|
|
887
|
+
attach_function :FPDFFont_GetFontData,
|
|
888
|
+
%i[FPDF_FONT pointer size_t pointer], :FPDF_BOOL
|
|
889
|
+
attach_function :FPDFFont_GetGlyphPath,
|
|
890
|
+
%i[FPDF_FONT uint float], :FPDF_GLYPHPATH
|
|
891
|
+
# FPDF_GLYPHPATH: handle a un path. Lo aggiungo come typedef.
|
|
892
|
+
# Le sue API GlyphPath_* sono niche, ma le esponiamo per simmetria.
|
|
893
|
+
attach_function :FPDFGlyphPath_CountGlyphSegments,
|
|
894
|
+
%i[FPDF_GLYPHPATH], :int
|
|
895
|
+
attach_function :FPDFGlyphPath_GetGlyphPathSegment,
|
|
896
|
+
%i[FPDF_GLYPHPATH int], :FPDF_PATHSEGMENT
|
|
897
|
+
|
|
898
|
+
# =========================================================================
|
|
899
|
+
# Text page: char index at position
|
|
900
|
+
# =========================================================================
|
|
901
|
+
# FPDFText_GetCharIndexAtPos: dato un punto (x, y) in coord pagina,
|
|
902
|
+
# ritorna l'indice del char più vicino (entro tolerance). Utile per
|
|
903
|
+
# "hit test" in viewer e per mapping coord → text index nella ricerca.
|
|
904
|
+
attach_function :FPDFText_GetCharIndexAtPos,
|
|
905
|
+
%i[FPDF_TEXTPAGE double double double double], :int
|
|
906
|
+
# FPDFText_GetTextIndexFromCharIndex / GetCharIndexFromTextIndex:
|
|
907
|
+
# mappano l'indice "char" (per glifo) all'indice "text" (per codepoint
|
|
908
|
+
# logico). I due indici differiscono per ligature/sostituzioni.
|
|
909
|
+
attach_function :FPDFText_GetTextIndexFromCharIndex,
|
|
910
|
+
%i[FPDF_TEXTPAGE int], :int
|
|
911
|
+
attach_function :FPDFText_GetCharIndexFromTextIndex,
|
|
912
|
+
%i[FPDF_TEXTPAGE int], :int
|
|
913
|
+
|
|
914
|
+
# =========================================================================
|
|
915
|
+
# Annotation extras: GetFlags, GetColor, GetBorder, AP, attachment points
|
|
916
|
+
# =========================================================================
|
|
917
|
+
# FPDFAnnot_GetFlags: bitmask di Flags (Hidden, Print, NoZoom ecc.).
|
|
918
|
+
# Senza questo, non possiamo distinguere un annotation visibile da uno
|
|
919
|
+
# con flag Hidden.
|
|
920
|
+
attach_function :FPDFAnnot_GetFlags, %i[FPDF_ANNOTATION], :int
|
|
921
|
+
# Colore: stroke (BORDER_COLOR) e fill (INTERIOR_COLOR).
|
|
922
|
+
attach_function :FPDFAnnot_GetColor,
|
|
923
|
+
%i[FPDF_ANNOTATION int pointer pointer pointer pointer],
|
|
924
|
+
:FPDF_BOOL
|
|
925
|
+
# Border: spessore, raggio orizzontale/verticale, dash array count.
|
|
926
|
+
attach_function :FPDFAnnot_GetBorder,
|
|
927
|
+
%i[FPDF_ANNOTATION pointer pointer pointer], :FPDF_BOOL
|
|
928
|
+
# AP (Appearance Stream): forma renderizzata dell'annotation in vari
|
|
929
|
+
# modi (Normal/Rollover/Down).
|
|
930
|
+
attach_function :FPDFAnnot_GetAP,
|
|
931
|
+
%i[FPDF_ANNOTATION int pointer ulong], :ulong
|
|
932
|
+
# FileAttachment: per Annotation di sottotipo FileAttachment, ottiene
|
|
933
|
+
# l'FPDF_ATTACHMENT.
|
|
934
|
+
attach_function :FPDFAnnot_GetFileAttachment,
|
|
935
|
+
%i[FPDF_ANNOTATION], :FPDF_ATTACHMENT
|
|
936
|
+
# AttachmentPoints: per highlight/markup che attraversano più righe,
|
|
937
|
+
# i 4 punti di ogni quadrilatero.
|
|
938
|
+
attach_function :FPDFAnnot_CountAttachmentPoints,
|
|
939
|
+
%i[FPDF_ANNOTATION], :size_t
|
|
940
|
+
attach_function :FPDFAnnot_GetAttachmentPoints,
|
|
941
|
+
%i[FPDF_ANNOTATION size_t pointer], :FPDF_BOOL
|
|
942
|
+
|
|
943
|
+
# =========================================================================
|
|
944
|
+
# Attachment extras
|
|
945
|
+
# =========================================================================
|
|
946
|
+
# FPDFAttachment_GetSubtype: MIME-like subtype del file allegato.
|
|
947
|
+
attach_function :FPDFAttachment_GetSubtype,
|
|
948
|
+
%i[FPDF_ATTACHMENT pointer ulong], :ulong
|
|
949
|
+
# FPDFAttachment_GetStringValue/HasKey: per leggere i metadati custom
|
|
950
|
+
# del file attachment (Description, CreationDate, ecc.).
|
|
951
|
+
attach_function :FPDFAttachment_HasKey,
|
|
952
|
+
%i[FPDF_ATTACHMENT string], :FPDF_BOOL
|
|
953
|
+
attach_function :FPDFAttachment_GetValueType,
|
|
954
|
+
%i[FPDF_ATTACHMENT string], :int
|
|
955
|
+
attach_function :FPDFAttachment_GetStringValue,
|
|
956
|
+
%i[FPDF_ATTACHMENT string pointer ulong], :ulong
|
|
957
|
+
|
|
958
|
+
# =========================================================================
|
|
959
|
+
# Helper: leggere stringhe UTF-16LE che PDFium ritorna in bytes
|
|
960
|
+
# =========================================================================
|
|
961
|
+
# Convenzione PDFium: la maggior parte delle Get*Text/Get*Name ritornano
|
|
962
|
+
# `unsigned long` (numero BYTES, terminatore incluso). Si chiama prima con
|
|
963
|
+
# buffer NULL/0 per ottenere la dimensione, poi con buffer allocato.
|
|
964
|
+
# Reads a string from a PDFium getter that follows the two-call convention:
# the getter is first invoked with a NULL buffer and length 0 to learn the
# required size in bytes, then invoked again with a buffer of that size.
#
# @param method_name [Symbol] name of the bound getter to invoke on this module
# @param args [Array] leading arguments of the getter; the buffer pointer
#   and the buffer length are appended automatically
# @return [String] the decoded UTF-8 text, or "" when the reported size is
#   2 bytes or less (terminator only, or an error)
def self.read_utf16_string(method_name, *args)
  # Probe call: NULL buffer, zero length -> required size in bytes.
  byte_len = send(method_name, *args, FFI::Pointer::NULL, 0)
  return "" unless byte_len > 2

  buffer = FFI::MemoryPointer.new(:uchar, byte_len)
  # Real call: the getter fills the buffer; its return value is not needed.
  send(method_name, *args, buffer, byte_len)
  utf16_bytes_to_utf8(buffer.read_bytes(byte_len))
end
|
|
974
|
+
|
|
975
|
+
# PDFium ritorna UTF-16LE little-endian con terminatore null.
|
|
976
|
+
# Converts a raw UTF-16LE byte string (as returned by PDFium, usually with a
# trailing NUL terminator) into a UTF-8 String.
#
# The input is duplicated before being re-tagged as UTF-16LE, so the
# caller's string is never mutated and frozen strings are accepted.
# Invalid or unmappable sequences are replaced rather than raising, and every
# NUL character is removed from the result.
#
# @param bytes [String] raw bytes holding UTF-16LE encoded text
# @return [String] the text as UTF-8, without NUL characters
def self.utf16_bytes_to_utf8(bytes)
  bytes.dup
       .force_encoding(Encoding::UTF_16LE)
       .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
       .delete("\x00")
end
|
|
981
|
+
end
|
|
982
|
+
end
|