rpdfium 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rpdfium/raw.rb CHANGED
@@ -4,27 +4,27 @@ require "ffi"
4
4
  require "rbconfig"
5
5
 
6
6
  module Rpdfium
7
- # Layer 1: bindings FFI grezzi alle API C di PDFium.
8
- # Mappa 1:1 con i nomi originali. Usare le classi wrapper per il codice
9
- # applicativo. Le API "Experimental" di PDFium sono marcate nei commenti:
10
- # in teoria potrebbero cambiare, in pratica sono stabili da anni.
7
+ # Layer 1: raw FFI bindings to the PDFium C API.
8
+ # 1:1 mapping with the original names. Use the wrapper classes for
9
+ # application code. PDFium "Experimental" APIs are marked in the comments:
10
+ # in theory they could change, in practice they have been stable for years.
11
11
  module Raw
12
12
  extend FFI::Library
13
13
 
14
- # Costruisce la lista di candidati che `ffi_lib` proverà in ordine.
14
+ # Builds the list of candidates that `ffi_lib` will try in order.
15
15
  #
16
- # ATTENZIONE: FFI auto-appende l'estensione "naturale" della piattaforma
17
- # (.dylib su macOS, .so su linux, .dll su windows) quando il path passato
18
- # non termina già con un'estensione conosciuta. Quindi se passiamo
19
- # `libpdfium.so` su macOS, FFI cerca `libpdfium.so.dylib` — assurdo ma
20
- # documentato. Per evitarlo, filtriamo i nomi system_library_names per
21
- # OS host.
16
+ # WARNING: FFI auto-appends the platform's "natural" extension
17
+ # (.dylib on macOS, .so on Linux, .dll on Windows) when the supplied path
18
+ # does not already end with a known extension. Therefore, if we pass
19
+ # `libpdfium.so` on macOS, FFI looks for `libpdfium.so.dylib` — absurd but
20
+ # documented. To avoid this, we filter the system_library_names by
21
+ # host OS.
22
22
  #
23
- # Inoltre: ENV["PDFIUM_LIBRARY_PATH"] e Rpdfium::Binary.library_path sono
24
- # path ASSOLUTI/ESPLICITI: se non vengono trovati, NON facciamo fallback
25
- # a nomi di sistema. Restituiamo subito un array di un solo path: in
26
- # quel caso ffi_lib o riesce subito, o lancia LoadError chiaro
27
- # (è ciò che vuole l'utentegli ha dato un path esplicito).
23
+ # Additionally: ENV["PDFIUM_LIBRARY_PATH"] and Rpdfium::Binary.library_path
24
+ # are ABSOLUTE/EXPLICIT paths: if they are not found, we do NOT fall back
25
+ # to system names. We immediately return an array of a single path: in
26
+ # that case ffi_lib either succeeds right away, or raises a clear LoadError
27
+ # (which is what the user wants: they provided an explicit path).
28
28
  def self.candidate_paths
29
29
  explicit = ENV["PDFIUM_LIBRARY_PATH"]
30
30
  return [explicit] if explicit && !explicit.empty?
@@ -37,10 +37,10 @@ module Rpdfium
37
37
  system_library_names
38
38
  end
39
39
 
40
- # Nomi "di sistema" filtrati per OS host. Manteniamo `pdfium` /
41
- # `libpdfium` (senza estensione) per primi: FFI auto-appende l'ext giusta.
42
- # I nomi con estensione vengono SOLO se matchano l'OS host, così evitiamo
43
- # il bug di doppia estensione.
40
+ # "System" names filtered by host OS. We keep `pdfium` /
41
+ # `libpdfium` (without extension) first: FFI auto-appends the right ext.
42
+ # Names with an extension are included ONLY if they match the host OS, so
43
+ # we avoid the double-extension bug.
44
44
  def self.system_library_names
45
45
  base = %w[pdfium libpdfium]
46
46
  host = host_os
@@ -69,21 +69,21 @@ module Rpdfium
69
69
 
70
70
  begin
71
71
  ffi_lib(*candidate_paths)
72
- ffi_convention :default # cdecl ovunque, anche su Win64 (build bblanchon)
72
+ ffi_convention :default # cdecl everywhere, even on Win64 (bblanchon build)
73
73
  @native_loaded = true
74
74
  rescue ::LoadError, ::RuntimeError => e
75
- # Cadiamo in modalità "stub": le attach_function generano stub che
76
- # sollevano Rpdfium::LoadError alla prima invocazione. Permette di
77
- # caricare la gemma per usare i moduli puri-Ruby (Edges, Cells, PNG)
78
- # senza dover avere PDFium installato.
75
+ # We fall back to "stub" mode: the attach_function calls generate stubs
76
+ # that raise Rpdfium::LoadError on first invocation. This allows the gem
77
+ # to be loaded in order to use the pure-Ruby modules (Edges, Cells, PNG)
78
+ # without having PDFium installed.
79
79
  @load_error = e
80
- ffi_lib_flags :now # no-op senza ffi_lib, ma documenta intent
80
+ ffi_lib_flags :now # no-op without ffi_lib, but documents intent
81
81
  end
82
82
 
83
- # Wrap di attach_function tollerante: se il binding fallisce (libreria
84
- # non caricata, simbolo non presente in questa versione di PDFium),
85
- # genera comunque un metodo che alza un errore chiaro al call site,
86
- # invece di far esplodere il `require`.
83
+ # Tolerant attach_function wrapper: if the binding fails (library
84
+ # not loaded, symbol not present in this version of PDFium),
85
+ # it still generates a method that raises a clear error at the call site,
86
+ # instead of blowing up the `require`.
87
87
  def self.attach_function(name, *args)
88
88
  super
89
89
  rescue FFI::NotFoundError, RuntimeError => e
@@ -94,8 +94,8 @@ module Rpdfium
94
94
  end
95
95
 
96
96
  unless @native_loaded
97
- # Override di attach_function quando la libreria non si è caricata:
98
- # non chiamare super (che esploderebbe), genera direttamente lo stub.
97
+ # Override of attach_function when the library failed to load:
98
+ # do not call super (which would blow up), generate the stub directly.
99
99
  def self.attach_function(name, *_args)
100
100
  err = @load_error
101
101
  define_singleton_method(name) do |*_a|
@@ -110,7 +110,7 @@ module Rpdfium
110
110
  end
111
111
 
112
112
  # =========================================================================
113
- # Tipi opachi
113
+ # Opaque types
114
114
  # =========================================================================
115
115
  typedef :pointer, :FPDF_DOCUMENT
116
116
  typedef :pointer, :FPDF_PAGE
@@ -135,7 +135,7 @@ module Rpdfium
135
135
  typedef :ushort, :FPDF_WCHAR
136
136
 
137
137
  # =========================================================================
138
- # Strutture C
138
+ # C structures
139
139
  # =========================================================================
140
140
  class FS_RECTF < FFI::Struct
141
141
  layout :left, :float,
@@ -145,7 +145,7 @@ module Rpdfium
145
145
  end
146
146
 
147
147
  class FS_MATRIX < FFI::Struct
148
- # PDF matrix: [a b 0; c d 0; e f 1] (row-major in PDF; FFI segue ordine campi)
148
+ # PDF matrix: [a b 0; c d 0; e f 1] (row-major in PDF; FFI follows field order)
149
149
  layout :a, :float, :b, :float,
150
150
  :c, :float, :d, :float,
151
151
  :e, :float, :f, :float
@@ -177,7 +177,7 @@ module Rpdfium
177
177
  end
178
178
 
179
179
  # =========================================================================
180
- # Costanti
180
+ # Constants
181
181
  # =========================================================================
182
182
  # Bitmap formats
183
183
  FPDFBitmap_Unknown = 0
@@ -191,7 +191,7 @@ module Rpdfium
191
191
  FPDF_LCD_TEXT = 0x02
192
192
  FPDF_NO_NATIVETEXT = 0x04
193
193
  FPDF_GRAYSCALE = 0x08
194
- FPDF_REVERSE_BYTE_ORDER = 0x10 # → RGBA invece di BGRA
194
+ FPDF_REVERSE_BYTE_ORDER = 0x10 # → RGBA instead of BGRA
195
195
  FPDF_NO_GDIPLUS = 0x40
196
196
  FPDF_PRINTING = 0x800
197
197
  FPDF_RENDER_NO_SMOOTHTEXT = 0x1000
@@ -254,7 +254,7 @@ module Rpdfium
254
254
  FPDF_ANNOT_WIDGET => "Widget", FPDF_ANNOT_REDACT => "Redact"
255
255
  }.freeze
256
256
 
257
- # Form field types (per widget annotations)
257
+ # Form field types (for widget annotations)
258
258
  FPDF_FORMFIELD_UNKNOWN = 0
259
259
  FPDF_FORMFIELD_PUSHBUTTON = 1
260
260
  FPDF_FORMFIELD_CHECKBOX = 2
@@ -335,18 +335,18 @@ module Rpdfium
335
335
  attach_function :FPDFText_GetFontWeight, %i[FPDF_TEXTPAGE int], :int
336
336
  attach_function :FPDFText_GetFontInfo,
337
337
  %i[FPDF_TEXTPAGE int pointer ulong pointer], :ulong
338
- # NOTE: FPDFText_GetTextRenderMode(text_page, char_index) è stato RIMOSSO
339
- # da PDFium in chromium/6611 (luglio 2024). Il rimpiazzo è in due passi:
338
+ # NOTE: FPDFText_GetTextRenderMode(text_page, char_index) was REMOVED
339
+ # from PDFium in chromium/6611 (July 2024). The replacement is two steps:
340
340
  # 1. FPDFText_GetTextObject(text_page, char_index) → FPDF_PAGEOBJECT
341
341
  # 2. FPDFTextObj_GetTextRenderMode(page_object) → int
342
- # Wrapper di alto livello: vedi Page#chars (campo :render_mode).
343
- # Riferimento: pypdfium2 issue #335, pdfium-render issue #151.
342
+ # High-level wrapper: see Page#chars (the :render_mode field).
343
+ # Reference: pypdfium2 issue #335, pdfium-render issue #151.
344
344
  attach_function :FPDFText_GetTextObject,
345
345
  %i[FPDF_TEXTPAGE int], :FPDF_PAGEOBJECT
346
346
  attach_function :FPDFText_GetCharBox,
347
347
  %i[FPDF_TEXTPAGE int pointer pointer pointer pointer],
348
348
  :FPDF_BOOL
349
- # "Loose" char box: bbox proporzionale alla font size, più stabile per layout
349
+ # "Loose" char box: bbox proportional to the font size, more stable for layout
350
350
  attach_function :FPDFText_GetLooseCharBox,
351
351
  %i[FPDF_TEXTPAGE int pointer], :FPDF_BOOL
352
352
  attach_function :FPDFText_GetMatrix,
@@ -398,7 +398,7 @@ module Rpdfium
398
398
  attach_function :FPDF_RenderPageBitmap,
399
399
  %i[FPDF_BITMAP FPDF_PAGE int int int int int int],
400
400
  :void
401
- # Rendering con matrice 2x3 + clipping (per scaling/rotation arbitraria)
401
+ # Rendering with a 2x3 matrix + clipping (for arbitrary scaling/rotation)
402
402
  attach_function :FPDF_RenderPageBitmapWithMatrix,
403
403
  %i[FPDF_BITMAP FPDF_PAGE pointer pointer int],
404
404
  :void
@@ -426,22 +426,22 @@ module Rpdfium
426
426
  attach_function :FPDFPageObj_GetLineJoin, %i[FPDF_PAGEOBJECT], :int
427
427
 
428
428
  # =========================================================================
429
- # Form XObjects: contenitori che incapsulano grafica (linee, rect, testo)
430
- # come "subroutine grafica" riutilizzabile. Nei PDF generati da gestionali
431
- # (TeamSystem, Zucchetti, ...) e da molti template Word/Excel, l'INTERA
432
- # pagina è un singolo Form XObject. Senza discendervi dentro, non si
433
- # vedono linee/rect/chars. Cf. PDF Spec 1.7 §8.10.
429
+ # Form XObjects: containers that encapsulate graphics (lines, rects, text)
430
+ # as a reusable "graphics subroutine". In PDFs generated by management
431
+ # software (TeamSystem, Zucchetti, ...) and by many Word/Excel templates,
432
+ # the ENTIRE page is a single Form XObject. Without descending into it, no
433
+ # lines/rects/chars are visible. Cf. PDF Spec 1.7 §8.10.
434
434
  #
435
- # Dopo FPDFFormObj_GetObject(form, i) si ottiene un FPDF_PAGEOBJECT child
436
- # le cui coordinate sono nel sistema del form. La trasformazione al
437
- # sistema-pagina si ottiene da FPDFPageObj_GetMatrix(form_obj, &matrix).
435
+ # After FPDFFormObj_GetObject(form, i) one obtains a child FPDF_PAGEOBJECT
436
+ # whose coordinates are in the form's system. The transformation to the
437
+ # page system is obtained from FPDFPageObj_GetMatrix(form_obj, &matrix).
438
438
  # =========================================================================
439
439
  attach_function :FPDFFormObj_CountObjects, %i[FPDF_PAGEOBJECT], :int
440
440
  attach_function :FPDFFormObj_GetObject,
441
441
  %i[FPDF_PAGEOBJECT ulong], :FPDF_PAGEOBJECT
442
442
 
443
443
  # =========================================================================
444
- # Path segments — fondamentali per detection linee tabella
444
+ # Path segments — fundamental for table line detection
445
445
  # =========================================================================
446
446
  attach_function :FPDFPath_CountSegments, %i[FPDF_PAGEOBJECT], :int
447
447
  attach_function :FPDFPath_GetPathSegment,
@@ -474,33 +474,33 @@ module Rpdfium
474
474
  %i[FPDF_PAGEOBJECT int pointer ulong], :ulong
475
475
 
476
476
  # =========================================================================
477
- # Text page-objects (font name di un text object, glifi)
477
+ # Text page-objects (font name of a text object, glyphs)
478
478
  # =========================================================================
479
479
  attach_function :FPDFTextObj_GetFontSize,
480
480
  %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
481
481
  attach_function :FPDFTextObj_GetText,
482
482
  %i[FPDF_PAGEOBJECT FPDF_TEXTPAGE pointer ulong], :ulong
483
483
  attach_function :FPDFTextObj_GetFont, %i[FPDF_PAGEOBJECT], :FPDF_FONT
484
- # FPDFTextObj_GetTextRenderMode è il rimpiazzo dell'ex
485
- # FPDFText_GetTextRenderMode (rimossa upstream in chromium/6611).
486
- # Prende un text PAGEOBJECT, non (textpage, char_index).
484
+ # FPDFTextObj_GetTextRenderMode is the replacement for the former
485
+ # FPDFText_GetTextRenderMode (removed upstream in chromium/6611).
486
+ # It takes a text PAGEOBJECT, not (textpage, char_index).
487
487
  attach_function :FPDFTextObj_GetTextRenderMode, %i[FPDF_PAGEOBJECT], :int
488
- # NOTE: FPDFFont_GetFontName è marcata come legacy in PDFium recenti.
489
- # Il modello nuovo prevede due API distinte:
490
- # - FPDFFont_GetBaseFontName → BaseFont entry del PDF dict (può
491
- # includere prefissi di subset come
488
+ # NOTE: FPDFFont_GetFontName is marked as legacy in recent PDFium.
489
+ # The new model provides two distinct APIs:
490
+ # - FPDFFont_GetBaseFontName → BaseFont entry of the PDF dict (may
491
+ # include subset prefixes such as
492
492
  # "ABCDEF+Helvetica")
493
- # - FPDFFont_GetFamilyName → nome famiglia "pulito" (es. "Helvetica")
494
- # Queste API usano `c_size_t` per lunghezza/return type invece di
495
- # `c_ulong`. Su build di PDFium <= chromium/6533 non sono presenti:
496
- # in tal caso lo stub `attach_function` (in raw.rb) assicura che la
497
- # chiamata fallisca con LoadError chiaro al call site, non al require.
493
+ # - FPDFFont_GetFamilyName → "clean" family name (e.g. "Helvetica")
494
+ # These APIs use `c_size_t` for length/return type instead of
495
+ # `c_ulong`. On PDFium builds <= chromium/6533 they are not present:
496
+ # in that case the `attach_function` stub (in raw.rb) ensures that the
497
+ # call fails with a clear LoadError at the call site, not at require.
498
498
  attach_function :FPDFFont_GetBaseFontName,
499
499
  %i[FPDF_FONT pointer size_t], :size_t
500
500
  attach_function :FPDFFont_GetFamilyName,
501
501
  %i[FPDF_FONT pointer size_t], :size_t
502
- # Mantenuta per compatibilità con build PDFium più vecchi. Su build
503
- # nuovi può non essere presente: stesso meccanismo di stub.
502
+ # Kept for compatibility with older PDFium builds. On newer builds
503
+ # it may not be present: same stub mechanism.
504
504
  attach_function :FPDFFont_GetFontName,
505
505
  %i[FPDF_FONT pointer ulong], :ulong
506
506
  attach_function :FPDFFont_GetFlags, %i[FPDF_FONT pointer], :FPDF_BOOL
@@ -509,30 +509,30 @@ module Rpdfium
509
509
  attach_function :FPDFFont_GetItalicAngle,
510
510
  %i[FPDF_FONT pointer], :FPDF_BOOL
511
511
 
512
- # Metriche font ascendente/discendente in unità del font program.
513
- # Per ottenere il valore in coordinate pagina serve moltiplicare per
514
- # font_size del text object e poi per la scala del CTM. Utili per
515
- # baseline detection e leading di linee.
512
+ # Font ascent/descent metrics in font-program units.
513
+ # To obtain the value in page coordinates, multiply by the text object's
514
+ # font_size and then by the CTM scale. Useful for
515
+ # baseline detection and line leading.
516
516
  attach_function :FPDFFont_GetAscent, %i[FPDF_FONT int pointer], :FPDF_BOOL
517
517
  attach_function :FPDFFont_GetDescent, %i[FPDF_FONT int pointer], :FPDF_BOOL
518
518
 
519
- # Larghezza nominale di un glifo nel font program ("advance width").
520
- # È la larghezza che il PDF dichiara per quel glifo prima del kerning
521
- # applicato dagli operatori `TJ`. In combinazione con FPDFText_GetMatrix
522
- # (per la scala del CTM), permette di calcolare l'advance reale in
523
- # coordinate pagina. Equivale concettualmente all'advance che pdfminer.six
524
- # legge dal font program direttamente.
519
+ # Nominal width of a glyph in the font program ("advance width").
520
+ # It is the width the PDF declares for that glyph before the kerning
521
+ # applied by the `TJ` operators. In combination with FPDFText_GetMatrix
522
+ # (for the CTM scale), it allows the real advance in page coordinates to
523
+ # be computed. Conceptually equivalent to the advance that pdfminer.six
524
+ # reads directly from the font program.
525
525
  #
526
- # ATTENZIONE: il valore ritornato è in unità "scalate per font_size",
527
- # con font_size passato come parametro. Per la maggior parte dei PDF
528
- # generati da gestionali, il font_size è 1.0 e il CTM scala
529
- # (tipicamente 5×–10× per il rendering finale).
526
+ # WARNING: the returned value is in "font_size-scaled" units,
527
+ # with font_size passed as a parameter. For most PDFs
528
+ # generated by management software, the font_size is 1.0 and the CTM
529
+ # scales (typically 5×–10× for the final rendering).
530
530
  attach_function :FPDFFont_GetGlyphWidth,
531
531
  %i[FPDF_FONT uint float pointer], :FPDF_BOOL
532
532
 
533
- # NOTA: FPDFText_GetMatrix è già attaccata sopra (sezione text page).
534
- # In combinazione con FPDFFont_GetGlyphWidth, permette di calcolare
535
- # l'advance del glifo in coordinate pagina come
533
+ # NOTE: FPDFText_GetMatrix is already attached above (text page section).
534
+ # In combination with FPDFFont_GetGlyphWidth, it allows the glyph advance
535
+ # in page coordinates to be computed as
536
536
  # `glyph_width × |FPDFText_GetMatrix.a|`.
537
537
 
538
538
  # =========================================================================
@@ -563,16 +563,16 @@ module Rpdfium
563
563
  # =========================================================================
564
564
  # Forms
565
565
  # =========================================================================
566
- # FPDF_FORMFILLINFO è una struct ricca (~70 campi negli ultimi build).
567
- # Per la sola ESTRAZIONE basta passare una versione minima con version=2
568
- # e tutti i callback nulli — PDFium tollera NULL su quelli non chiamati
569
- # in modalità read-only (no JavaScript, no XFA).
566
+ # FPDF_FORMFILLINFO is a rich struct (~70 fields in the latest builds).
567
+ # For EXTRACTION alone it is enough to pass a minimal version with version=2
568
+ # and all callbacks null — PDFium tolerates NULL on those not called
569
+ # in read-only mode (no JavaScript, no XFA).
570
570
  class FPDF_FORMFILLINFO < FFI::Struct
571
- # Tieni allineato all'header pubblico fpdf_formfill.h. Il campo critico è
572
- # `version` — se sbagli, init fallisce silenziosamente. Per uso read-only
573
- # basta version=2 + tutti gli altri zero/NULL. Allochiamo un buffer molto
574
- # generoso (256 puntatori) per essere robusti a future estensioni
575
- # dell'header.
571
+ # Keep aligned with the public header fpdf_formfill.h. The critical field
572
+ # is `version` — if it is wrong, init fails silently. For read-only use
573
+ # version=2 + all others zero/NULL is enough. We allocate a very
574
+ # generous buffer (256 pointers) to be robust against future extensions
575
+ # of the header.
576
576
  layout :version, :int,
577
577
  :_callbacks, [:pointer, 256]
578
578
  end
@@ -625,23 +625,23 @@ module Rpdfium
625
625
  %i[FPDF_ATTACHMENT pointer ulong pointer], :FPDF_BOOL
626
626
 
627
627
  # =========================================================================
628
- # Structure tree (per PDF tagged estrazione semantica robusta)
628
+ # Structure tree (for tagged PDF → robust semantic extraction)
629
629
  # =========================================================================
630
630
  #
631
- # Per PDF "tagged" (PDF/UA, esport da Word/LibreOffice/InDesign), il
632
- # `StructTreeRoot` espone una struttura logica del documento (Document
633
- # → P, H1, Table, TR, TH, TD, Figure...) indipendente dal layout grafico.
634
- # Ogni element può essere collegato al testo della pagina tramite
635
- # `MarkedContentID`: i page objects con lo stesso MCID appartengono
636
- # semanticamente a quell'element.
631
+ # For "tagged" PDFs (PDF/UA, exports from Word/LibreOffice/InDesign), the
632
+ # `StructTreeRoot` exposes a logical structure of the document (Document
633
+ # → P, H1, Table, TR, TH, TD, Figure...) independent of the graphical
634
+ # layout. Each element can be linked to the page text via
635
+ # `MarkedContentID`: page objects with the same MCID belong
636
+ # semantically to that element.
637
637
  #
638
- # Su PDF NON tagged (la maggior parte dei gestionali italiani):
639
- # FPDF_StructTree_GetForPage ritorna NULL.
638
+ # On NON-tagged PDFs (most Italian management-software output):
639
+ # FPDF_StructTree_GetForPage returns NULL.
640
640
  #
641
- # Su PDF "tagged ma vuoto" (es. CR Banca d'Italia, dove il
642
- # StructTreeRoot esiste con 700+ entries ma tutti gli elementi sono
643
- # placeholder senza type/MCID): il tree è present ma walk produce
644
- # output vuoto. Vedi `Rpdfium::Structure::Tree#empty?`.
641
+ # On "tagged but empty" PDFs (e.g. a Banca d'Italia CR, where the
642
+ # StructTreeRoot exists with 700+ entries but all elements are
643
+ # placeholders without type/MCID): the tree is present but the walk
644
+ # produces empty output. See `Rpdfium::Structure::Tree#empty?`.
645
645
  typedef :pointer, :FPDF_STRUCTELEMENT_ATTR
646
646
  typedef :pointer, :FPDF_STRUCTELEMENT_ATTR_VALUE
647
647
 
@@ -653,7 +653,7 @@ module Rpdfium
653
653
  attach_function :FPDF_StructTree_GetChildAtIndex,
654
654
  %i[FPDF_STRUCTTREE int], :FPDF_STRUCTELEMENT
655
655
 
656
- # Navigazione del tree
656
+ # Tree navigation
657
657
  attach_function :FPDF_StructElement_CountChildren,
658
658
  %i[FPDF_STRUCTELEMENT], :int
659
659
  attach_function :FPDF_StructElement_GetChildAtIndex,
@@ -661,7 +661,7 @@ module Rpdfium
661
661
  attach_function :FPDF_StructElement_GetParent,
662
662
  %i[FPDF_STRUCTELEMENT], :FPDF_STRUCTELEMENT
663
663
 
664
- # Identificazione element
664
+ # Element identification
665
665
  attach_function :FPDF_StructElement_GetType,
666
666
  %i[FPDF_STRUCTELEMENT pointer ulong], :ulong
667
667
  attach_function :FPDF_StructElement_GetObjType,
@@ -673,7 +673,7 @@ module Rpdfium
673
673
  attach_function :FPDF_StructElement_GetLang,
674
674
  %i[FPDF_STRUCTELEMENT pointer ulong], :ulong
675
675
 
676
- # Testo "logico" overrides (accessibility, ligature resolution)
676
+ # "Logical" text overrides (accessibility, ligature resolution)
677
677
  attach_function :FPDF_StructElement_GetActualText,
678
678
  %i[FPDF_STRUCTELEMENT pointer ulong], :ulong
679
679
  attach_function :FPDF_StructElement_GetAltText,
@@ -681,10 +681,10 @@ module Rpdfium
681
681
  attach_function :FPDF_StructElement_GetExpansion,
682
682
  %i[FPDF_STRUCTELEMENT pointer ulong], :ulong
683
683
 
684
- # Marked content IDs (collegano elementi → page objects con stesso MCID)
685
- # GetMarkedContentID ritorna il primo MCID (per back-compat).
686
- # GetMarkedContentIdCount + IdAtIndex per elementi con multiple MCID.
687
- # GetChildMarkedContentID: MCID del figlio se è un MCR diretto.
684
+ # Marked content IDs (link elements → page objects with the same MCID)
685
+ # GetMarkedContentID returns the first MCID (for back-compat).
686
+ # GetMarkedContentIdCount + IdAtIndex for elements with multiple MCIDs.
687
+ # GetChildMarkedContentID: MCID of the child if it is a direct MCR.
688
688
  attach_function :FPDF_StructElement_GetMarkedContentID,
689
689
  %i[FPDF_STRUCTELEMENT], :int
690
690
  attach_function :FPDF_StructElement_GetMarkedContentIdCount,
@@ -694,9 +694,9 @@ module Rpdfium
694
694
  attach_function :FPDF_StructElement_GetChildMarkedContentID,
695
695
  %i[FPDF_STRUCTELEMENT int], :int
696
696
 
697
- # Attributi PDF strutturali (RowSpan, ColSpan, Scope, Headers, ecc.)
698
- # Sono in una sotto-API: ogni element ha 0+ attribute objects, ognuno
699
- # con 0+ key/value pairs.
697
+ # Structural PDF attributes (RowSpan, ColSpan, Scope, Headers, etc.)
698
+ # They live in a sub-API: each element has 0+ attribute objects, each
699
+ # with 0+ key/value pairs.
700
700
  attach_function :FPDF_StructElement_GetAttributeCount,
701
701
  %i[FPDF_STRUCTELEMENT], :int
702
702
  attach_function :FPDF_StructElement_GetAttributeAtIndex,
@@ -704,7 +704,7 @@ module Rpdfium
704
704
  attach_function :FPDF_StructElement_GetStringAttribute,
705
705
  %i[FPDF_STRUCTELEMENT string pointer ulong], :ulong
706
706
 
707
- # Attribute getters: enumerazione key/value
707
+ # Attribute getters: key/value enumeration
708
708
  attach_function :FPDF_StructElement_Attr_GetCount,
709
709
  %i[FPDF_STRUCTELEMENT_ATTR], :int
710
710
  attach_function :FPDF_StructElement_Attr_GetName,
@@ -725,7 +725,7 @@ module Rpdfium
725
725
  attach_function :FPDF_StructElement_Attr_GetBlobValue,
726
726
  %i[FPDF_STRUCTELEMENT_ATTR_VALUE pointer ulong pointer],
727
727
  :FPDF_BOOL
728
- # Attribute con value che è un altro array (es. Headers che è array di IDs)
728
+ # Attribute whose value is another array (e.g. Headers, an array of IDs)
729
729
  attach_function :FPDF_StructElement_Attr_CountChildren,
730
730
  %i[FPDF_STRUCTELEMENT_ATTR_VALUE], :int
731
731
  attach_function :FPDF_StructElement_Attr_GetChildAtIndex,
@@ -735,17 +735,17 @@ module Rpdfium
735
735
  # =========================================================================
736
736
  # Page box geometry — media/crop/bleed/trim/art box
737
737
  # =========================================================================
738
- # Ogni pagina PDF ha fino a 5 box rettangolari, in coordinate bottom-up:
739
- # - media: l'area fisica completa della pagina (sempre presente)
740
- # - crop: la sotto-area visibile (default = media se non specificata)
741
- # - bleed: area utile per stampa con marginatura (rare)
742
- # - trim: area finale di taglio (rare, per pre-stampa)
743
- # - art: area di contenuto significativo (rare)
738
+ # Each PDF page has up to 5 rectangular boxes, in bottom-up coordinates:
739
+ # - media: the complete physical area of the page (always present)
740
+ # - crop: the visible sub-area (default = media if not specified)
741
+ # - bleed: usable area for printing with bleed margins (rare)
742
+ # - trim: final cut area (rare, for pre-press)
743
+ # - art: area of significant content (rare)
744
744
  #
745
- # In pdfplumber sono esposte come `page.mediabox`, `page.cropbox`, ecc.
746
- # Senza accesso a cropbox, una libreria di estrazione PDF non può sapere
747
- # qual è l'area "visibile" della pagina vs quella "fisica".
748
- # Tutte ritornano FPDF_BOOL: 0 se il box non è definito.
745
+ # In pdfplumber these are exposed as `page.mediabox`, `page.cropbox`, etc.
746
+ # Without access to the cropbox, a PDF extraction library cannot know
747
+ # which is the "visible" area of the page vs the "physical" one.
748
+ # They all return FPDF_BOOL: 0 if the box is not defined.
749
749
  attach_function :FPDFPage_GetMediaBox,
750
750
  %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
751
751
  attach_function :FPDFPage_GetCropBox,
@@ -758,25 +758,25 @@ module Rpdfium
758
758
  %i[FPDF_PAGE pointer pointer pointer pointer], :FPDF_BOOL
759
759
 
760
760
  # =========================================================================
761
- # Page object: stato, bounds rotati, dash pattern, marked content
761
+ # Page object: state, rotated bounds, dash pattern, marked content
762
762
  # =========================================================================
763
- # `FPDFPageObj_GetIsActive`: alcuni page object possono essere "inattivi"
764
- # (es. nascosti da Optional Content / livelli disabilitati). Senza
765
- # questo check, l'estrazione includerebbe contenuto non visibile.
766
- # Restituisce 0/1 in *out_active.
763
+ # `FPDFPageObj_GetIsActive`: some page objects may be "inactive"
764
+ # (e.g. hidden by Optional Content / disabled layers). Without
765
+ # this check, extraction would include non-visible content.
766
+ # Returns 0/1 in *out_active.
767
767
  attach_function :FPDFPageObj_GetIsActive,
768
768
  %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
769
769
 
770
- # `FPDFPageObj_GetRotatedBounds`: bbox in 4 punti (FS_QUADPOINTSF) per
771
- # oggetti ruotati. La GetBounds standard ritorna l'AABB (Axis-Aligned
772
- # Bounding Box), inutile per oggetti a 45°/90°. Per testo verticale o
773
- # ruotato, questo è il bbox "vero".
770
+ # `FPDFPageObj_GetRotatedBounds`: bbox as 4 points (FS_QUADPOINTSF) for
771
+ # rotated objects. The standard GetBounds returns the AABB (Axis-Aligned
772
+ # Bounding Box), useless for objects at 45°/90°. For vertical or
773
+ # rotated text, this is the "true" bbox.
774
774
  attach_function :FPDFPageObj_GetRotatedBounds,
775
775
  %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
776
776
 
777
- # Dash pattern: utile in `line_segments` per filtrare linee guida
778
- # tratteggiate (spesso usate come "non-printing" hints nei template).
779
- # Le linee dashed possono confondere la detection cellule tabelle.
777
+ # Dash pattern: useful in `line_segments` to filter out dashed
778
+ # guide lines (often used as "non-printing" hints in templates).
779
+ # Dashed lines can confuse table cell detection.
780
780
  attach_function :FPDFPageObj_GetDashCount,
781
781
  %i[FPDF_PAGEOBJECT], :int
782
782
  attach_function :FPDFPageObj_GetDashArray,
@@ -784,12 +784,12 @@ module Rpdfium
784
784
  attach_function :FPDFPageObj_GetDashPhase,
785
785
  %i[FPDF_PAGEOBJECT pointer], :FPDF_BOOL
786
786
 
787
- # Marked content (Tagged PDF) — operatori BMC/BDC del content stream.
788
- # In PDF strutturati (PDF/UA, Word→PDF, InDesign export), gli operatori
789
- # `/Span BMC ... EMC` o `/Span <</MCID 12>> BDC ... EMC` raggruppano
790
- # semanticamente i char. Per PDF generati da gestionali italiani questi
791
- # tag NON sono presenti; per PDF "tagged" sono il modo più affidabile
792
- # di raggruppare token.
787
+ # Marked content (Tagged PDF) — BMC/BDC operators of the content stream.
788
+ # In structured PDFs (PDF/UA, Word→PDF, InDesign export), the operators
789
+ # `/Span BMC ... EMC` or `/Span <</MCID 12>> BDC ... EMC` group
790
+ # chars semantically. For PDFs generated by Italian management software
791
+ # these tags are NOT present; for "tagged" PDFs they are the most reliable
792
+ # way to group tokens.
793
793
  attach_function :FPDFPageObj_CountMarks,
794
794
  %i[FPDF_PAGEOBJECT], :int
795
795
  attach_function :FPDFPageObj_GetMark,
@@ -814,23 +814,23 @@ module Rpdfium
814
814
  # =========================================================================
815
815
  # Catalog / Document metadata
816
816
  # =========================================================================
817
- # FPDFCatalog_GetLanguage: lingua dichiarata dal documento (es. "it-IT").
818
- # Utile per pipeline di estrazione che vogliono switchare regole
819
- # language-specific (es. tokenizer di parole, lookup hyphen).
817
+ # FPDFCatalog_GetLanguage: language declared by the document (e.g. "it-IT").
818
+ # Useful for extraction pipelines that want to switch language-specific
819
+ # rules (e.g. word tokenizer, hyphen lookup).
820
820
  attach_function :FPDFCatalog_GetLanguage,
821
821
  %i[FPDF_DOCUMENT pointer ulong], :ulong
822
822
 
823
- # FPDFDoc_GetPageMode: stato di apertura PDF (es. PageMode.UseOutlines,
824
- # PageMode.FullScreen). Numeric. Utile per editor PDF/viewer building.
823
+ # FPDFDoc_GetPageMode: PDF open state (e.g. PageMode.UseOutlines,
824
+ # PageMode.FullScreen). Returns an integer. Useful when building PDF editors/viewers.
825
825
  attach_function :FPDFDoc_GetPageMode, %i[FPDF_DOCUMENT], :int
826
826
 
827
827
  # =========================================================================
828
- # Links (annotation Link e LinkAtPoint per ricerca per coordinata)
828
+ # Links (Link annotation and LinkAtPoint for coordinate-based lookup)
829
829
  # =========================================================================
830
- # `FPDFLink_GetLinkAtPoint`: dato (x, y) in coordinate pagina, ritorna
831
- # il link annotation che lo contiene. Cuore della funzione "click handling"
832
- # in viewer / OCR-style "extract links". Pdfplumber espone simile via
833
- # `page.hyperlinks`.
830
+ # `FPDFLink_GetLinkAtPoint`: given (x, y) in page coordinates, returns
831
+ # the link annotation that contains it. The core of "click handling"
832
+ # in viewers / OCR-style "extract links". Pdfplumber exposes something
833
+ # similar via `page.hyperlinks`.
834
834
  attach_function :FPDFLink_GetLinkAtPoint,
835
835
  %i[FPDF_PAGE double double], :FPDF_LINK
836
836
  attach_function :FPDFLink_GetLinkZOrderAtPoint,
@@ -839,37 +839,37 @@ module Rpdfium
839
839
  %i[FPDF_PAGE FPDF_LINK], :FPDF_ANNOTATION
840
840
  attach_function :FPDFLink_GetAnnotRect,
841
841
  %i[FPDF_LINK pointer], :FPDF_BOOL
842
- # FPDFLink_GetTextRange: range di char_index nella text page corrispondenti
843
- # al link. Permette di mappare hyperlink → testo della pagina.
842
+ # FPDFLink_GetTextRange: range of char_index in the text page corresponding
843
+ # to the link. Allows mapping hyperlink → page text.
844
844
  attach_function :FPDFLink_GetTextRange,
845
845
  %i[FPDF_LINK pointer pointer], :FPDF_BOOL
846
- # Rect e QuadPoints: geometria del link (rectangle o quadrilatero per
847
- # link che attraversano più righe).
846
+ # Rect and QuadPoints: link geometry (rectangle or quadrilateral for
847
+ # links that span multiple lines).
848
848
  attach_function :FPDFLink_GetRect,
849
849
  %i[FPDF_LINK int pointer], :FPDF_BOOL
850
850
  attach_function :FPDFLink_GetQuadPoints,
851
851
  %i[FPDF_LINK int pointer], :FPDF_BOOL
852
852
 
853
853
  # =========================================================================
854
- # Action / Destination (estensioni outline + link)
854
+ # Action / Destination (outline + link extensions)
855
855
  # =========================================================================
856
- # FPDFAction_GetDest: per action di tipo "GoTo", ritorna il FPDF_DEST.
857
- # FPDFAction_GetFilePath: per action "Launch" o "RemoteGoTo", path del file
858
- # esterno target.
856
+ # FPDFAction_GetDest: for "GoTo"-type actions, returns the FPDF_DEST.
857
+ # FPDFAction_GetFilePath: for "Launch" or "RemoteGoTo" actions, the path of
858
+ # the target external file.
859
859
  attach_function :FPDFAction_GetDest,
860
860
  %i[FPDF_DOCUMENT FPDF_ACTION], :FPDF_DEST
861
861
  attach_function :FPDFAction_GetFilePath,
862
862
  %i[FPDF_ACTION pointer ulong], :ulong
863
- # FPDFBookmark_GetAction: action associata a un bookmark (alternativa a
864
- # GetDest se il bookmark è un'action invece di una destinazione).
863
+ # FPDFBookmark_GetAction: action associated with a bookmark (alternative to
864
+ # GetDest if the bookmark is an action instead of a destination).
865
865
  attach_function :FPDFBookmark_GetAction,
866
866
  %i[FPDF_BOOKMARK], :FPDF_ACTION
867
- # FPDFBookmark_GetCount: numero di sub-bookmark (positivo = espansi,
868
- # negativo = collassati, 0 = leaf).
867
+ # FPDFBookmark_GetCount: number of sub-bookmarks (positive = expanded,
868
+ # negative = collapsed, 0 = leaf).
869
869
  attach_function :FPDFBookmark_GetCount,
870
870
  %i[FPDF_BOOKMARK], :int
871
- # FPDFDest_GetView: tipo di view (Fit, FitH, XYZ ecc.) + parametri.
872
- # FPDFDest_GetLocationInPage: x/y/zoom estratti dal dest.
871
+ # FPDFDest_GetView: view type (Fit, FitH, XYZ, etc.) + parameters.
872
+ # FPDFDest_GetLocationInPage: x/y/zoom extracted from the dest.
873
873
  attach_function :FPDFDest_GetView,
874
874
  %i[FPDF_DEST pointer pointer], :ulong
875
875
  attach_function :FPDFDest_GetLocationInPage,
@@ -879,17 +879,17 @@ module Rpdfium
879
879
  # =========================================================================
880
880
  # Font extras: GetFontData, GetAscent, GetDescent
881
881
  # =========================================================================
882
- # Già attaccate sopra: FPDFFont_GetGlyphWidth.
883
- # Aggiungiamo: FontData (raw font program bytes — utile per inspection,
884
- # debug embedding, font substitution) e GetGlyphPath (path vettoriale di
885
- # un glifo, alternativa a GlyphWidth per font esotici).
886
- # GetFontData ha la convention bool: ritorna `out_buflen` se buf è NULL.
882
+ # Already attached above: FPDFFont_GetGlyphWidth.
883
+ # We add: FontData (raw font program bytes — useful for inspection,
884
+ # debugging font embedding, font substitution) and GetGlyphPath (vector path of
885
+ # a glyph, an alternative to GlyphWidth for exotic fonts).
886
+ # GetFontData returns a bool; the required size is written to `out_buflen`, even when buf is NULL.
887
887
  attach_function :FPDFFont_GetFontData,
888
888
  %i[FPDF_FONT pointer size_t pointer], :FPDF_BOOL
889
889
  attach_function :FPDFFont_GetGlyphPath,
890
890
  %i[FPDF_FONT uint float], :FPDF_GLYPHPATH
891
- # FPDF_GLYPHPATH: handle a un path. Lo aggiungo come typedef.
892
- # Le sue API GlyphPath_* sono niche, ma le esponiamo per simmetria.
891
+ # FPDF_GLYPHPATH: handle to a path. Added as a typedef.
892
+ # Its GlyphPath_* APIs are niche, but we expose them for symmetry.
893
893
  attach_function :FPDFGlyphPath_CountGlyphSegments,
894
894
  %i[FPDF_GLYPHPATH], :int
895
895
  attach_function :FPDFGlyphPath_GetGlyphPathSegment,
@@ -898,14 +898,14 @@ module Rpdfium
898
898
  # =========================================================================
899
899
  # Text page: char index at position
900
900
  # =========================================================================
901
- # FPDFText_GetCharIndexAtPos: dato un punto (x, y) in coord pagina,
902
- # ritorna l'indice del char più vicino (entro tolerance). Utile per
903
- # "hit test" in viewer e per mapping coord → text index nella ricerca.
901
+ # FPDFText_GetCharIndexAtPos: given a point (x, y) in page coordinates,
902
+ # returns the index of the nearest char (within tolerance). Useful for
903
+ # "hit test" in viewers and for mapping coord → text index during search.
904
904
  attach_function :FPDFText_GetCharIndexAtPos,
905
905
  %i[FPDF_TEXTPAGE double double double double], :int
906
906
  # FPDFText_GetTextIndexFromCharIndex / GetCharIndexFromTextIndex:
907
- # mappano l'indice "char" (per glifo) all'indice "text" (per codepoint
908
- # logico). I due indici differiscono per ligature/sostituzioni.
907
+ # map between the "char" index (per glyph) and the "text" index (per logical
908
+ # codepoint). The two indices differ due to ligatures/substitutions.
909
909
  attach_function :FPDFText_GetTextIndexFromCharIndex,
910
910
  %i[FPDF_TEXTPAGE int], :int
911
911
  attach_function :FPDFText_GetCharIndexFromTextIndex,
@@ -914,27 +914,27 @@ module Rpdfium
914
914
  # =========================================================================
915
915
  # Annotation extras: GetFlags, GetColor, GetBorder, AP, attachment points
916
916
  # =========================================================================
917
- # FPDFAnnot_GetFlags: bitmask di Flags (Hidden, Print, NoZoom ecc.).
918
- # Senza questo, non possiamo distinguere un annotation visibile da uno
919
- # con flag Hidden.
917
+ # FPDFAnnot_GetFlags: bitmask of Flags (Hidden, Print, NoZoom, etc.).
918
+ # Without this, we cannot distinguish a visible annotation from one
919
+ # with the Hidden flag.
920
920
  attach_function :FPDFAnnot_GetFlags, %i[FPDF_ANNOTATION], :int
921
- # Colore: stroke (BORDER_COLOR) e fill (INTERIOR_COLOR).
921
+ # Color: stroke (BORDER_COLOR) and fill (INTERIOR_COLOR).
922
922
  attach_function :FPDFAnnot_GetColor,
923
923
  %i[FPDF_ANNOTATION int pointer pointer pointer pointer],
924
924
  :FPDF_BOOL
925
- # Border: spessore, raggio orizzontale/verticale, dash array count.
925
+ # Border: thickness, horizontal/vertical radius, dash array count.
926
926
  attach_function :FPDFAnnot_GetBorder,
927
927
  %i[FPDF_ANNOTATION pointer pointer pointer], :FPDF_BOOL
928
- # AP (Appearance Stream): forma renderizzata dell'annotation in vari
929
- # modi (Normal/Rollover/Down).
928
+ # AP (Appearance Stream): rendered form of the annotation in various
929
+ # modes (Normal/Rollover/Down).
930
930
  attach_function :FPDFAnnot_GetAP,
931
931
  %i[FPDF_ANNOTATION int pointer ulong], :ulong
932
- # FileAttachment: per Annotation di sottotipo FileAttachment, ottiene
933
- # l'FPDF_ATTACHMENT.
932
+ # FileAttachment: for annotations of subtype FileAttachment, obtains
933
+ # the FPDF_ATTACHMENT.
934
934
  attach_function :FPDFAnnot_GetFileAttachment,
935
935
  %i[FPDF_ANNOTATION], :FPDF_ATTACHMENT
936
- # AttachmentPoints: per highlight/markup che attraversano più righe,
937
- # i 4 punti di ogni quadrilatero.
936
+ # AttachmentPoints: for highlight/markup spanning multiple lines,
937
+ # the 4 points of each quadrilateral.
938
938
  attach_function :FPDFAnnot_CountAttachmentPoints,
939
939
  %i[FPDF_ANNOTATION], :size_t
940
940
  attach_function :FPDFAnnot_GetAttachmentPoints,
@@ -943,11 +943,11 @@ module Rpdfium
943
943
  # =========================================================================
944
944
  # Attachment extras
945
945
  # =========================================================================
946
- # FPDFAttachment_GetSubtype: MIME-like subtype del file allegato.
946
+ # FPDFAttachment_GetSubtype: MIME-like subtype of the attached file.
947
947
  attach_function :FPDFAttachment_GetSubtype,
948
948
  %i[FPDF_ATTACHMENT pointer ulong], :ulong
949
- # FPDFAttachment_GetStringValue/HasKey: per leggere i metadati custom
950
- # del file attachment (Description, CreationDate, ecc.).
949
+ # FPDFAttachment_GetStringValue/HasKey: to read the custom metadata
950
+ # of the file attachment (Description, CreationDate, etc.).
951
951
  attach_function :FPDFAttachment_HasKey,
952
952
  %i[FPDF_ATTACHMENT string], :FPDF_BOOL
953
953
  attach_function :FPDFAttachment_GetValueType,
@@ -956,15 +956,15 @@ module Rpdfium
956
956
  %i[FPDF_ATTACHMENT string pointer ulong], :ulong
957
957
 
958
958
  # =========================================================================
959
- # Helper: leggere stringhe UTF-16LE che PDFium ritorna in bytes
959
+ # Helper: reading UTF-16LE strings that PDFium returns as bytes
960
960
  # =========================================================================
961
- # Convenzione PDFium: la maggior parte delle Get*Text/Get*Name ritornano
962
- # `unsigned long` (numero BYTES, terminatore incluso). Si chiama prima con
963
- # buffer NULL/0 per ottenere la dimensione, poi con buffer allocato.
961
+ # PDFium convention: most Get*Text/Get*Name calls return
962
+ # `unsigned long` (number of BYTES, terminator included). Call the function
963
+ # first with a NULL/0 buffer to obtain the size, then again with an allocated buffer.
964
964
  def self.read_utf16_string(method_name, *args)
965
965
  args_probe = args + [FFI::Pointer::NULL, 0]
966
966
  n_bytes = send(method_name, *args_probe)
967
- return "" if n_bytes <= 2 # solo terminatore null o errore
967
+ return "" if n_bytes <= 2 # only the null terminator or an error
968
968
 
969
969
  buf = FFI::MemoryPointer.new(:uchar, n_bytes)
970
970
  args_real = args + [buf, n_bytes]
@@ -972,7 +972,20 @@ module Rpdfium
972
972
  utf16_bytes_to_utf8(buf.read_bytes(n_bytes))
973
973
  end
974
974
 
975
- # PDFium ritorna UTF-16LE little-endian con terminatore null.
975
+ # Same two-call convention, but for the few APIs that return 7-bit
976
+ # ASCII bytes instead of UTF-16LE (e.g. FPDFAction_GetURIPath).
977
+ def self.read_ascii_string(method_name, *args)
978
+ args_probe = args + [FFI::Pointer::NULL, 0]
979
+ n_bytes = send(method_name, *args_probe)
980
+ return "" if n_bytes <= 1 # only the null terminator or an error
981
+
982
+ buf = FFI::MemoryPointer.new(:uchar, n_bytes)
983
+ args_real = args + [buf, n_bytes]
984
+ send(method_name, *args_real)
985
+ buf.read_bytes(n_bytes).delete("\x00").force_encoding("UTF-8")
986
+ end
987
+
988
+ # PDFium returns UTF-16LE (little-endian) text with a null terminator.
976
989
  def self.utf16_bytes_to_utf8(bytes)
977
990
  bytes.force_encoding("UTF-16LE")
978
991
  .encode("UTF-8", invalid: :replace, undef: :replace)