rpdfium 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +1870 -0
- data/LICENSE +19 -0
- data/README.md +599 -0
- data/lib/rpdfium/annotation/annotation.rb +114 -0
- data/lib/rpdfium/document.rb +226 -0
- data/lib/rpdfium/errors.rb +55 -0
- data/lib/rpdfium/form/form.rb +121 -0
- data/lib/rpdfium/image/embedded.rb +145 -0
- data/lib/rpdfium/io/png.rb +65 -0
- data/lib/rpdfium/page.rb +1623 -0
- data/lib/rpdfium/raw.rb +982 -0
- data/lib/rpdfium/search/search.rb +101 -0
- data/lib/rpdfium/structure/attachment.rb +40 -0
- data/lib/rpdfium/structure/element.rb +330 -0
- data/lib/rpdfium/structure/outline.rb +48 -0
- data/lib/rpdfium/structure/tree.rb +202 -0
- data/lib/rpdfium/table/cells.rb +137 -0
- data/lib/rpdfium/table/debugger.rb +122 -0
- data/lib/rpdfium/table/edges.rb +225 -0
- data/lib/rpdfium/table/extractor.rb +246 -0
- data/lib/rpdfium/table/table.rb +184 -0
- data/lib/rpdfium/util/cluster.rb +143 -0
- data/lib/rpdfium/util/column_inference.rb +139 -0
- data/lib/rpdfium/util/label_matcher.rb +214 -0
- data/lib/rpdfium/util/text_extraction.rb +49 -0
- data/lib/rpdfium/util/word_extractor.rb +151 -0
- data/lib/rpdfium/util/word_merger.rb +102 -0
- data/lib/rpdfium/version.rb +5 -0
- data/lib/rpdfium.rb +92 -0
- metadata +134 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rpdfium
|
|
4
|
+
# Wrapper around an FPDF_ANNOTATION handle. Annotations include links,
# highlights, comments and form widgets. PDFium requires every handle to be
# closed with FPDFPage_CloseAnnot; that happens in #close, or in a finalizer
# when #close is never called.
class Annotation
  # PDFium annotation subtype code => Ruby symbol.
  SUBTYPES = {
    Raw::FPDF_ANNOT_UNKNOWN => :unknown,
    Raw::FPDF_ANNOT_TEXT => :text,
    Raw::FPDF_ANNOT_LINK => :link,
    Raw::FPDF_ANNOT_FREETEXT => :free_text,
    Raw::FPDF_ANNOT_LINE => :line,
    Raw::FPDF_ANNOT_SQUARE => :square,
    Raw::FPDF_ANNOT_CIRCLE => :circle,
    Raw::FPDF_ANNOT_HIGHLIGHT => :highlight,
    Raw::FPDF_ANNOT_UNDERLINE => :underline,
    Raw::FPDF_ANNOT_SQUIGGLY => :squiggly,
    Raw::FPDF_ANNOT_STRIKEOUT => :strikeout,
    Raw::FPDF_ANNOT_STAMP => :stamp,
    Raw::FPDF_ANNOT_INK => :ink,
    Raw::FPDF_ANNOT_POPUP => :popup,
    Raw::FPDF_ANNOT_FILEATTACHMENT => :file_attachment,
    Raw::FPDF_ANNOT_WIDGET => :widget,
    Raw::FPDF_ANNOT_REDACT => :redact
  }.freeze

  attr_reader :page, :index

  # Loads the annotation at +index+ on +page+.
  #
  # @param page [Object] owning page; #handle, #height and #document are used
  # @param index [Integer] annotation index handed to FPDFPage_GetAnnot
  # @raise [Error] when PDFium returns a null handle
  def initialize(page, index)
    @page = page
    @index = index
    handle = Raw.FPDFPage_GetAnnot(page.handle, index)
    raise Error, "Could not load annotation #{index}" if handle.null?

    # The state lives in a Hash shared with the finalizer so that #close and
    # the finalizer observe the same :closed flag and never double-close.
    @state = { handle: handle, closed: false }
    ObjectSpace.define_finalizer(self, self.class.finalizer(@state))
  end

  # Builds the finalizer proc. It closes over +state+ only (never the
  # Annotation itself) and releases the native handle unless already closed.
  #
  # @param state [Hash] the :handle / :closed hash created in #initialize
  # @return [Proc]
  def self.finalizer(state)
    proc do
      next if state[:closed]
      next if state[:handle].null?

      Raw.FPDFPage_CloseAnnot(state[:handle])
      state[:closed] = true
    end
  end

  # @return [FFI::Pointer] the native handle (FFI::Pointer::NULL after #close)
  def handle
    @state[:handle]
  end

  # @return [Symbol] one of the SUBTYPES values; :unknown for unmapped codes
  def subtype
    SUBTYPES[Raw.FPDFAnnot_GetSubtype(@state[:handle])] || :unknown
  end

  # Annotation rectangle, or nil when FPDFAnnot_GetRect reports failure.
  # The vertical values are subtracted from the page height (presumably to
  # turn PDF bottom-up coordinates into top-down ones).
  #
  # @return [Hash{Symbol=>Numeric}, nil] keys :x0, :x1, :top, :bottom
  def bbox
    rect = Raw::FS_RECTF.new
    return nil if Raw.FPDFAnnot_GetRect(@state[:handle], rect) == 0

    h = @page.height
    { x0: rect[:left], x1: rect[:right],
      top: h - rect[:top], bottom: h - rect[:bottom] }
  end

  # Value of a key in the annotation dictionary (read as UTF-16LE).
  # Common keys: "Contents" (annotation text), "T" (author),
  # "M" (modification date), "NM" (unique name).
  #
  # @param key [#to_s]
  def [](key)
    Raw.read_utf16_string(:FPDFAnnot_GetStringValue, @state[:handle], key.to_s)
  end

  # @param key [#to_s]
  # @return [Boolean] whether the annotation dictionary has +key+
  def has_key?(key)
    Raw.FPDFAnnot_HasKey(@state[:handle], key.to_s) == 1
  end

  # For :link annotations: the target URI of the link action. Returns nil
  # for non-link annotations or when no link/action handle is available, and
  # an empty string when the action carries no URI.
  #
  # FPDFAction_GetURIPath fills its buffer with a NUL-terminated byte string
  # (7-bit ASCII / UTF-8 per the PDFium header), not UTF-16LE, so it is read
  # as raw bytes here instead of going through the UTF-16 reader. PDFium
  # does not guarantee the bytes are valid UTF-8; callers that care should
  # check String#valid_encoding?.
  #
  # @return [String, nil]
  def link_uri
    return nil unless subtype == :link

    link_handle = Raw.FPDFAnnot_GetLink(@state[:handle])
    return nil if link_handle.null?

    action = Raw.FPDFLink_GetAction(link_handle)
    return nil if action.null?

    doc_handle = @page.document.handle
    # First call probes the required size (which includes the trailing NUL).
    len = Raw.FPDFAction_GetURIPath(doc_handle, action, FFI::Pointer::NULL, 0)
    return +"" if len <= 1

    buf = FFI::MemoryPointer.new(:uchar, len)
    Raw.FPDFAction_GetURIPath(doc_handle, action, buf, len)
    buf.read_bytes(len - 1).force_encoding(Encoding::UTF_8)
  end

  # For internal links: the destination page index, or nil when the
  # annotation is not a link, has no destination, or PDFium reports a
  # negative index.
  #
  # @return [Integer, nil]
  def link_dest_page
    return nil unless subtype == :link

    link_handle = Raw.FPDFAnnot_GetLink(@state[:handle])
    return nil if link_handle.null?

    dest = Raw.FPDFLink_GetDest(@page.document.handle, link_handle)
    return nil if dest.null?

    idx = Raw.FPDFDest_GetDestPageIndex(@page.document.handle, dest)
    idx >= 0 ? idx : nil
  end

  # Releases the native handle. Safe to call more than once; after the first
  # call the handle is NULL and the finalizer is removed.
  def close
    return if @state[:closed]

    Raw.FPDFPage_CloseAnnot(@state[:handle]) unless @state[:handle].null?
    @state[:handle] = FFI::Pointer::NULL
    @state[:closed] = true
    ObjectSpace.undefine_finalizer(self)
  end
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rpdfium
|
|
4
|
+
# Document-level wrapper. Exposes:
#   - opening from a path / IO / raw bytes, and page access by index
#   - metadata (Title, Author, etc.)
#   - permissions
#   - outline (bookmarks)
#   - attachments
#   - form environment (lazy)
class Document
  include Enumerable

  # Info-dictionary keys queried by #metadata.
  META_KEYS = %w[Title Author Subject Keywords Creator Producer
                 CreationDate ModDate Trapped].freeze

  attr_reader :source

  # Opens a document. Without a block the Document is returned and the
  # caller must #close it; with a block the document is yielded and closed
  # when the block exits (normally or via an exception), and the block's
  # value is returned.
  #
  # @param input [String, IO, StringIO] see #initialize
  # @param password [String, nil]
  def self.open(input, password: nil, &block)
    doc = new(input, password: password)
    return doc unless block_given?

    begin
      yield doc
    ensure
      doc.close
    end
  end

  # @param input [String, IO, StringIO] a file path, raw PDF bytes, or a
  #   readable stream
  # @param password [String, nil]
  # @raise [PasswordError] when PDFium reports error code 4
  # @raise [LoadError] (Rpdfium::LoadError) for any other load failure
  # @raise [ArgumentError] for unsupported input types
  def initialize(input, password: nil)
    Rpdfium.init!
    @password = password
    @source = input
    handle, retain_buffer = load_handle(input, password)
    if handle.null?
      code = Rpdfium.last_error_code
      msg = Rpdfium.last_error_message
      # 4 is the PDFium code for "password required or incorrect".
      raise PasswordError, msg if code == 4

      raise LoadError, "Failed to load PDF: #{msg}"
    end
    # State shared between the instance and the finalizer. It is wrapped in
    # a mutable Hash because the finalizer closure and an explicit #close
    # must see the same :closed flag - otherwise whoever comes second would
    # call FPDF_CloseDocument on an already-freed handle and PDFium would
    # segfault.
    @state = {
      handle: handle,
      retain_buffer: retain_buffer,
      closed: false
    }
    @form_env = nil
    @page_cache = {}
    # IMPORTANT: the finalizer captures @state (a Hash), NOT self. Capturing
    # self would prevent the GC from ever collecting the Document. The
    # finalizer also does NOT touch @page_cache: pages have their own
    # finalizers, and the execution order between finalizers is not
    # deterministic in Ruby.
    ObjectSpace.define_finalizer(self, self.class.finalizer(@state))
  end

  # Builds the finalizer proc over the shared +state+ hash. It closes the
  # native document unless already closed and drops the retained buffer.
  #
  # @param state [Hash] the :handle / :retain_buffer / :closed hash
  # @return [Proc]
  def self.finalizer(state)
    proc do
      next if state[:closed]
      next if state[:handle].null?

      Raw.FPDF_CloseDocument(state[:handle])
      state[:closed] = true
      state[:retain_buffer] = nil
    end
  end

  # @return [FFI::Pointer] native document handle (NULL after #close)
  def handle
    @state[:handle]
  end

  # ===== Pages =====

  # @return [Integer] number of pages
  # @raise [Error] when the document is closed
  def page_count
    ensure_open!
    Raw.FPDF_GetPageCount(@state[:handle])
  end
  alias size page_count
  alias length page_count

  # Returns the page at +index+ (zero-based).
  #
  # @param index [Integer]
  # @raise [Error] when the document is closed
  # @raise [PageError] when +index+ is not an Integer within 0...page_count
  def page(index)
    ensure_open!
    # Range#cover? is also true for values such as 1.5, so require an
    # Integer explicitly before using the index as a cache key.
    unless index.is_a?(Integer) && (0...page_count).cover?(index)
      raise PageError, "Page index #{index} out of range"
    end

    # Pages are cacheable: reloading them is expensive and the objects are
    # immutable from the application's point of view (read-only mode).
    @page_cache[index] ||= Page.new(self, index)
  end
  alias [] page

  # Yields every page in order; returns a sized Enumerator without a block.
  def each
    return enum_for(:each) { page_count } unless block_given?

    page_count.times { |i| yield page(i) }
  end

  # @param index [Integer] zero-based page index
  # @return the label read via FPDF_GetPageLabel
  # @raise [Error] when the document is closed
  def page_label(index)
    ensure_open!
    Raw.read_utf16_string(:FPDF_GetPageLabel, @state[:handle], index)
  end

  # ===== Metadata =====

  # Non-empty META_KEYS entries, keyed by the downcased key as a symbol
  # (e.g. :title, :creationdate).
  #
  # @return [Hash{Symbol=>String}]
  # @raise [Error] when the document is closed
  def metadata
    ensure_open!
    META_KEYS.each_with_object({}) do |key, h|
      v = Raw.read_utf16_string(:FPDF_GetMetaText, @state[:handle], key)
      h[key.downcase.to_sym] = v unless v.empty?
    end
  end

  # @return [String, nil] PDF version such as "1.7", or nil when
  #   FPDF_GetFileVersion reports failure
  # @raise [Error] when the document is closed
  def file_version
    ensure_open!
    buf = FFI::MemoryPointer.new(:int)
    return nil if Raw.FPDF_GetFileVersion(@state[:handle], buf) == 0

    v = buf.read_int
    # PDFium returns 14 for 1.4, 17 for 1.7
    "#{v / 10}.#{v % 10}"
  end

  # Permission bits according to the PDF spec (Table 22, section 7.6.3.2).
  PERMISSIONS = {
    print: 1 << 2,
    modify: 1 << 3,
    copy: 1 << 4,
    annotate: 1 << 5,
    fill_forms: 1 << 8,
    extract_acc: 1 << 9,
    assemble: 1 << 10,
    print_hq: 1 << 11
  }.freeze

  # @return [Hash{Symbol=>Boolean}] one entry per PERMISSIONS key, true when
  #   the corresponding bit is set in FPDF_GetDocPermissions
  # @raise [Error] when the document is closed
  def permissions
    ensure_open!
    bits = Raw.FPDF_GetDocPermissions(@state[:handle])
    PERMISSIONS.transform_values { |mask| (bits & mask) == mask }
  end

  # ===== Form type =====

  # PDFium form type code => Ruby symbol.
  FORM_TYPES = {
    Raw::FORMTYPE_NONE => :none,
    Raw::FORMTYPE_ACRO_FORM => :acroform,
    Raw::FORMTYPE_XFA_FULL => :xfa_full,
    Raw::FORMTYPE_XFA_FOREGROUND => :xfa_foreground
  }.freeze

  # NOTE(review): form_type / has_forms? / form_env are intentionally left
  # without ensure_open! because other classes may consult them while a
  # document is being torn down - confirm against Page before tightening.

  # @return [Symbol] one of the FORM_TYPES values, or :unknown
  def form_type
    FORM_TYPES[Raw.FPDF_GetFormType(@state[:handle])] || :unknown
  end

  def has_forms?
    form_type != :none
  end

  # Lazy form environment. Needed to:
  #   - read FormFieldType/Value/Name on widget annotations
  #   - render form fields on top of the page (FFLDraw)
  #
  # @return [Form::Environment, nil] nil when the document has no forms
  def form_env
    @form_env ||= Form::Environment.new(self) if has_forms?
  end

  # ===== Outline =====

  # @return the result of Outline.from_document for this document
  # @raise [Error] when the document is closed
  def outline
    ensure_open!
    Outline.from_document(self)
  end

  # ===== Attachments =====

  # @return [Array<Attachment>] one wrapper per embedded attachment
  # @raise [Error] when the document is closed
  def attachments
    ensure_open!
    n = Raw.FPDFDoc_GetAttachmentCount(@state[:handle])
    Array.new(n) { |i| Attachment.new(self, i) }
  end

  # ===== Close =====

  # Releases native resources. Safe to call more than once.
  def close
    return if @state[:closed]

    # Order: close the form env and cached pages first, then the document.
    @form_env&.close
    @page_cache.each_value(&:close)
    @page_cache.clear
    Raw.FPDF_CloseDocument(@state[:handle]) unless @state[:handle].null?
    @state[:handle] = FFI::Pointer::NULL
    @state[:retain_buffer] = nil
    @state[:closed] = true
    ObjectSpace.undefine_finalizer(self)
  end

  def closed?
    @state[:closed]
  end

  private

  def ensure_open!
    raise Error, "Document is closed" if @state[:closed]
  end

  # Resolves +input+ to a native document handle.
  #
  # @return [Array] [handle, retain_buffer]; retain_buffer is nil for
  #   path-based loads and the backing memory for byte-based loads
  # @raise [ArgumentError] for unsupported input types
  def load_handle(input, password)
    case input
    when String
      if existing_file?(input)
        [Raw.FPDF_LoadDocument(input, password), nil]
      else
        load_from_bytes(input, password)
      end
    when IO, StringIO
      load_from_bytes(input.read, password)
    else
      raise ArgumentError, "Unsupported input: #{input.class}"
    end
  end

  # True when +string+ names an existing regular file. File.file? raises
  # ArgumentError ("string contains null byte") for strings containing NUL,
  # which raw PDF bytes routinely do; such a string cannot be a path, so it
  # is treated as "not a file" instead of aborting the load.
  def existing_file?(string)
    File.file?(string)
  rescue ArgumentError
    false
  end

  def load_from_bytes(bytes, password)
    # CRITICAL: PDFium does NOT copy the bytes - it references them. The
    # buffer must be kept alive for the whole lifetime of the document.
    buf = FFI::MemoryPointer.new(:uchar, bytes.bytesize)
    buf.put_bytes(0, bytes)
    [Raw.FPDF_LoadMemDocument64(buf, bytes.bytesize, password), buf]
  end
end
|
|
226
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rpdfium
  # Base class for every error raised by this library.
  class Error < StandardError; end
  # Native library missing, or a document failed to load. Inside the
  # Rpdfium namespace this shadows Ruby's top-level ::LoadError.
  class LoadError < Error; end
  # Invalid page index.
  class PageError < Error; end
  # Document needs a password, or the supplied one is wrong.
  class PasswordError < Error; end
  # Form-fill environment could not be created.
  class FormError < Error; end

  # Human-readable messages for the codes returned by FPDF_GetLastError.
  PDFIUM_ERRORS = {
    0 => "Success",
    1 => "Unknown error",
    2 => "File not found or could not be opened",
    3 => "File not in PDF format or corrupted",
    4 => "Password required or incorrect",
    5 => "Unsupported security scheme",
    6 => "Page not found or content error"
  }.freeze

  # Created while the file is being loaded. Creating it lazily inside init!
  # (`@init_mutex ||= Mutex.new`) would itself be a race: two threads could
  # each build their own mutex and both enter the critical section.
  @init_mutex ||= Mutex.new

  class << self
    # Initializes the PDFium library exactly once per process. Subsequent
    # calls are no-ops.
    #
    # @raise [Rpdfium::LoadError] when the native library is not loaded
    def init!
      @init_mutex.synchronize do
        return if @initialized

        unless Raw.native_loaded?
          raise LoadError, <<~MSG.strip
            PDFium native library not loaded.
            Set ENV["PDFIUM_LIBRARY_PATH"] to libpdfium.{so,dylib,dll}, or
            install the rpdfium-binary gem.
            Original load error: #{Raw.load_error&.message}
          MSG
        end

        Raw.FPDF_InitLibrary
        @initialized = true
        # Automatic cleanup at process exit.
        # NOTE: Ruby runs at_exit handlers BEFORE the exit-time finalizers
        # of objects that are still alive, so a handle that was never closed
        # explicitly may be finalized after the library has been destroyed.
        # The flag is cleared so that initialized? reflects the teardown.
        at_exit do
          if @initialized
            Raw.FPDF_DestroyLibrary
            @initialized = false
          end
        end
      end
    end

    # @return [Boolean] true between a successful init! and library teardown
    def initialized?
      @initialized == true
    end

    # @return [Integer] the code reported by FPDF_GetLastError
    def last_error_code
      Raw.FPDF_GetLastError
    end

    # @return [String] message for the current PDFium error code
    def last_error_message
      # Read the code once so the lookup and the fallback text agree.
      code = last_error_code
      PDFIUM_ERRORS[code] || "Unknown PDFium error (#{code})"
    end
  end
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rpdfium
|
|
4
|
+
module Form
|
|
5
|
+
# An FPDF_FORMHANDLE is required to read widget annotations.
# In read-only mode it is enough to initialize it with a minimal
# FORMFILLINFO (version=2, NULL callbacks). PDFium invokes the callbacks only
# during user interaction or JavaScript, neither of which is used here.
class Environment
  attr_reader :document

  # @param document [Object] owning document; #handle and #form_type are used
  # @raise [FormError] when FPDFDOC_InitFormFillEnvironment returns null
  def initialize(document)
    @document = document
    @info = Raw::FPDF_FORMFILLINFO.new
    @info[:version] = 2
    # All pointer members are left untouched (NULL by default in FFI::Struct).
    handle = Raw.FPDFDOC_InitFormFillEnvironment(document.handle, @info)
    if handle.null?
      raise FormError,
            "FPDFDOC_InitFormFillEnvironment failed (form_type=#{document.form_type})"
    end
    # PDFium requires the FORMFILLINFO to stay valid until the form handle is
    # closed. Keeping it only in @info is not enough for the finalizer path:
    # by the time the finalizer runs the instance (and anything reachable
    # solely through it) may already have been collected. Storing the struct
    # in the state Hash captured by the finalizer keeps it alive.
    @state = { handle: handle, info: @info, closed: false }
    ObjectSpace.define_finalizer(self, self.class.finalizer(@state))
  end

  # Builds the finalizer proc over the shared +state+ hash. It exits the
  # form-fill environment unless already closed, then releases the retained
  # FORMFILLINFO.
  #
  # @param state [Hash] the :handle / :info / :closed hash
  # @return [Proc]
  def self.finalizer(state)
    proc do
      next if state[:closed]
      next if state[:handle].null?

      Raw.FPDFDOC_ExitFormFillEnvironment(state[:handle])
      state[:closed] = true
      state[:info] = nil
    end
  end

  # @return [FFI::Pointer] native form handle (NULL after #close)
  def handle
    @state[:handle]
  end

  # Exits the form-fill environment. Safe to call more than once.
  def close
    return if @state[:closed]

    Raw.FPDFDOC_ExitFormFillEnvironment(@state[:handle]) unless @state[:handle].null?
    @state[:handle] = FFI::Pointer::NULL
    # Only drop the struct after the native environment is gone.
    @state[:info] = nil
    @info = nil
    @state[:closed] = true
    ObjectSpace.undefine_finalizer(self)
  end
end
|
|
50
|
+
|
|
51
|
+
# Wrapper for a form widget. It is built from an annotation of type :widget
# together with the document's form environment.
class Field
  # PDFium form field type code => Ruby symbol.
  TYPES = {
    Raw::FPDF_FORMFIELD_UNKNOWN => :unknown,
    Raw::FPDF_FORMFIELD_PUSHBUTTON => :pushbutton,
    Raw::FPDF_FORMFIELD_CHECKBOX => :checkbox,
    Raw::FPDF_FORMFIELD_RADIOBUTTON => :radiobutton,
    Raw::FPDF_FORMFIELD_COMBOBOX => :combobox,
    Raw::FPDF_FORMFIELD_LISTBOX => :listbox,
    Raw::FPDF_FORMFIELD_TEXTFIELD => :textfield,
    Raw::FPDF_FORMFIELD_SIGNATURE => :signature
  }.freeze

  attr_reader :env, :annotation

  # @param env [Object] form environment; only #handle is used
  # @param annotation [Object] widget annotation; #handle and #bbox are used
  def initialize(env, annotation)
    @env = env
    @annotation = annotation
  end

  # @return [Symbol] one of the TYPES values; :unknown for unmapped codes
  def type
    code = Raw.FPDFAnnot_GetFormFieldType(@env.handle, @annotation.handle)
    TYPES[code] || :unknown
  end

  # Field name, read via FPDFAnnot_GetFormFieldName.
  def name
    Raw.read_utf16_string(:FPDFAnnot_GetFormFieldName, @env.handle, @annotation.handle)
  end

  # Field value, read via FPDFAnnot_GetFormFieldValue.
  def value
    Raw.read_utf16_string(:FPDFAnnot_GetFormFieldValue, @env.handle, @annotation.handle)
  end

  # Raw field flag bits as returned by FPDFAnnot_GetFormFieldFlags.
  def flags
    Raw.FPDFAnnot_GetFormFieldFlags(@env.handle, @annotation.handle)
  end

  # PDF spec section 12.7.4.1 numbers the flag bits from 1:
  # bit 1 = read-only, bit 2 = required, bit 3 = no-export.
  def readonly?
    (flags & (1 << 0)).positive?
  end

  def required?
    (flags & (1 << 1)).positive?
  end

  # For checkboxes and radio buttons; always false for other field types.
  def checked?
    if %i[checkbox radiobutton].include?(type)
      Raw.FPDFAnnot_IsChecked(@env.handle, @annotation.handle) == 1
    else
      false
    end
  end

  # For combo boxes / list boxes: the option labels. Empty when PDFium
  # reports a non-positive option count.
  #
  # @return [Array<String>]
  def options
    count = Raw.FPDFAnnot_GetOptionCount(@env.handle, @annotation.handle)
    return [] if count <= 0

    Array.new(count) do |option_index|
      Raw.read_utf16_string(:FPDFAnnot_GetOptionLabel,
                            @env.handle, @annotation.handle, option_index)
    end
  end

  # Snapshot of the field as a Hash. Entries whose value is nil are dropped:
  # :checked for non-toggle fields, :options for non-choice fields, and
  # :bbox when the annotation reports none.
  #
  # @return [Hash{Symbol=>Object}]
  def to_h
    summary = {}
    summary[:name] = name
    summary[:type] = type
    summary[:value] = value
    summary[:readonly] = readonly?
    summary[:required] = required?
    summary[:checked] = %i[checkbox radiobutton].include?(type) ? checked? : nil
    summary[:options] = %i[combobox listbox].include?(type) ? options : nil
    summary[:bbox] = @annotation.bbox
    summary.compact
  end
end
|
|
120
|
+
end
|
|
121
|
+
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rpdfium
|
|
4
|
+
module Image
|
|
5
|
+
# Wrapper for an image object placed on a page. It can:
#   - read metadata (pixel size, DPI, colorspace, bits per pixel)
#   - return the raw bytes (as stored in the PDF: typically a JPEG)
#   - return the decoded bytes (raster after the filters are applied)
#   - return a rendered bitmap (with masks and matrix applied)
class Embedded
  # PDFium colorspace code => Ruby symbol.
  COLORSPACES = {
    0 => :unknown, 1 => :devicegray, 2 => :devicergb, 3 => :devicecmyk,
    4 => :calgray, 5 => :calrgb, 6 => :lab, 7 => :iccbased,
    8 => :separation, 9 => :devicen, 10 => :indexed, 11 => :pattern
  }.freeze

  attr_reader :page, :handle

  # @param page [Object] owning page; #handle, #height and #document are used
  # @param page_object_handle [FFI::Pointer] native page-object handle
  def initialize(page, page_object_handle)
    @page = page
    @handle = page_object_handle
  end

  # @return [Hash, nil] image metadata, or nil when
  #   FPDFImageObj_GetImageMetadata reports failure
  def metadata
    meta = Raw::FPDF_IMAGEOBJ_METADATA.new
    return nil if Raw.FPDFImageObj_GetImageMetadata(@handle, @page.handle, meta) == 0

    {
      width: meta[:width],
      height: meta[:height],
      horizontal_dpi: meta[:horizontal_dpi],
      vertical_dpi: meta[:vertical_dpi],
      bits_per_pixel: meta[:bits_per_pixel],
      colorspace: COLORSPACES[meta[:colorspace]] || :unknown
    }
  end

  # @return [Array(Integer, Integer), nil] [width, height] in pixels, or nil
  #   on failure
  def pixel_size
    wbuf = FFI::MemoryPointer.new(:uint)
    hbuf = FFI::MemoryPointer.new(:uint)
    return nil if Raw.FPDFImageObj_GetImagePixelSize(@handle, wbuf, hbuf) == 0

    [wbuf.read_uint, hbuf.read_uint]
  end

  # Object bounds, or nil on failure. The vertical values are subtracted
  # from the page height (presumably to turn PDF bottom-up coordinates into
  # top-down ones).
  #
  # @return [Hash{Symbol=>Float}, nil] keys :x0, :x1, :top, :bottom
  def bbox
    l = FFI::MemoryPointer.new(:float)
    r = FFI::MemoryPointer.new(:float)
    b = FFI::MemoryPointer.new(:float)
    t = FFI::MemoryPointer.new(:float)
    return nil if Raw.FPDFPageObj_GetBounds(@handle, l, r, b, t) == 0

    h = @page.height
    { x0: l.read_float, x1: r.read_float,
      top: h - t.read_float, bottom: h - b.read_float }
  end

  # Filters in PDF order: e.g. ["DCTDecode"] is JPEG, ["FlateDecode"] is
  # zlib, ["DCTDecode", "DCTDecode"] is a re-encoding.
  #
  # @return [Array<String>] empty when PDFium reports no filters or failure
  def filters
    count = Raw.FPDFImageObj_GetImageFilterCount(@handle)
    # PDFium reports failure as a negative count; Array.new would raise
    # ArgumentError ("negative array size") on it.
    return [] if count <= 0

    Array.new(count) do |i|
      # Probe for the size (includes the trailing NUL), then read.
      len = Raw.FPDFImageObj_GetImageFilter(@handle, i, FFI::Pointer::NULL, 0)
      next "" unless len > 1

      buf = FFI::MemoryPointer.new(:uchar, len)
      Raw.FPDFImageObj_GetImageFilter(@handle, i, buf, len)
      buf.read_bytes(len - 1).force_encoding("UTF-8")
    end
  end

  # "Raw" bytes: exactly as stored in the PDF. When filters == ["DCTDecode"]
  # these bytes are a complete JPEG that can be saved with a .jpg extension.
  #
  # @return [String] empty when PDFium reports a zero length
  def raw_bytes
    len = Raw.FPDFImageObj_GetImageDataRaw(@handle, FFI::Pointer::NULL, 0)
    return "" if len.zero?

    buf = FFI::MemoryPointer.new(:uchar, len)
    Raw.FPDFImageObj_GetImageDataRaw(@handle, buf, len)
    buf.read_bytes(len)
  end

  # Decoded bytes: raster pixels after the filters have been applied.
  # The layout depends on the colorspace.
  #
  # @return [String] empty when PDFium reports a zero length
  def decoded_bytes
    len = Raw.FPDFImageObj_GetImageDataDecoded(@handle, FFI::Pointer::NULL, 0)
    return "" if len.zero?

    buf = FFI::MemoryPointer.new(:uchar, len)
    Raw.FPDFImageObj_GetImageDataDecoded(@handle, buf, len)
    buf.read_bytes(len)
  end

  # Bitmap rendered with the matrix and masks applied. The native bitmap is
  # always destroyed before returning.
  #
  # @return [Array, nil] [width, height, bytes, stride] where bytes holds
  #   stride * height bytes (treated as BGRA by #save), or nil when PDFium
  #   returns a null bitmap
  def render_bitmap
    bitmap = Raw.FPDFImageObj_GetRenderedBitmap(
      @page.document.handle, @page.handle, @handle
    )
    return nil if bitmap.null?

    begin
      w = Raw.FPDFBitmap_GetWidth(bitmap)
      h = Raw.FPDFBitmap_GetHeight(bitmap)
      stride = Raw.FPDFBitmap_GetStride(bitmap)
      buf = Raw.FPDFBitmap_GetBuffer(bitmap)
      [w, h, buf.read_bytes(stride * h), stride]
    ensure
      Raw.FPDFBitmap_Destroy(bitmap)
    end
  end

  # Saves the image to +path+. When the only filter is DCTDecode the stored
  # JPEG bytes are written directly; otherwise the bitmap is rendered and
  # written as a PNG.
  #
  # @param path [String]
  # @return [String] +path+
  # @raise [Error] when the image has to be rendered and PDFium cannot
  #   produce a bitmap
  def save(path)
    if filters == ["DCTDecode"]
      File.binwrite(path, raw_bytes)
    else
      rendered = render_bitmap
      # render_bitmap returns nil on failure; destructuring nil would only
      # surface later as an unrelated NoMethodError.
      raise Error, "Could not render image bitmap" if rendered.nil?

      w, h, bytes, stride = rendered
      # Rendered bitmaps are BGRA: convert to RGBA for the PNG writer.
      rgba = swap_bgra_to_rgba(bytes, w, h, stride)
      Rpdfium::IO::PNG.write(path, w, h, rgba, stride: w * 4)
    end
    path
  end

  private

  # Converts a BGRA buffer with +stride+ bytes per row into tightly packed
  # RGBA (w * 4 bytes per row), dropping any row padding.
  #
  # @return [String] binary string of w * h * 4 bytes
  def swap_bgra_to_rgba(bgra, w, h, stride)
    out = String.new(capacity: w * h * 4, encoding: Encoding::ASCII_8BIT)
    h.times do |y|
      row = bgra.byteslice(y * stride, w * 4)
      # Each pixel is read as one little-endian 32-bit word (B | G<<8 |
      # R<<16 | A<<24); G and A stay in place while B and R trade positions.
      swapped = row.unpack("V*").map do |px|
        (px & 0xFF00FF00) | ((px & 0xFF) << 16) | ((px >> 16) & 0xFF)
      end
      out << swapped.pack("V*")
    end
    out
  end
end
|
|
144
|
+
end
|
|
145
|
+
end
|