acro_that 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +78 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +86 -0
- data/README.md +360 -0
- data/Rakefile +18 -0
- data/acro_that.gemspec +34 -0
- data/docs/README.md +99 -0
- data/docs/dict_scan_explained.md +341 -0
- data/docs/object_streams.md +311 -0
- data/docs/pdf_structure.md +251 -0
- data/lib/acro_that/actions/add_field.rb +278 -0
- data/lib/acro_that/actions/add_signature_appearance.rb +422 -0
- data/lib/acro_that/actions/base.rb +44 -0
- data/lib/acro_that/actions/remove_field.rb +158 -0
- data/lib/acro_that/actions/update_field.rb +301 -0
- data/lib/acro_that/dict_scan.rb +413 -0
- data/lib/acro_that/document.rb +331 -0
- data/lib/acro_that/field.rb +143 -0
- data/lib/acro_that/incremental_writer.rb +244 -0
- data/lib/acro_that/object_resolver.rb +376 -0
- data/lib/acro_that/objstm.rb +75 -0
- data/lib/acro_that/pdf_writer.rb +97 -0
- data/lib/acro_that/version.rb +5 -0
- data/lib/acro_that.rb +24 -0
- metadata +143 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AcroThat
|
|
4
|
+
module Actions
|
|
5
|
+
# Action to update a field's value and optionally rename it in a PDF document
|
|
6
|
+
class UpdateField
|
|
7
|
+
include Base
|
|
8
|
+
|
|
9
|
+
# Captures everything the action needs; no work is done until #call.
#
# document  - the document whose fields are updated (stored as-is)
# name      - current name of the field to update
# new_value - value to write into the field
# new_name  - optional replacement field name; nil (the default) keeps the name
def initialize(document, name, new_value, new_name: nil)
  @new_name = new_name
  @new_value = new_value
  @name = name
  @document = document
end
|
|
15
|
+
|
|
16
|
+
# Applies the value update (and optional rename) to the document.
#
# Flow:
#   1. Locate the field, first among the document's listed fields, then
#      among pending patches (fields added but not yet written).
#   2. For signature fields whose new value looks like image data, delegate
#      to AddSignatureAppearance; on failure fall through to a normal update.
#   3. Patch /V on the located object and, when that object is a widget with
#      a separate parent field, on the parent as well.
#   4. Optionally patch /T on the object, its parent and related widgets.
#   5. Propagate the value to widgets that point at the field via /Parent and
#      request appearance regeneration through /NeedAppearances.
#
# Returns false when the field or its object body cannot be found, or when
# the patched body is not a dictionary; returns the truthy result of
# AddSignatureAppearance#call when a signature appearance was applied;
# returns true otherwise.
def call
  fld = @document.list_fields.find { |f| f.name == @name } || find_pending_field
  return false unless fld

  if fld.signature_field? && image_payload?(@new_value)
    result = Actions::AddSignatureAppearance.new(@document, fld.ref, @new_value).call
    return result if result
    # If the appearance could not be added, fall through to a normal update.
  end

  original = get_object_body_with_patch(fld.ref)
  return false unless original

  # Default: the object we found is the field itself (flat structure).
  field_ref = fld.ref

  # A widget may have a separate parent field object that also carries /V.
  if original.include?("/Subtype /Widget")
    parent_ref = parent_ref_of(original)
    if parent_ref
      field_ref = parent_ref
      update_parent_field_value(field_ref)
    end
  end

  renaming = @new_name && !@new_name.empty?

  # Always update the object we found (widget or field).
  new_body = patch_field_value_body(original, @new_value)

  # Multiline fields get their appearance stream removed
  # (macOS Preview needs appearance streams to be regenerated for them).
  new_body = DictScan.remove_appearance_stream(new_body) if check_if_multiline_field(field_ref)

  new_body = patch_field_name_body(new_body, @new_name) if renaming

  # Never queue a patch that is no longer a dictionary.
  unless dictionary_body?(new_body)
    warn "Warning: Invalid patched body for #{fld.ref.inspect}, skipping update"
    return false
  end

  apply_patch(fld.ref, new_body, original)

  if renaming
    # The parent field object (when separate from the widget) carries /T too.
    rename_parent_field(field_ref) if field_ref != fld.ref
    update_widget_names_for_field(field_ref, @new_name)
  end

  # Also update widget annotations that reference this field via /Parent.
  update_widget_annotations_for_field(field_ref, @new_value)

  # Best-effort: ask viewers to regenerate appearances.
  ensure_need_appearances

  true
end

# Builds a temporary Field for a field that only exists in the document's
# pending patches (read from the document's @patches instance variable).
# Returns nil when no patch carries a matching /T together with an /FT.
private def find_pending_field
  # A missing @patches is treated as "no pending patches" instead of raising.
  patches = @document.instance_variable_get(:@patches) || []

  field_patch = patches.find do |patch|
    body = patch[:body]
    next unless body&.include?("/T")

    t_tok = DictScan.value_token_after("/T", body)
    t_tok && DictScan.decode_pdf_string(t_tok) == @name
  end
  return unless field_patch && field_patch[:body].include?("/FT")

  ft_tok = DictScan.value_token_after("/FT", field_patch[:body])
  return unless ft_tok

  Field.new(@name, nil, ft_tok, field_patch[:ref], @document, {})
end

# True when value is a String that is either a data:image/ URI or a base64
# payload longer than 50 characters.
private def image_payload?(value)
  return false unless value.is_a?(String)
  return true if value.start_with?("data:image/")

  # \A/\z anchor the whole string. With ^/$ any single line of ordinary
  # multi-line text (even a blank one) satisfied the pattern. CR/LF are
  # accepted so line-wrapped base64 keeps matching as it did before.
  value.length > 50 && value.match?(%r{\A[A-Za-z0-9+/\r\n]*={0,2}[\r\n]*\z})
end

# Parses the /Parent indirect reference of body into [object, generation],
# or returns nil when there is no such reference.
private def parent_ref_of(body)
  parent_tok = DictScan.value_token_after("/Parent", body)
  parent = parent_tok && /\A(\d+)\s+(\d+)\s+R/.match(parent_tok)
  # Base 10 is explicit: Integer("010") would otherwise be read as octal 8
  # and Integer("08") would raise.
  parent && [Integer(parent[1], 10), Integer(parent[2], 10)]
end

# Writes @new_value into the parent field object unless that object is
# itself a widget; multiline fields also lose their appearance stream.
private def update_parent_field_value(field_ref)
  field_body = get_object_body_with_patch(field_ref)
  return unless field_body && !field_body.include?("/Subtype /Widget")

  new_field_body = patch_field_value_body(field_body, @new_value)
  if DictScan.is_multiline_field?(field_body) || DictScan.is_multiline_field?(new_field_body)
    new_field_body = DictScan.remove_appearance_stream(new_field_body)
  end

  apply_patch(field_ref, new_field_body, field_body) if dictionary_body?(new_field_body)
end

# Writes @new_name into the parent field object unless that object is
# itself a widget.
private def rename_parent_field(field_ref)
  field_body = get_object_body_with_patch(field_ref)
  return unless field_body && !field_body.include?("/Subtype /Widget")

  new_field_body = patch_field_name_body(field_body, @new_name)
  apply_patch(field_ref, new_field_body, field_body) if dictionary_body?(new_field_body)
end

# Cheap sanity check that body still contains dictionary delimiters.
private def dictionary_body?(body)
  body && body.include?("<<") && body.include?(">>")
end
|
|
137
|
+
|
|
138
|
+
private
|
|
139
|
+
|
|
140
|
+
# Returns a copy of dict_body with /V set to new_value and, for button
# fields (/FT /Btn), /AS set to the appearance state chosen by
# DictScan.appearance_choice_for.
#
# dict_body - raw dictionary text of a field or widget object (may be nil)
# new_value - value to encode through DictScan.encode_pdf_string
#
# dict_body is returned unchanged when it is nil, contains no "<<", or when
# the /V edit yields something that no longer looks like a dictionary. When
# only the /AS edit fails, the body carrying the new /V is returned.
def patch_field_value_body(dict_body, new_value)
  return dict_body unless dict_body&.include?("<<")

  v_token = DictScan.encode_pdf_string(new_value)

  # The lookahead ensures we match the complete /V key, not a longer name.
  with_value =
    if dict_body.match?(%r{/V(?=[\s(<\[/])})
      DictScan.replace_key_value(dict_body, "/V", v_token)
    else
      DictScan.upsert_key_value(dict_body, "/V", v_token)
    end

  unless with_value && with_value.include?("<<") && with_value.include?(">>")
    warn "Warning: Dictionary corrupted after /V replacement"
    return dict_body # Keep the original rather than emit a broken object
  end

  # Only checkboxes/radio buttons need their /AS kept in sync with /V.
  return with_value unless with_value.match?(%r{/FT\s+/Btn})

  as_needed = DictScan.appearance_choice_for(new_value, with_value)
  return with_value unless as_needed

  with_state =
    if with_value.match?(%r{/AS(?=[\s(<\[/])})
      DictScan.replace_key_value(with_value, "/AS", as_needed)
    else
      DictScan.upsert_key_value(with_value, "/AS", as_needed)
    end

  return with_state if with_state && with_state.include?("<<") && with_state.include?(">>")

  warn "Warning: Dictionary corrupted after /AS replacement"
  # Revert only the /AS change. The already validated /V edit is kept whether
  # /V was replaced or newly inserted (previously an inserted /V was lost).
  with_value
end
|
|
192
|
+
|
|
193
|
+
# Returns a copy of dict_body whose /T (field name) entry is new_name.
#
# dict_body - raw dictionary text of a field or widget object (may be nil)
# new_name  - name to encode through DictScan.encode_pdf_string
#
# The input is handed back untouched when it is nil, has no "<<", or when
# the edit produces text that no longer contains both "<<" and ">>".
def patch_field_name_body(dict_body, new_name)
  return dict_body unless dict_body&.include?("<<")

  encoded_name = DictScan.encode_pdf_string(new_name)

  # Lookahead keeps longer names that merely start with /T from matching.
  renamed = if dict_body.match?(%r{/T(?=[\s(<\[/])})
              DictScan.replace_key_value(dict_body, "/T", encoded_name)
            else
              DictScan.upsert_key_value(dict_body, "/T", encoded_name)
            end

  if renamed && renamed.include?("<<") && renamed.include?(">>")
    renamed
  else
    warn "Warning: Dictionary corrupted after /T replacement"
    dict_body # Fall back to the untouched dictionary
  end
end
|
|
219
|
+
|
|
220
|
+
# Writes new_value into every widget annotation whose /Parent points at
# field_ref. When the field object is multiline, the widgets' appearance
# streams are removed as well.
#
# field_ref - [object_number, generation] of the field object
# new_value - value handed to patch_field_value_body for each widget
def update_widget_annotations_for_field(field_ref, new_value)
  field_body = get_object_body_with_patch(field_ref)
  is_multiline = field_body && DictScan.is_multiline_field?(field_body)

  resolver.each_object do |ref, raw_body|
    next unless raw_body
    next unless DictScan.is_widget?(raw_body)
    next unless raw_body.include?("/Parent")

    # Work on the most recent version of the widget (pending patch included).
    body = get_object_body_with_patch(ref)
    next unless body

    parent_tok = DictScan.value_token_after("/Parent", body)
    parent = parent_tok && /\A(\d+)\s+(\d+)\s+R/.match(parent_tok)
    next unless parent

    # Explicit base 10: Integer("010") would be read as octal 8 and
    # Integer("08") would raise.
    next unless [Integer(parent[1], 10), Integer(parent[2], 10)] == field_ref

    patched = patch_field_value_body(body, new_value)
    patched = DictScan.remove_appearance_stream(patched) if is_multiline

    # Same sanity check #call performs before queuing a patch.
    unless patched && patched.include?("<<") && patched.include?(">>")
      warn "Warning: Invalid patched body for #{ref.inspect}, skipping update"
      next
    end

    apply_patch(ref, patched, body)
  end
end
|
|
248
|
+
|
|
249
|
+
# Renames (/T) every widget annotation that belongs to the field.
#
# A widget belongs to the field when its /Parent reference equals field_ref
# or, failing that, when its own /T decodes to the current field name
# (@name) - some widgets might not have /Parent.
#
# field_ref - [object_number, generation] of the field object
# new_name  - name handed to patch_field_name_body for each widget
def update_widget_names_for_field(field_ref, new_name)
  resolver.each_object do |ref, raw_body|
    next unless raw_body
    next unless DictScan.is_widget?(raw_body)

    # Work on the most recent version of the widget (pending patch included).
    body = get_object_body_with_patch(ref)
    next unless body

    matches_parent = false
    if body.include?("/Parent")
      parent_tok = DictScan.value_token_after("/Parent", body)
      parent = parent_tok && /\A(\d+)\s+(\d+)\s+R/.match(parent_tok)
      # Explicit base 10: Integer("010") would be read as octal 8 and
      # Integer("08") would raise.
      matches_parent = !parent.nil? && [Integer(parent[1], 10), Integer(parent[2], 10)] == field_ref
    end

    # Only fall back to name matching when /Parent did not match, so a widget
    # matching both ways is patched once instead of twice from the same body.
    unless matches_parent
      next unless body.include?("/T")

      t_tok = DictScan.value_token_after("/T", body)
      next unless t_tok

      widget_name = DictScan.decode_pdf_string(t_tok)
      next unless widget_name && widget_name == @name
    end

    apply_patch(ref, patch_field_name_body(body, new_name), body)
  end
end
|
|
281
|
+
|
|
282
|
+
# Best-effort: adds "/NeedAppearances true" to the AcroForm dictionary so
# viewers regenerate field appearances.
#
# Does nothing when there is no AcroForm reference, when its body cannot be
# read, when the key is already present, or when the edited body no longer
# looks like a dictionary.
#
# NOTE(review): an existing "/NeedAppearances false" is left untouched because
# only the presence of the key is checked - confirm that is intended.
def ensure_need_appearances
  af_ref = acroform_ref
  return unless af_ref

  acro_body = get_object_body_with_patch(af_ref)
  # Being best-effort, an unreadable AcroForm must not abort the field update.
  return unless acro_body
  return if acro_body.include?("/NeedAppearances")

  acro_patched = DictScan.upsert_key_value(acro_body, "/NeedAppearances", "true")
  # Never queue a patch that is no longer a dictionary.
  return unless acro_patched && acro_patched.include?("<<") && acro_patched.include?(">>")

  apply_patch(af_ref, acro_patched, acro_body)
end
|
|
292
|
+
|
|
293
|
+
# Tells whether the object at field_ref is a multiline field according to
# DictScan.is_multiline_field?. Yields false when the object body cannot be
# fetched.
def check_if_multiline_field(field_ref)
  body = get_object_body_with_patch(field_ref)
  body ? DictScan.is_multiline_field?(body) : false
end
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|