acro_that 0.1.8 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,9 +48,8 @@ module AcroThat
48
48
  # Check if new_value looks like base64 image data or data URI
49
49
  image_data = @new_value
50
50
  if image_data && image_data.is_a?(String) && (image_data.start_with?("data:image/") || (image_data.length > 50 && image_data.match?(%r{^[A-Za-z0-9+/]*={0,2}$})))
51
- # Try adding signature appearance
52
- action = Actions::AddSignatureAppearance.new(@document, fld.ref, image_data)
53
- result = action.call
51
+ # Try adding signature appearance using Signature field class
52
+ result = AcroThat::Fields::Signature.add_appearance(@document, fld.ref, image_data)
54
53
  return result if result
55
54
  # If appearance fails, fall through to normal update
56
55
  end
@@ -143,17 +142,18 @@ module AcroThat
143
142
  end
144
143
  end
145
144
 
146
- # Only create checkbox appearances (not radio buttons)
147
- unless is_radio
145
+ if is_radio
146
+ # For radio buttons, update all widget appearances (overwrite existing)
147
+ update_radio_button_appearances(field_ref)
148
+ else
149
+ # For checkboxes, create/update appearance
148
150
  widget_ref = find_checkbox_widget(fld.ref)
149
151
  if widget_ref
150
152
  widget_body = get_object_body_with_patch(widget_ref)
151
- # Create appearances if /AP doesn't exist
152
- unless widget_body&.include?("/AP")
153
- rect = extract_widget_rect(widget_body)
154
- if rect && rect[:width].positive? && rect[:height].positive?
155
- add_checkbox_appearance(widget_ref, rect[:width], rect[:height])
156
- end
153
+ # Create appearances if /AP doesn't exist, or overwrite if it does
154
+ rect = extract_widget_rect(widget_body)
155
+ if rect && rect[:width].positive? && rect[:height].positive?
156
+ add_checkbox_appearance(widget_ref, rect[:width], rect[:height])
157
157
  end
158
158
  end
159
159
  end
@@ -178,8 +178,34 @@ module AcroThat
178
178
  ft_pattern = %r{/FT\s+/Btn}
179
179
  is_button_field = ft_pattern.match(dict_body)
180
180
 
181
- normalized_value = if is_button_field
182
- # For checkboxes/radio buttons, normalize to "Yes" or "Off"
181
+ # Check if it's a radio button by checking field flags
182
+ # For widgets, check the parent field's flags since widgets don't have /Ff directly
183
+ is_radio = false
184
+ if is_button_field
185
+ field_flags_match = dict_body.match(%r{/Ff\s+(\d+)})
186
+ if field_flags_match
187
+ field_flags = field_flags_match[1].to_i
188
+ # Radio flag is bit position 16 in the PDF spec (1-based), i.e. 1 << 15 = 32768
189
+ is_radio = field_flags.anybits?(32_768)
190
+ elsif dict_body.include?("/Parent")
191
+ # This is a widget - check parent field's flags
192
+ parent_tok = DictScan.value_token_after("/Parent", dict_body)
193
+ if parent_tok && parent_tok =~ /\A(\d+)\s+(\d+)\s+R/
194
+ parent_ref = [Integer(::Regexp.last_match(1)), Integer(::Regexp.last_match(2))]
195
+ parent_body = get_object_body_with_patch(parent_ref)
196
+ if parent_body
197
+ parent_flags_match = parent_body.match(%r{/Ff\s+(\d+)})
198
+ if parent_flags_match
199
+ parent_flags = parent_flags_match[1].to_i
200
+ is_radio = parent_flags.anybits?(32_768)
201
+ end
202
+ end
203
+ end
204
+ end
205
+ end
206
+
207
+ normalized_value = if is_button_field && !is_radio
208
+ # For checkboxes, normalize to "Yes" or "Off"
183
209
  # Accept "Yes", "/Yes" (PDF name format), true (boolean), or "true" (string)
184
210
  value_str = new_value.to_s
185
211
  is_checked = ["Yes", "/Yes", "true"].include?(value_str) || new_value == true
@@ -189,7 +215,13 @@ module AcroThat
189
215
  end
190
216
 
191
217
  # Encode the normalized value
192
- v_token = DictScan.encode_pdf_string(normalized_value)
218
+ # For checkboxes, use PDF name format to match /AS appearance state format
219
+ # For radio buttons and other fields, use PDF string format
220
+ v_token = if is_button_field && !is_radio
221
+ DictScan.encode_pdf_name(normalized_value)
222
+ else
223
+ DictScan.encode_pdf_string(normalized_value)
224
+ end
193
225
 
194
226
  # Find /V using pattern matching to ensure we get the complete key
195
227
  v_key_pattern = %r{/V(?=[\s(<\[/])}
@@ -338,9 +370,14 @@ module AcroThat
338
370
  return unless af_ref
339
371
 
340
372
  acro_body = get_object_body_with_patch(af_ref)
341
- return if acro_body.include?("/NeedAppearances")
342
-
343
- acro_patched = DictScan.upsert_key_value(acro_body, "/NeedAppearances", "true")
373
+ # Set /NeedAppearances false to use our custom appearance streams
374
+ # If we set it to true, viewers will ignore our custom appearances and generate defaults
375
+ # (e.g., circular radio buttons instead of our square checkboxes)
376
+ acro_patched = if acro_body.include?("/NeedAppearances")
377
+ DictScan.replace_key_value(acro_body, "/NeedAppearances", "false")
378
+ else
379
+ DictScan.upsert_key_value(acro_body, "/NeedAppearances", "false")
380
+ end
344
381
  apply_patch(af_ref, acro_patched, acro_body)
345
382
  end
346
383
 
@@ -396,6 +433,76 @@ module AcroThat
396
433
  nil
397
434
  end
398
435
 
436
# Regenerate the appearance of every widget that belongs to a radio group.
#
# Widgets are collected from two places, in this order:
#   1. pending patches on the document (new or modified widgets), and
#   2. objects yielded by the resolver (widgets already in the file).
# Each widget ref is processed at most once, even when several patches
# exist for the same widget, so its appearance is not rebuilt repeatedly.
#
# parent_ref - [object_number, generation] of the radio group's parent field.
#
# For every matching widget with a positive-size rect, the export value is
# taken from the first non-"Off" entry of the widget's /AP /N dictionary
# (falling back to "widget_<object number>") and handed to
# AcroThat::Fields::Radio#add_radio_button_appearance.
def update_radio_button_appearances(parent_ref)
  parent_pattern = %r{/Parent\s+(\d+)\s+(\d+)\s+R}

  # True when +body+ is a widget dictionary whose /Parent is parent_ref.
  child_of_parent = lambda do |body|
    next false unless body && DictScan.is_widget?(body)

    match = parent_pattern.match(body)
    next false unless match

    # Base 10 is explicit so a zero-padded number is never read as octal.
    [Integer(match[1], 10), Integer(match[2], 10)] == parent_ref
  end

  widgets = []
  seen = {}
  remember = lambda do |ref|
    next if seen.key?(ref)

    seen[ref] = true
    widgets << ref
  end

  # Patches first, so freshly created or modified widgets keep their order.
  patches = @document.instance_variable_get(:@patches) || []
  patches.each do |patch|
    remember.call(patch[:ref]) if child_of_parent.call(patch[:body])
  end

  # Then widgets that already exist in the underlying document.
  resolver.each_object do |ref, body|
    remember.call(ref) if child_of_parent.call(body)
  end

  widgets.each do |widget_ref|
    widget_body = get_object_body_with_patch(widget_ref)
    next unless widget_body

    rect = extract_widget_rect(widget_body)
    next unless rect && rect[:width].positive? && rect[:height].positive?

    # Export value = first appearance state in /AP /N that is not /Off.
    export_value = nil
    if widget_body.include?("/AP")
      ap_tok = DictScan.value_token_after("/AP", widget_body)
      n_tok = DictScan.value_token_after("/N", ap_tok) if ap_tok&.start_with?("<<")
      if n_tok&.start_with?("<<")
        state_names = n_tok.scan(%r{/([^\s<>\[\]]+)\s+\d+\s+\d+\s+R}).flatten
        export_value = state_names.find { |state| state != "Off" }
      end
    end

    # No usable state name: derive one from the widget's object number.
    export_value ||= "widget_#{widget_ref[0]}"

    # Reuse the Radio field's appearance builder (a private method, hence send).
    radio_handler = AcroThat::Fields::Radio.new(@document, "", { width: rect[:width], height: rect[:height] })
    radio_handler.send(
      :add_radio_button_appearance,
      widget_ref[0],
      export_value,
      0, 0, # x, y not needed when overwriting
      rect[:width],
      rect[:height],
      parent_ref
    )
  end
end
505
+
399
506
  def extract_widget_rect(widget_body)
400
507
  return nil unless widget_body
401
508
 
@@ -470,34 +577,57 @@ module AcroThat
470
577
  end
471
578
 
472
579
  def create_checkbox_yes_appearance(width, height)
473
- # Create a form XObject that draws a checked checkbox
474
- # Box outline + checkmark
475
- # Scale to match width and height
476
- # Simple appearance: draw a box and a checkmark
477
- # For simplicity, use PDF drawing operators
478
- # Box: rectangle from (0,0) to (width, height)
479
- # Checkmark: simple path drawing
480
-
481
- # PDF content stream for checked checkbox
482
- # Draw just the checkmark (no box border)
580
+ line_width = [width * 0.05, height * 0.05].min
483
581
  border_width = [width * 0.08, height * 0.08].min
484
582
 
485
- # Calculate checkmark path
486
- check_x1 = width * 0.25
487
- check_y1 = height * 0.45
488
- check_x2 = width * 0.45
489
- check_y2 = height * 0.25
490
- check_x3 = width * 0.75
491
- check_y3 = height * 0.75
583
+ # Define checkmark in normalized coordinates (0-1 range) for consistent aspect ratio
584
+ # Checkmark shape: three points forming a checkmark
585
+ norm_x1 = 0.25
586
+ norm_y1 = 0.55
587
+ norm_x2 = 0.45
588
+ norm_y2 = 0.35
589
+ norm_x3 = 0.75
590
+ norm_y3 = 0.85
591
+
592
+ # Calculate scale to maximize size while maintaining aspect ratio
593
+ # Use the smaller dimension to ensure it fits
594
+ scale = [width, height].min * 0.85 # Use 85% of the smaller dimension
595
+
596
+ # Calculate checkmark dimensions
597
+ check_width = scale
598
+ check_height = scale
599
+
600
+ # Center the checkmark in the box
601
+ offset_x = (width - check_width) / 2
602
+ offset_y = (height - check_height) / 2
603
+
604
+ # Calculate actual coordinates
605
+ check_x1 = offset_x + norm_x1 * check_width
606
+ check_y1 = offset_y + norm_y1 * check_height
607
+ check_x2 = offset_x + norm_x2 * check_width
608
+ check_y2 = offset_y + norm_y2 * check_height
609
+ check_x3 = offset_x + norm_x3 * check_width
610
+ check_y3 = offset_y + norm_y3 * check_height
492
611
 
493
612
  content_stream = "q\n"
494
- content_stream += "0 0 0 rg\n" # Black color (darker)
495
- content_stream += "#{border_width} w\n" # Line width
496
- # Draw checkmark only (no box border)
613
+ # Draw square border around field bounds
614
+ content_stream += "0 0 0 RG\n" # Black stroke color
615
+ content_stream += "#{line_width} w\n" # Line width
616
+ # Draw rectangle from (0,0) to (width, height)
617
+ content_stream += "0 0 m\n"
618
+ content_stream += "#{width} 0 l\n"
619
+ content_stream += "#{width} #{height} l\n"
620
+ content_stream += "0 #{height} l\n"
621
+ content_stream += "0 0 l\n"
622
+ content_stream += "S\n" # Stroke the border
623
+
624
+ # Draw checkmark
625
+ content_stream += "0 0 0 rg\n" # Black fill color
626
+ content_stream += "#{border_width} w\n" # Line width for checkmark
497
627
  content_stream += "#{check_x1} #{check_y1} m\n"
498
628
  content_stream += "#{check_x2} #{check_y2} l\n"
499
629
  content_stream += "#{check_x3} #{check_y3} l\n"
500
- content_stream += "S\n" # Stroke
630
+ content_stream += "S\n" # Stroke the checkmark
501
631
  content_stream += "Q\n"
502
632
 
503
633
  build_form_xobject(content_stream, width, height)
@@ -130,6 +130,32 @@ module AcroThat
130
130
  end
131
131
  end
132
132
 
133
# Encode a value as a PDF name object (e.g. "Yes" -> "/Yes").
#
# Per the PDF name syntax, only regular characters in the printable range
# "!".."~" may appear literally. Everything else is written as "#" followed
# by the byte's two-digit uppercase hex code:
#   * the number sign itself (#)
#   * delimiters: ( ) < > [ ] { } / %
#   * whitespace (including the plain space) and control characters
#   * DEL (0x7F) and all non-ASCII bytes (0x80-0xFF)
#
# A single leading "/" on the input is treated as the name marker and is not
# encoded; any later "/" is hex-encoded.
#
# name - anything responding to #to_s (String, Symbol, ...).
#
# Returns the encoded name as a String that always starts with "/".
#
# Example: "(Two Hr) Priority 2" becomes "/#28Two#20Hr#29#20Priority#202"
def encode_pdf_name(name)
  raw = name.to_s
  # Drop the leading name marker if present (it is added back below)
  raw = raw[1..] if raw.start_with?("/")

  # Characters inside the printable range that still must be hex-encoded
  reserved = "#()<>[]{}/%"

  encoded = raw.each_byte.map do |byte|
    if byte.between?(0x21, 0x7E) && !reserved.include?(byte.chr)
      # Regular printable ASCII: use as-is
      byte.chr
    else
      # Whitespace, control, delimiter, "#", DEL or non-ASCII byte
      format("#%02X", byte)
    end
  end.join

  "/#{encoded}"
end
158
+
133
159
  # Format a metadata key as a PDF dictionary key (ensure it starts with /)
134
160
  def format_pdf_key(key)
135
161
  key_str = key.to_s
@@ -28,6 +28,8 @@ module AcroThat
28
28
  @raw = extract_pdf_from_form_data(raw_bytes).freeze
29
29
  @resolver = AcroThat::ObjectResolver.new(@raw)
30
30
  @patches = []
31
+ # Track radio button groups: group_id -> parent_field_ref
32
+ @radio_groups = {}
31
33
  end
32
34
 
33
35
  # Flatten this document to remove incremental updates
@@ -35,18 +37,27 @@ module AcroThat
35
37
  root_ref = @resolver.root_ref
36
38
  raise "Cannot flatten: no /Root found" unless root_ref
37
39
 
38
- objects = []
40
+ # First pass: collect only references (lightweight) and find max_obj_num
41
+ # This avoids loading all object bodies into memory at once
42
+ refs = []
43
+ max_obj_num = 0
39
44
  @resolver.each_object do |ref, body|
40
- objects << { ref: ref, body: body } if body
45
+ if body
46
+ refs << ref
47
+ max_obj_num = [max_obj_num, ref[0]].max
48
+ end
41
49
  end
42
50
 
43
- objects.sort_by! { |obj| obj[:ref][0] }
51
+ # Sort references by object number
52
+ refs.sort_by! { |ref| ref[0] }
44
53
 
54
+ # Second pass: write objects in sorted order, retrieving bodies on demand
45
55
  writer = PDFWriter.new
46
56
  writer.write_header
47
57
 
48
- objects.each do |obj|
49
- writer.write_object(obj[:ref], obj[:body])
58
+ refs.each do |ref|
59
+ body = @resolver.object_body(ref)
60
+ writer.write_object(ref, body) if body
50
61
  end
51
62
 
52
63
  writer.write_xref
@@ -58,7 +69,6 @@ module AcroThat
58
69
  end
59
70
 
60
71
  # Write trailer
61
- max_obj_num = objects.map { |obj| obj[:ref][0] }.max || 0
62
72
  writer.write_trailer(max_obj_num + 1, root_ref, info_ref)
63
73
 
64
74
  writer.output
@@ -381,9 +391,11 @@ module AcroThat
381
391
  all_fields = list_fields
382
392
 
383
393
  if block_given?
384
- # Use block to determine which fields to keep
394
+ # Use block to determine which fields to remove
395
+ # Block receives field object (can check field.name, field.value, etc.)
396
+ # Return true to remove the field, false to keep it
385
397
  all_fields.each do |field|
386
- fields_to_remove.add(field.name) unless yield(field.name)
398
+ fields_to_remove.add(field.name) if yield(field)
387
399
  end
388
400
  elsif keep_fields
389
401
  # Keep only specified fields
@@ -443,19 +455,28 @@ module AcroThat
443
455
  end
444
456
  end
445
457
 
446
- # Collect objects to write (excluding removed fields and widgets)
447
- objects = []
458
+ # Collect refs to write (excluding removed fields and widgets)
459
+ # Store refs only initially to avoid loading all bodies into memory at once
460
+ refs_to_keep = []
448
461
  @resolver.each_object do |ref, body|
449
462
  next if field_refs_to_remove.include?(ref)
450
463
  next if widget_refs_to_remove.include?(ref)
451
464
  next unless body
452
465
 
453
- objects << { ref: ref, body: body }
466
+ refs_to_keep << ref
467
+ end
468
+
469
+ # Build the objects list (array of { ref:, body: } hashes) by resolving the body of every kept ref
470
+ # Refs whose body cannot be resolved are skipped
471
+ objects = []
472
+ refs_to_keep.each do |ref|
473
+ body = @resolver.object_body(ref)
474
+ objects << { ref: ref, body: body } if body
454
475
  end
455
476
 
456
477
  # Process AcroForm to remove field references from /Fields array
457
478
  af_ref = acroform_ref
458
- if af_ref
479
+ if af_ref && refs_to_keep.include?(af_ref)
459
480
  # Find the AcroForm object in our objects list
460
481
  af_obj = objects.find { |o| o[:ref] == af_ref }
461
482
  if af_obj