acro_that 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -129,6 +129,36 @@ module AcroThat
129
129
  # Also update any widget annotations that reference this field via /Parent
130
130
  update_widget_annotations_for_field(field_ref, @new_value)
131
131
 
132
+ # If this is a checkbox without appearance streams, create them
133
+ if fld.button_field?
134
+ # Check if it's a checkbox (not a radio button) by checking field flags
135
+ field_body = get_object_body_with_patch(field_ref)
136
+ is_radio = false
137
+ if field_body
138
+ field_flags_match = field_body.match(%r{/Ff\s+(\d+)})
139
+ if field_flags_match
140
+ field_flags = field_flags_match[1].to_i
141
+ # Radio button flag is bit 15 = 32768
142
+ is_radio = field_flags.anybits?(32_768)
143
+ end
144
+ end
145
+
146
+ # Only create checkbox appearances (not radio buttons)
147
+ unless is_radio
148
+ widget_ref = find_checkbox_widget(fld.ref)
149
+ if widget_ref
150
+ widget_body = get_object_body_with_patch(widget_ref)
151
+ # Create appearances if /AP doesn't exist
152
+ unless widget_body&.include?("/AP")
153
+ rect = extract_widget_rect(widget_body)
154
+ if rect && rect[:width].positive? && rect[:height].positive?
155
+ add_checkbox_appearance(widget_ref, rect[:width], rect[:height])
156
+ end
157
+ end
158
+ end
159
+ end
160
+ end
161
+
132
162
  # Best-effort: set NeedAppearances to true so viewers regenerate appearances
133
163
  ensure_need_appearances
134
164
 
@@ -144,8 +174,22 @@ module AcroThat
144
174
  # Ensure we have a valid dictionary
145
175
  return dict_body unless dict_body&.include?("<<")
146
176
 
147
- # Encode the new value
148
- v_token = DictScan.encode_pdf_string(new_value)
177
+ # For checkboxes (/Btn fields), normalize value to "Yes" or "Off"
178
+ ft_pattern = %r{/FT\s+/Btn}
179
+ is_button_field = ft_pattern.match(dict_body)
180
+
181
+ normalized_value = if is_button_field
182
+ # For checkboxes/radio buttons, normalize to "Yes" or "Off"
183
+ # Accept "Yes", "/Yes" (PDF name format), true (boolean), or "true" (string)
184
+ value_str = new_value.to_s
185
+ is_checked = ["Yes", "/Yes", "true"].include?(value_str) || new_value == true
186
+ is_checked ? "Yes" : "Off"
187
+ else
188
+ new_value
189
+ end
190
+
191
+ # Encode the normalized value
192
+ v_token = DictScan.encode_pdf_string(normalized_value)
149
193
 
150
194
  # Find /V using pattern matching to ensure we get the complete key
151
195
  v_key_pattern = %r{/V(?=[\s(<\[/])}
@@ -166,24 +210,34 @@ module AcroThat
166
210
 
167
211
  # Update /AS for checkboxes/radio buttons if needed
168
212
  # Check for /FT /Btn more carefully
169
- ft_pattern = %r{/FT\s+/Btn}
170
- if ft_pattern.match(patched) && (as_needed = DictScan.appearance_choice_for(new_value, patched))
171
- as_pattern = %r{/AS(?=[\s(<\[/])}
172
- has_as = patched.match(as_pattern)
173
-
174
- patched = if has_as
175
- DictScan.replace_key_value(patched, "/AS", as_needed)
176
- else
177
- DictScan.upsert_key_value(patched, "/AS", as_needed)
178
- end
179
-
180
- # Verify /AS replacement worked
181
- unless patched && patched.include?("<<") && patched.include?(">>")
182
- warn "Warning: Dictionary corrupted after /AS replacement"
183
- # Revert to before /AS change
184
- return DictScan.replace_key_value(dict_body, "/V", v_token) if has_v
185
-
186
- return dict_body
213
+ if ft_pattern.match(patched)
214
+ # For button fields, set /AS based on normalized value
215
+ as_value = if normalized_value == "Yes"
216
+ "/Yes"
217
+ else
218
+ "/Off"
219
+ end
220
+
221
+ # Only set /AS if /AP exists (appearance dictionary is present)
222
+ # If /AP doesn't exist, we can't set /AS properly
223
+ if patched.include?("/AP")
224
+ as_pattern = %r{/AS(?=[\s(<\[/])}
225
+ has_as = patched.match(as_pattern)
226
+
227
+ patched = if has_as
228
+ DictScan.replace_key_value(patched, "/AS", as_value)
229
+ else
230
+ DictScan.upsert_key_value(patched, "/AS", as_value)
231
+ end
232
+
233
+ # Verify /AS replacement worked
234
+ unless patched && patched.include?("<<") && patched.include?(">>")
235
+ warn "Warning: Dictionary corrupted after /AS replacement"
236
+ # Revert to before /AS change
237
+ return DictScan.replace_key_value(dict_body, "/V", v_token) if has_v
238
+
239
+ return dict_body
240
+ end
187
241
  end
188
242
  end
189
243
 
@@ -296,6 +350,184 @@ module AcroThat
296
350
 
297
351
  DictScan.is_multiline_field?(field_body)
298
352
  end
353
+
354
+ def find_checkbox_widget(field_ref)
355
+ # Check patches first
356
+ patches = @document.instance_variable_get(:@patches)
357
+ patches.each do |patch|
358
+ next unless patch[:body]
359
+ next unless DictScan.is_widget?(patch[:body])
360
+
361
+ # Check if widget has /Parent pointing to field_ref
362
+ if patch[:body] =~ %r{/Parent\s+(\d+)\s+(\d+)\s+R}
363
+ parent_ref = [Integer(::Regexp.last_match(1)), Integer(::Regexp.last_match(2))]
364
+ return patch[:ref] if parent_ref == field_ref
365
+ end
366
+
367
+ # Also check if widget IS the field (flat structure)
368
+ if patch[:body].include?("/FT") && DictScan.value_token_after("/FT",
369
+ patch[:body]) == "/Btn" && (patch[:ref] == field_ref)
370
+ return patch[:ref]
371
+ end
372
+ end
373
+
374
+ # Then check resolver (for existing widgets)
375
+ resolver.each_object do |ref, body|
376
+ next unless body && DictScan.is_widget?(body)
377
+
378
+ # Check if widget has /Parent pointing to field_ref
379
+ if body =~ %r{/Parent\s+(\d+)\s+(\d+)\s+R}
380
+ parent_ref = [Integer(::Regexp.last_match(1)), Integer(::Regexp.last_match(2))]
381
+ return ref if parent_ref == field_ref
382
+ end
383
+
384
+ # Also check if widget IS the field (flat structure)
385
+ if body.include?("/FT") && DictScan.value_token_after("/FT", body) == "/Btn" && (ref == field_ref)
386
+ return ref
387
+ end
388
+ end
389
+
390
+ # Fallback: if field_ref itself is a widget
391
+ body = get_object_body_with_patch(field_ref)
392
+ return field_ref if body && DictScan.is_widget?(body) && body.include?("/FT") && DictScan.value_token_after(
393
+ "/FT", body
394
+ ) == "/Btn"
395
+
396
+ nil
397
+ end
398
+
399
+ def extract_widget_rect(widget_body)
400
+ return nil unless widget_body
401
+
402
+ rect_tok = DictScan.value_token_after("/Rect", widget_body)
403
+ return nil unless rect_tok&.start_with?("[")
404
+
405
+ rect_values = rect_tok.scan(/[-+]?\d*\.?\d+/).map(&:to_f)
406
+ return nil unless rect_values.length == 4
407
+
408
+ x1, y1, x2, y2 = rect_values
409
+ width = (x2 - x1).abs
410
+ height = (y2 - y1).abs
411
+
412
+ return nil if width <= 0 || height <= 0
413
+
414
+ { x: x1, y: y1, width: width, height: height }
415
+ end
416
+
417
+ def add_checkbox_appearance(widget_ref, width, height)
418
+ # Create appearance form XObjects for Yes and Off states
419
+ yes_obj_num = next_fresh_object_number
420
+ off_obj_num = yes_obj_num + 1
421
+
422
+ # Create Yes appearance (checked box with checkmark)
423
+ yes_body = create_checkbox_yes_appearance(width, height)
424
+ @document.instance_variable_get(:@patches) << { ref: [yes_obj_num, 0], body: yes_body }
425
+
426
+ # Create Off appearance (empty box)
427
+ off_body = create_checkbox_off_appearance(width, height)
428
+ @document.instance_variable_get(:@patches) << { ref: [off_obj_num, 0], body: off_body }
429
+
430
+ # Get current widget body and add /AP dictionary
431
+ original_widget_body = get_object_body_with_patch(widget_ref)
432
+ widget_body = original_widget_body.dup
433
+
434
+ # Create /AP dictionary with Yes and Off appearances
435
+ ap_dict = "<<\n /N <<\n /Yes #{yes_obj_num} 0 R\n /Off #{off_obj_num} 0 R\n >>\n>>"
436
+
437
+ # Add /AP to widget
438
+ if widget_body.include?("/AP")
439
+ # Replace existing /AP
440
+ ap_key_pattern = %r{/AP(?=[\s(<\[/])}
441
+ if widget_body.match(ap_key_pattern)
442
+ widget_body = DictScan.replace_key_value(widget_body, "/AP", ap_dict)
443
+ end
444
+ else
445
+ # Insert /AP before closing >>
446
+ widget_body = DictScan.upsert_key_value(widget_body, "/AP", ap_dict)
447
+ end
448
+
449
+ # Set /AS based on the value - use the EXACT same normalization logic as widget creation
450
+ # This ensures consistency between /V and /AS
451
+ # Normalize value: "Yes" if truthy (Yes, "/Yes", true, etc.), otherwise "Off"
452
+ value_str = @new_value.to_s
453
+ is_checked = value_str == "Yes" || value_str == "/Yes" || value_str == "true" || @new_value == true
454
+ normalized_value = is_checked ? "Yes" : "Off"
455
+
456
+ # Set /AS to match normalized value (same as what was set for /V in widget creation)
457
+ as_value = if normalized_value == "Yes"
458
+ "/Yes"
459
+ else
460
+ "/Off"
461
+ end
462
+
463
+ widget_body = if widget_body.include?("/AS")
464
+ DictScan.replace_key_value(widget_body, "/AS", as_value)
465
+ else
466
+ DictScan.upsert_key_value(widget_body, "/AS", as_value)
467
+ end
468
+
469
+ apply_patch(widget_ref, widget_body, original_widget_body)
470
+ end
471
+
472
+ def create_checkbox_yes_appearance(width, height)
473
+ # Create a form XObject that draws a checked checkbox
474
+ # Box outline + checkmark
475
+ # Scale to match width and height
476
+ # Simple appearance: draw a box and a checkmark
477
+ # For simplicity, use PDF drawing operators
478
+ # Box: rectangle from (0,0) to (width, height)
479
+ # Checkmark: simple path drawing
480
+
481
+ # PDF content stream for checked checkbox
482
+ # Draw just the checkmark (no box border)
483
+ border_width = [width * 0.08, height * 0.08].min
484
+
485
+ # Calculate checkmark path
486
+ check_x1 = width * 0.25
487
+ check_y1 = height * 0.45
488
+ check_x2 = width * 0.45
489
+ check_y2 = height * 0.25
490
+ check_x3 = width * 0.75
491
+ check_y3 = height * 0.75
492
+
493
+ content_stream = "q\n"
494
+ content_stream += "0 0 0 rg\n" # Black color (darker)
495
+ content_stream += "#{border_width} w\n" # Line width
496
+ # Draw checkmark only (no box border)
497
+ content_stream += "#{check_x1} #{check_y1} m\n"
498
+ content_stream += "#{check_x2} #{check_y2} l\n"
499
+ content_stream += "#{check_x3} #{check_y3} l\n"
500
+ content_stream += "S\n" # Stroke
501
+ content_stream += "Q\n"
502
+
503
+ build_form_xobject(content_stream, width, height)
504
+ end
505
+
506
+ def create_checkbox_off_appearance(width, height)
507
+ # Create a form XObject for unchecked checkbox
508
+ # Empty appearance (no border, no checkmark) - viewer will draw default checkbox
509
+
510
+ content_stream = "q\n"
511
+ # Empty appearance for unchecked state
512
+ content_stream += "Q\n"
513
+
514
+ build_form_xobject(content_stream, width, height)
515
+ end
516
+
517
+ def build_form_xobject(content_stream, width, height)
518
+ # Build a Form XObject dictionary with the given content stream
519
+ dict = "<<\n"
520
+ dict += " /Type /XObject\n"
521
+ dict += " /Subtype /Form\n"
522
+ dict += " /BBox [0 0 #{width} #{height}]\n"
523
+ dict += " /Length #{content_stream.bytesize}\n"
524
+ dict += ">>\n"
525
+ dict += "stream\n"
526
+ dict += content_stream
527
+ dict += "\nendstream"
528
+
529
+ dict
530
+ end
299
531
  end
300
532
  end
301
533
  end
@@ -130,6 +130,42 @@ module AcroThat
130
130
  end
131
131
  end
132
132
 
133
+ # Format a metadata key as a PDF dictionary key (ensure it starts with /)
134
+ def format_pdf_key(key)
135
+ key_str = key.to_s
136
+ key_str.start_with?("/") ? key_str : "/#{key_str}"
137
+ end
138
+
139
+ # Format a metadata value appropriately for PDF
140
+ def format_pdf_value(value)
141
+ case value
142
+ when Integer, Float
143
+ value.to_s
144
+ when String
145
+ # If it looks like a PDF string (starts with parenthesis or angle bracket), use as-is
146
+ if value.start_with?("(") || value.start_with?("<") || value.start_with?("/")
147
+ value
148
+ else
149
+ # Otherwise encode as a PDF string
150
+ encode_pdf_string(value)
151
+ end
152
+ when Array
153
+ # Array format: [item1 item2 item3]
154
+ items = value.map { |v| format_pdf_value(v) }.join(" ")
155
+ "[#{items}]"
156
+ when Hash
157
+ # Dictionary format: << /Key1 value1 /Key2 value2 >>
158
+ dict = value.map do |k, v|
159
+ pdf_key = format_pdf_key(k)
160
+ pdf_val = format_pdf_value(v)
161
+ " #{pdf_key} #{pdf_val}"
162
+ end.join("\n")
163
+ "<<\n#{dict}\n>>"
164
+ else
165
+ value.to_s
166
+ end
167
+ end
168
+
133
169
  def value_token_after(key, dict_src)
134
170
  # Find key followed by delimiter (whitespace, (, <, [, /)
135
171
  # Use regex to ensure key is a complete token
@@ -335,6 +371,19 @@ module AcroThat
335
371
  ff_value.anybits?(0x1000)
336
372
  end
337
373
 
374
+ # Parse a box array (MediaBox, CropBox, ArtBox, BleedBox, TrimBox, etc.)
375
+ # Returns a hash with keys :llx, :lly, :urx, :ury, or nil if not found/invalid
376
+ def parse_box(body, box_type)
377
+ pattern = %r{/#{box_type}\s*\[(.*?)\]}
378
+ return nil unless body =~ pattern
379
+
380
+ box_values = ::Regexp.last_match(1).scan(/[-+]?\d*\.?\d+/).map(&:to_f)
381
+ return nil unless box_values.length == 4
382
+
383
+ llx, lly, urx, ury = box_values
384
+ { llx: llx, lly: lly, urx: urx, ury: ury }
385
+ end
386
+
338
387
  # Remove /AP (appearance stream) entry from a dictionary
339
388
  def remove_appearance_stream(dict_body)
340
389
  return dict_body unless dict_body&.include?("/AP")
@@ -22,7 +22,7 @@ module AcroThat
22
22
  when String then File.binread(path_or_io)
23
23
  else path_or_io.binmode
24
24
  path_or_io.read
25
- end
25
+ end.freeze
26
26
  @resolver = AcroThat::ObjectResolver.new(@raw)
27
27
  @patches = []
28
28
  end
@@ -63,8 +63,9 @@ module AcroThat
63
63
 
64
64
  # Flatten this document in-place (mutates current instance)
65
65
  def flatten!
66
- flattened_content = flatten
66
+ flattened_content = flatten.freeze
67
67
  @raw = flattened_content
68
+ @resolver.clear_cache
68
69
  @resolver = AcroThat::ObjectResolver.new(flattened_content)
69
70
  @patches = []
70
71
 
@@ -84,58 +85,19 @@ module AcroThat
84
85
  # Extract MediaBox, CropBox, or ArtBox for dimensions
85
86
  width = nil
86
87
  height = nil
87
- media_box = nil
88
- crop_box = nil
89
- art_box = nil
90
- bleed_box = nil
91
- trim_box = nil
92
-
93
- # Try MediaBox first (most common)
94
- if body =~ %r{/MediaBox\s*\[(.*?)\]}
95
- box_values = ::Regexp.last_match(1).scan(/[-+]?\d*\.?\d+/).map(&:to_f)
96
- if box_values.length == 4
97
- llx, lly, urx, ury = box_values
98
- width = urx - llx
99
- height = ury - lly
100
- media_box = { llx: llx, lly: lly, urx: urx, ury: ury }
101
- end
102
- end
103
88
 
104
- # Try CropBox
105
- if body =~ %r{/CropBox\s*\[(.*?)\]}
106
- box_values = ::Regexp.last_match(1).scan(/[-+]?\d*\.?\d+/).map(&:to_f)
107
- if box_values.length == 4
108
- llx, lly, urx, ury = box_values
109
- crop_box = { llx: llx, lly: lly, urx: urx, ury: ury }
110
- end
89
+ # Try MediaBox first (most common) - also extract width/height
90
+ media_box = DictScan.parse_box(body, "MediaBox")
91
+ if media_box
92
+ width = media_box[:urx] - media_box[:llx]
93
+ height = media_box[:ury] - media_box[:lly]
111
94
  end
112
95
 
113
- # Try ArtBox
114
- if body =~ %r{/ArtBox\s*\[(.*?)\]}
115
- box_values = ::Regexp.last_match(1).scan(/[-+]?\d*\.?\d+/).map(&:to_f)
116
- if box_values.length == 4
117
- llx, lly, urx, ury = box_values
118
- art_box = { llx: llx, lly: lly, urx: urx, ury: ury }
119
- end
120
- end
121
-
122
- # Try BleedBox
123
- if body =~ %r{/BleedBox\s*\[(.*?)\]}
124
- box_values = ::Regexp.last_match(1).scan(/[-+]?\d*\.?\d+/).map(&:to_f)
125
- if box_values.length == 4
126
- llx, lly, urx, ury = box_values
127
- bleed_box = { llx: llx, lly: lly, urx: urx, ury: ury }
128
- end
129
- end
130
-
131
- # Try TrimBox
132
- if body =~ %r{/TrimBox\s*\[(.*?)\]}
133
- box_values = ::Regexp.last_match(1).scan(/[-+]?\d*\.?\d+/).map(&:to_f)
134
- if box_values.length == 4
135
- llx, lly, urx, ury = box_values
136
- trim_box = { llx: llx, lly: lly, urx: urx, ury: ury }
137
- end
138
- end
96
+ # Parse other box types
97
+ crop_box = DictScan.parse_box(body, "CropBox")
98
+ art_box = DictScan.parse_box(body, "ArtBox")
99
+ bleed_box = DictScan.parse_box(body, "BleedBox")
100
+ trim_box = DictScan.parse_box(body, "TrimBox")
139
101
 
140
102
  # Extract rotation
141
103
  rotate = nil
@@ -266,6 +228,11 @@ module AcroThat
266
228
  ft_tok = body.include?("/FT") ? DictScan.value_token_after("/FT", body) : nil
267
229
  type = ft_tok
268
230
 
231
+ # Normalize button field values: "Yes" -> "/Yes" to match PDF name conventions
232
+ if type == "/Btn" && value == "Yes"
233
+ value = "/Yes"
234
+ end
235
+
269
236
  position = {}
270
237
  if is_widget
271
238
  rect_tok = DictScan.value_token_after("/Rect", body)
@@ -637,8 +604,9 @@ module AcroThat
637
604
 
638
605
  # Clean up in-place (mutates current instance)
639
606
  def clear!(...)
640
- cleaned_content = clear(...)
607
+ cleaned_content = clear(...).freeze
641
608
  @raw = cleaned_content
609
+ @resolver.clear_cache
642
610
  @resolver = AcroThat::ObjectResolver.new(cleaned_content)
643
611
  @patches = []
644
612
 
@@ -649,8 +617,9 @@ module AcroThat
649
617
  def write(path_out = nil, flatten: true)
650
618
  deduped_patches = @patches.reverse.uniq { |p| p[:ref] }.reverse
651
619
  writer = AcroThat::IncrementalWriter.new(@raw, deduped_patches)
652
- @raw = writer.render
620
+ @raw = writer.render.freeze
653
621
  @patches = []
622
+ @resolver.clear_cache
654
623
  @resolver = AcroThat::ObjectResolver.new(@raw)
655
624
 
656
625
  flatten! if flatten
@@ -9,6 +9,8 @@ module AcroThat
9
9
  TYPES = {
10
10
  text: "/Tx",
11
11
  button: "/Btn",
12
+ checkbox: "/Btn",
13
+ radio: "/Btn",
12
14
  choice: "/Ch",
13
15
  signature: "/Sig"
14
16
  }.freeze
@@ -16,8 +16,9 @@ module AcroThat
16
16
  max_obj = scan_max_obj_number(@orig)
17
17
 
18
18
  # Ensure we end with a newline before appending
19
- original_with_newline = @orig.dup
20
- original_with_newline << "\n" unless @orig.end_with?("\n")
19
+ # Avoid dup by concatenating instead of modifying in place
20
+ newline_if_needed = @orig.end_with?("\n") ? "".b : "\n".b
21
+ original_with_newline = @orig + newline_if_needed
21
22
 
22
23
  buf = +""
23
24
  offsets = []
@@ -49,6 +49,11 @@ module AcroThat
49
49
  end
50
50
  end
51
51
 
52
+ # Clear the object stream cache to free memory
53
+ def clear_cache
54
+ @objstm_cache.clear
55
+ end
56
+
52
57
  def object_body(ref)
53
58
  case (e = @entries[ref])&.type
54
59
  when :in_file
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AcroThat
4
- VERSION = "0.1.5"
4
+ VERSION = "0.1.7"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: acro_that
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Wynkoop
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-11-01 00:00:00.000000000 Z
11
+ date: 2025-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chunky_png
@@ -88,7 +88,6 @@ executables: []
88
88
  extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
- - ".DS_Store"
92
91
  - ".gitignore"
93
92
  - ".rubocop.yml"
94
93
  - CHANGELOG.md
@@ -103,6 +102,9 @@ files:
103
102
  - docs/object_streams.md
104
103
  - docs/pdf_structure.md
105
104
  - issues/README.md
105
+ - issues/memory-benchmark-results.md
106
+ - issues/memory-improvements.md
107
+ - issues/memory-optimization-summary.md
106
108
  - issues/refactoring-opportunities.md
107
109
  - lib/acro_that.rb
108
110
  - lib/acro_that/actions/add_field.rb
data/.DS_Store DELETED
Binary file