acro_that 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +78 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +86 -0
- data/README.md +360 -0
- data/Rakefile +18 -0
- data/acro_that.gemspec +34 -0
- data/docs/README.md +99 -0
- data/docs/dict_scan_explained.md +341 -0
- data/docs/object_streams.md +311 -0
- data/docs/pdf_structure.md +251 -0
- data/lib/acro_that/actions/add_field.rb +278 -0
- data/lib/acro_that/actions/add_signature_appearance.rb +422 -0
- data/lib/acro_that/actions/base.rb +44 -0
- data/lib/acro_that/actions/remove_field.rb +158 -0
- data/lib/acro_that/actions/update_field.rb +301 -0
- data/lib/acro_that/dict_scan.rb +413 -0
- data/lib/acro_that/document.rb +331 -0
- data/lib/acro_that/field.rb +143 -0
- data/lib/acro_that/incremental_writer.rb +244 -0
- data/lib/acro_that/object_resolver.rb +376 -0
- data/lib/acro_that/objstm.rb +75 -0
- data/lib/acro_that/pdf_writer.rb +97 -0
- data/lib/acro_that/version.rb +5 -0
- data/lib/acro_that.rb +24 -0
- metadata +143 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: dc2bd5a5999f0593e7ec92b9d5c2426e0654d154e10add82b2e5be89ac2a9598
|
|
4
|
+
data.tar.gz: bee07674d5dee314fc4578f165926e94b8b87013aa40b2d5625722e3ae3de22a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 606104cbfe93792dc1f6f061fd566492a853625837d6fca8fca1957b352474db11a5988418d5539d612bbd460211d5f3966261c5a9f8a7653d8746106c2a8139
|
|
7
|
+
data.tar.gz: 4f3375ed35f820dd9146798f4c50427d1f47233c263bee886ec4e025cdc5ebbc982fb89b4cc8b1313b04125d10eaa4e4f28fe1cec419f28fec343dd2ead5a8b7
|
data/.DS_Store
ADDED
|
Binary file
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require:
|
|
4
|
+
- rubocop-rspec
|
|
5
|
+
|
|
6
|
+
AllCops:
|
|
7
|
+
NewCops: enable
|
|
8
|
+
TargetRubyVersion: 3.1
|
|
9
|
+
Exclude:
|
|
10
|
+
- 'vendor/**/*'
|
|
11
|
+
- 'tmp/**/*'
|
|
12
|
+
- '*.gemspec'
|
|
13
|
+
|
|
14
|
+
# Disable problematic cops
|
|
15
|
+
Capybara/RSpec/PredicateMatcher:
|
|
16
|
+
Enabled: false
|
|
17
|
+
|
|
18
|
+
RSpec/FilePath:
|
|
19
|
+
Enabled: false
|
|
20
|
+
|
|
21
|
+
RSpec/SpecFilePathFormat:
|
|
22
|
+
Enabled: false
|
|
23
|
+
|
|
24
|
+
Style/Documentation:
|
|
25
|
+
Enabled: false
|
|
26
|
+
|
|
27
|
+
Style/StringLiterals:
|
|
28
|
+
EnforcedStyle: double_quotes
|
|
29
|
+
|
|
30
|
+
Style/FrozenStringLiteralComment:
|
|
31
|
+
Enabled: true
|
|
32
|
+
|
|
33
|
+
Layout/LineLength:
|
|
34
|
+
Max: 120
|
|
35
|
+
|
|
36
|
+
Metrics/AbcSize:
|
|
37
|
+
Max: 20
|
|
38
|
+
|
|
39
|
+
Metrics/MethodLength:
|
|
40
|
+
Max: 20
|
|
41
|
+
|
|
42
|
+
Metrics/ClassLength:
|
|
43
|
+
Max: 150
|
|
44
|
+
|
|
45
|
+
Metrics/ModuleLength:
|
|
46
|
+
Max: 150
|
|
47
|
+
|
|
48
|
+
Metrics/BlockLength:
|
|
49
|
+
Exclude:
|
|
50
|
+
- 'spec/**/*'
|
|
51
|
+
- 'Rakefile'
|
|
52
|
+
|
|
53
|
+
Style/ClassAndModuleChildren:
|
|
54
|
+
Enabled: false
|
|
55
|
+
|
|
56
|
+
Style/GuardClause:
|
|
57
|
+
Enabled: false
|
|
58
|
+
|
|
59
|
+
Style/IfUnlessModifier:
|
|
60
|
+
Enabled: false
|
|
61
|
+
|
|
62
|
+
Style/RedundantReturn:
|
|
63
|
+
Enabled: false
|
|
64
|
+
|
|
65
|
+
Style/RescueStandardError:
|
|
66
|
+
Enabled: false
|
|
67
|
+
|
|
68
|
+
Style/SafeNavigation:
|
|
69
|
+
Enabled: false
|
|
70
|
+
|
|
71
|
+
Style/TrivialAccessors:
|
|
72
|
+
Enabled: false
|
|
73
|
+
|
|
74
|
+
RSpec/ExampleLength:
|
|
75
|
+
Max: 10
|
|
76
|
+
|
|
77
|
+
RSpec/MultipleExpectations:
|
|
78
|
+
Max: 5
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
acro_that (0.1.0)
|
|
5
|
+
chunky_png (~> 1.4)
|
|
6
|
+
|
|
7
|
+
GEM
|
|
8
|
+
remote: https://rubygems.org/
|
|
9
|
+
specs:
|
|
10
|
+
ast (2.4.3)
|
|
11
|
+
chunky_png (1.4.0)
|
|
12
|
+
coderay (1.1.3)
|
|
13
|
+
diff-lcs (1.6.2)
|
|
14
|
+
json (2.15.2)
|
|
15
|
+
language_server-protocol (3.17.0.5)
|
|
16
|
+
lint_roller (1.1.0)
|
|
17
|
+
method_source (1.1.0)
|
|
18
|
+
parallel (1.27.0)
|
|
19
|
+
parser (3.3.10.0)
|
|
20
|
+
ast (~> 2.4.1)
|
|
21
|
+
racc
|
|
22
|
+
prism (1.6.0)
|
|
23
|
+
pry (0.15.2)
|
|
24
|
+
coderay (~> 1.1)
|
|
25
|
+
method_source (~> 1.0)
|
|
26
|
+
racc (1.8.1)
|
|
27
|
+
rainbow (3.1.1)
|
|
28
|
+
regexp_parser (2.11.3)
|
|
29
|
+
rspec (3.13.2)
|
|
30
|
+
rspec-core (~> 3.13.0)
|
|
31
|
+
rspec-expectations (~> 3.13.0)
|
|
32
|
+
rspec-mocks (~> 3.13.0)
|
|
33
|
+
rspec-core (3.13.6)
|
|
34
|
+
rspec-support (~> 3.13.0)
|
|
35
|
+
rspec-expectations (3.13.5)
|
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
37
|
+
rspec-support (~> 3.13.0)
|
|
38
|
+
rspec-mocks (3.13.6)
|
|
39
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
40
|
+
rspec-support (~> 3.13.0)
|
|
41
|
+
rspec-support (3.13.6)
|
|
42
|
+
rubocop (1.81.6)
|
|
43
|
+
json (~> 2.3)
|
|
44
|
+
language_server-protocol (~> 3.17.0.2)
|
|
45
|
+
lint_roller (~> 1.1.0)
|
|
46
|
+
parallel (~> 1.10)
|
|
47
|
+
parser (>= 3.3.0.2)
|
|
48
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
49
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
50
|
+
rubocop-ast (>= 1.47.1, < 2.0)
|
|
51
|
+
ruby-progressbar (~> 1.7)
|
|
52
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
53
|
+
rubocop-ast (1.47.1)
|
|
54
|
+
parser (>= 3.3.7.2)
|
|
55
|
+
prism (~> 1.4)
|
|
56
|
+
rubocop-capybara (2.22.1)
|
|
57
|
+
lint_roller (~> 1.1)
|
|
58
|
+
rubocop (~> 1.72, >= 1.72.1)
|
|
59
|
+
rubocop-factory_bot (2.27.1)
|
|
60
|
+
lint_roller (~> 1.1)
|
|
61
|
+
rubocop (~> 1.72, >= 1.72.1)
|
|
62
|
+
rubocop-rspec (2.31.0)
|
|
63
|
+
rubocop (~> 1.40)
|
|
64
|
+
rubocop-capybara (~> 2.17)
|
|
65
|
+
rubocop-factory_bot (~> 2.22)
|
|
66
|
+
rubocop-rspec_rails (~> 2.28)
|
|
67
|
+
rubocop-rspec_rails (2.29.1)
|
|
68
|
+
rubocop (~> 1.61)
|
|
69
|
+
ruby-progressbar (1.13.0)
|
|
70
|
+
unicode-display_width (3.2.0)
|
|
71
|
+
unicode-emoji (~> 4.1)
|
|
72
|
+
unicode-emoji (4.1.0)
|
|
73
|
+
|
|
74
|
+
PLATFORMS
|
|
75
|
+
arm64-darwin-22
|
|
76
|
+
ruby
|
|
77
|
+
|
|
78
|
+
DEPENDENCIES
|
|
79
|
+
acro_that!
|
|
80
|
+
pry (~> 0.14)
|
|
81
|
+
rspec (~> 3.0)
|
|
82
|
+
rubocop (~> 1.50)
|
|
83
|
+
rubocop-rspec (~> 2.20)
|
|
84
|
+
|
|
85
|
+
BUNDLED WITH
|
|
86
|
+
2.5.21
|
data/README.md
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
# AcroThat
|
|
2
|
+
|
|
3
|
+
A minimal pure Ruby library for parsing and editing PDF AcroForm fields.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- ✅ **Pure Ruby** - Minimal dependencies (only `chunky_png` for PNG image processing)
|
|
8
|
+
- ✅ **StringIO Only** - Works entirely in memory, no temp files
|
|
9
|
+
- ✅ **PDF AcroForm Support** - Parse, list, add, remove, and modify form fields
|
|
10
|
+
- ✅ **Signature Field Images** - Add image appearances to signature fields (JPEG and PNG support)
|
|
11
|
+
- ✅ **Minimal PDF Engine** - Basic PDF parser/writer for AcroForm manipulation
|
|
12
|
+
- ✅ **Ruby 3.1+** - Modern Ruby support
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
Add this line to your application's Gemfile:
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
gem 'acro_that'
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
And then execute:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
bundle install
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Or install it directly:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
gem install acro_that
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
### Basic Usage
|
|
37
|
+
|
|
38
|
+
```ruby
|
|
39
|
+
require 'acro_that'
|
|
40
|
+
|
|
41
|
+
# Create a document from a file path or StringIO
|
|
42
|
+
doc = AcroThat::Document.new("form.pdf")
|
|
43
|
+
|
|
44
|
+
# Or from StringIO
|
|
45
|
+
require 'stringio'
|
|
46
|
+
pdf_data = File.binread("form.pdf")
|
|
47
|
+
io = StringIO.new(pdf_data)
|
|
48
|
+
doc = AcroThat::Document.new(io)
|
|
49
|
+
|
|
50
|
+
# List all form fields
|
|
51
|
+
fields = doc.list_fields
|
|
52
|
+
fields.each do |field|
|
|
53
|
+
type_info = field.type_key ? "#{field.type} (:#{field.type_key})" : field.type
|
|
54
|
+
puts "#{field.name} (#{type_info}) = #{field.value}"
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Add a new field (using symbol key for type)
|
|
58
|
+
new_field = doc.add_field("NameField",
|
|
59
|
+
value: "John Doe",
|
|
60
|
+
x: 100,
|
|
61
|
+
y: 500,
|
|
62
|
+
width: 200,
|
|
63
|
+
height: 20,
|
|
64
|
+
page: 1,
|
|
65
|
+
type: :text # Optional: :text, :button, :choice, :signature (or "/Tx", "/Btn", etc.)
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Or using the PDF type string directly
|
|
69
|
+
button_field = doc.add_field("CheckBox",
|
|
70
|
+
type: "/Btn", # Or use :button symbol
|
|
71
|
+
x: 100,
|
|
72
|
+
y: 600,
|
|
73
|
+
width: 20,
|
|
74
|
+
height: 20,
|
|
75
|
+
page: 1
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Update a field value
|
|
79
|
+
doc.update_field("ExistingField", "New Value")
|
|
80
|
+
|
|
81
|
+
# Rename a field while updating it
|
|
82
|
+
doc.update_field("OldName", "New Value", new_name: "NewName")
|
|
83
|
+
|
|
84
|
+
# Remove a field
|
|
85
|
+
doc.remove_field("FieldToRemove")
|
|
86
|
+
|
|
87
|
+
# Write the modified PDF to a file
|
|
88
|
+
doc.write("output.pdf")
|
|
89
|
+
|
|
90
|
+
# Or write with flattening (removes incremental updates)
|
|
91
|
+
doc.write("output.pdf", flatten: true)
|
|
92
|
+
|
|
93
|
+
# Or get PDF bytes as a String (returns String, not StringIO)
|
|
94
|
+
pdf_bytes = doc.write
|
|
95
|
+
File.binwrite("output.pdf", pdf_bytes)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Advanced Usage
|
|
99
|
+
|
|
100
|
+
#### Working with Field Objects
|
|
101
|
+
|
|
102
|
+
```ruby
|
|
103
|
+
doc = AcroThat::Document.new("form.pdf")
|
|
104
|
+
fields = doc.list_fields
|
|
105
|
+
|
|
106
|
+
# Access field properties
|
|
107
|
+
field = fields.first
|
|
108
|
+
puts field.name # Field name
|
|
109
|
+
puts field.value # Field value
|
|
110
|
+
puts field.type # Field type (e.g., "/Tx")
|
|
111
|
+
puts field.type_key # Symbol key (e.g., :text) or nil if not mapped
|
|
112
|
+
puts field.x # X position
|
|
113
|
+
puts field.y # Y position
|
|
114
|
+
puts field.width # Width
|
|
115
|
+
puts field.height # Height
|
|
116
|
+
puts field.page # Page number
|
|
117
|
+
|
|
118
|
+
# Fields default to "/Tx" if type is missing from PDF
|
|
119
|
+
|
|
120
|
+
# Update a field directly
|
|
121
|
+
field.update("New Value")
|
|
122
|
+
|
|
123
|
+
# Update and rename a field
|
|
124
|
+
field.update("New Value", new_name: "NewName")
|
|
125
|
+
|
|
126
|
+
# Remove a field directly
|
|
127
|
+
field.remove
|
|
128
|
+
|
|
129
|
+
# Check field type
|
|
130
|
+
field.text_field? # true for text fields
|
|
131
|
+
field.button_field? # true for button/checkbox fields
|
|
132
|
+
field.choice_field? # true for choice/dropdown fields
|
|
133
|
+
field.signature_field? # true for signature fields
|
|
134
|
+
|
|
135
|
+
# Check if field has a value
|
|
136
|
+
field.has_value?
|
|
137
|
+
|
|
138
|
+
# Check if field has position information
|
|
139
|
+
field.has_position?
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
#### Signature Fields with Image Appearances
|
|
143
|
+
|
|
144
|
+
Signature fields can be enhanced with image appearances (signature images). When you update a signature field with image data (base64-encoded JPEG or PNG), AcroThat will automatically add the image as the field's appearance.
|
|
145
|
+
|
|
146
|
+
```ruby
|
|
147
|
+
doc = AcroThat::Document.new("form.pdf")
|
|
148
|
+
|
|
149
|
+
# Add a signature field
|
|
150
|
+
sig_field = doc.add_field("MySignature",
|
|
151
|
+
type: :signature,
|
|
152
|
+
x: 100,
|
|
153
|
+
y: 500,
|
|
154
|
+
width: 200,
|
|
155
|
+
height: 100,
|
|
156
|
+
page: 1
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# Update signature field with base64-encoded image data (add `require 'base64'` if Base64 is not already loaded)
|
|
160
|
+
# JPEG example:
|
|
161
|
+
jpeg_base64 = Base64.encode64(File.binread("signature.jpg")).strip
|
|
162
|
+
doc.update_field("MySignature", jpeg_base64)
|
|
163
|
+
|
|
164
|
+
# PNG example (requires chunky_png gem):
|
|
165
|
+
png_base64 = Base64.encode64(File.binread("signature.png")).strip
|
|
166
|
+
doc.update_field("MySignature", png_base64)
|
|
167
|
+
|
|
168
|
+
# Or using data URI format:
|
|
169
|
+
data_uri = "data:image/png;base64,#{png_base64}"
|
|
170
|
+
doc.update_field("MySignature", data_uri)
|
|
171
|
+
|
|
172
|
+
# Write the PDF with the signature appearance
|
|
173
|
+
doc.write("form_with_signature.pdf")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**Note**: PNG image processing requires the `chunky_png` gem, which is included as a dependency. JPEG images can be processed without any additional dependencies.
|
|
177
|
+
|
|
178
|
+
#### Flattening PDFs
|
|
179
|
+
|
|
180
|
+
```ruby
|
|
181
|
+
# Flatten a PDF to remove incremental updates
|
|
182
|
+
doc = AcroThat::Document.new("form.pdf")
|
|
183
|
+
doc.flatten! # Modifies the document in-place
|
|
184
|
+
|
|
185
|
+
# Or flatten a file straight to a new file (returns the output path)
|
|
186
|
+
output_path = AcroThat::Document.flatten_pdf("input.pdf", "output.pdf")
|
|
187
|
+
|
|
188
|
+
# Or get a new flattened Document (call #write on it to get the bytes)
|
|
189
|
+
flattened_doc = AcroThat::Document.flatten_pdf("input.pdf")
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### API Reference
|
|
193
|
+
|
|
194
|
+
#### `AcroThat::Document.new(path_or_io)`
|
|
195
|
+
Creates a PDF document from a file path (String) or StringIO object.
|
|
196
|
+
|
|
197
|
+
```ruby
|
|
198
|
+
doc = AcroThat::Document.new("path/to/file.pdf")
|
|
199
|
+
doc = AcroThat::Document.new(StringIO.new(pdf_bytes))
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
#### `#list_fields`
|
|
203
|
+
Returns an array of `Field` objects representing all form fields in the document.
|
|
204
|
+
|
|
205
|
+
```ruby
|
|
206
|
+
fields = doc.list_fields
|
|
207
|
+
fields.each do |field|
|
|
208
|
+
puts field.name
|
|
209
|
+
end
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
#### `#add_field(name, options)`
|
|
213
|
+
Adds a new form field to the document. Options include:
|
|
214
|
+
- `value`: Default value for the field (String)
|
|
215
|
+
- `x`: X coordinate (Integer, default: 100)
|
|
216
|
+
- `y`: Y coordinate (Integer, default: 500)
|
|
217
|
+
- `width`: Field width (Integer, default: 100)
|
|
218
|
+
- `height`: Field height (Integer, default: 20)
|
|
219
|
+
- `page`: Page number to add the field to (Integer, default: 1)
|
|
220
|
+
- `type`: Field type (Symbol or String, default: `"/Tx"`). Options:
|
|
221
|
+
- Symbol keys: `:text`, `:button`, `:choice`, `:signature`
|
|
222
|
+
- PDF type strings: `"/Tx"`, `"/Btn"`, `"/Ch"`, `"/Sig"`
|
|
223
|
+
|
|
224
|
+
Returns a `Field` object if successful.
|
|
225
|
+
|
|
226
|
+
```ruby
|
|
227
|
+
# Using symbol keys (recommended)
|
|
228
|
+
field = doc.add_field("NewField", value: "Value", x: 100, y: 500, width: 200, height: 20, page: 1, type: :text)
|
|
229
|
+
|
|
230
|
+
# Using PDF type strings
|
|
231
|
+
field = doc.add_field("ButtonField", type: "/Btn", x: 100, y: 500, width: 20, height: 20, page: 1)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
#### `#update_field(name, new_value, new_name: nil)`
|
|
235
|
+
Updates a field's value and optionally renames it. For signature fields, if `new_value` looks like image data (base64-encoded JPEG/PNG or a data URI), it will automatically add the image as the field's appearance. Returns `true` if successful, `false` if field not found.
|
|
236
|
+
|
|
237
|
+
```ruby
|
|
238
|
+
doc.update_field("FieldName", "New Value")
|
|
239
|
+
doc.update_field("OldName", "New Value", new_name: "NewName")
|
|
240
|
+
|
|
241
|
+
# For signature fields with images:
|
|
242
|
+
doc.update_field("SignatureField", base64_image_data) # Base64-encoded JPEG or PNG
|
|
243
|
+
doc.update_field("SignatureField", "data:image/png;base64,...") # Data URI format
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
#### `#remove_field(name_or_field)`
|
|
247
|
+
Removes a form field by name (String) or Field object. Returns `true` if successful, `false` if field not found.
|
|
248
|
+
|
|
249
|
+
```ruby
|
|
250
|
+
doc.remove_field("FieldName")
|
|
251
|
+
doc.remove_field(field_object)
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
#### `#write(path_out = nil, flatten: false)`
|
|
255
|
+
Writes the modified PDF. If `path_out` is provided, writes to that file path and returns `true`. If no path is provided, returns the PDF bytes as a String. The `flatten` option removes incremental updates from the PDF.
|
|
256
|
+
|
|
257
|
+
```ruby
|
|
258
|
+
doc.write("output.pdf") # Write to file
|
|
259
|
+
doc.write("output.pdf", flatten: true) # Write flattened PDF to file
|
|
260
|
+
pdf_bytes = doc.write # Get PDF bytes as String
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
#### `#flatten`
|
|
264
|
+
Returns flattened PDF bytes (removes incremental updates) without modifying the document.
|
|
265
|
+
|
|
266
|
+
```ruby
|
|
267
|
+
flattened_bytes = doc.flatten
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
#### `#flatten!`
|
|
271
|
+
Flattens the PDF in-place (modifies the current document instance).
|
|
272
|
+
|
|
273
|
+
```ruby
|
|
274
|
+
doc.flatten!
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
#### `AcroThat::Document.flatten_pdf(input_path, output_path = nil)`
|
|
278
|
+
Class method to flatten a PDF. If `output_path` is provided, writes to that path and returns the path. Otherwise returns a new `Document` instance with the flattened content.
|
|
279
|
+
|
|
280
|
+
```ruby
|
|
281
|
+
AcroThat::Document.flatten_pdf("input.pdf", "output.pdf")
|
|
282
|
+
flattened_doc = AcroThat::Document.flatten_pdf("input.pdf")
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### Field Object
|
|
286
|
+
|
|
287
|
+
Each field returned by `#list_fields` is a `Field` object with the following attributes and methods:
|
|
288
|
+
|
|
289
|
+
#### Attributes
|
|
290
|
+
- `name`: Field name (String)
|
|
291
|
+
- `value`: Field value (String or nil)
|
|
292
|
+
- `type`: Field type (String, e.g., "/Tx", "/Btn", "/Ch", "/Sig"). Defaults to "/Tx" if missing from PDF.
|
|
293
|
+
- `ref`: Object reference array `[object_number, generation]`
|
|
294
|
+
- `x`: X coordinate (Float or nil)
|
|
295
|
+
- `y`: Y coordinate (Float or nil)
|
|
296
|
+
- `width`: Field width (Float or nil)
|
|
297
|
+
- `height`: Field height (Float or nil)
|
|
298
|
+
- `page`: Page number (Integer or nil)
|
|
299
|
+
|
|
300
|
+
#### Methods
|
|
301
|
+
- `#update(new_value, new_name: nil)`: Update the field's value and optionally rename it
|
|
302
|
+
- `#remove`: Remove the field from the document
|
|
303
|
+
- `#type_key`: Returns the symbol key for the type (e.g., `:text` for `"/Tx"`) or `nil` if not mapped
|
|
304
|
+
- `#text_field?`: Returns true if field is a text field
|
|
305
|
+
- `#button_field?`: Returns true if field is a button/checkbox field
|
|
306
|
+
- `#choice_field?`: Returns true if field is a choice/dropdown field
|
|
307
|
+
- `#signature_field?`: Returns true if field is a signature field
|
|
308
|
+
- `#has_value?`: Returns true if field has a non-empty value
|
|
309
|
+
- `#has_position?`: Returns true if field has position information
|
|
310
|
+
- `#object_number`: Returns the object number (first element of ref)
|
|
311
|
+
- `#generation`: Returns the generation number (second element of ref)
|
|
312
|
+
- `#valid_ref?`: Returns true if field has a valid reference (not a placeholder)
|
|
313
|
+
|
|
314
|
+
**Note**: When reading fields from a PDF, if the type is missing or empty, it defaults to `"/Tx"` (text field). The `type_key` method allows you to get the symbol representation (e.g., `:text`) from the type string.
|
|
315
|
+
|
|
316
|
+
## Example
|
|
317
|
+
|
|
318
|
+
For complete working examples, see the test files in the `spec/` directory:
|
|
319
|
+
- `spec/document_spec.rb` - Basic document operations
|
|
320
|
+
- `spec/form_editing_spec.rb` - Form field editing examples
|
|
321
|
+
- `spec/field_editor_spec.rb` - Field object manipulation
|
|
322
|
+
|
|
323
|
+
## Architecture
|
|
324
|
+
|
|
325
|
+
AcroThat is built as a minimal PDF engine with the following components:
|
|
326
|
+
|
|
327
|
+
- **ObjectResolver**: Resolves and extracts PDF objects from the document
|
|
328
|
+
- **DictScan**: Parses PDF dictionaries and extracts field information
|
|
329
|
+
- **IncrementalWriter**: Handles incremental PDF updates (appends changes)
|
|
330
|
+
- **PDFWriter**: Writes complete PDF files (for flattening)
|
|
331
|
+
- **Actions**: Modular actions for adding, updating, and removing fields (`AddField`, `UpdateField`, `RemoveField`)
|
|
332
|
+
- **Document**: Main orchestration class that coordinates all operations
|
|
333
|
+
- **Field**: Represents a form field with its properties and methods
|
|
334
|
+
|
|
335
|
+
## Limitations
|
|
336
|
+
|
|
337
|
+
This is a minimal implementation focused on AcroForm manipulation. It does not support:
|
|
338
|
+
|
|
339
|
+
- Complex PDF features (general image handling beyond signature field appearances, fonts, advanced graphics, etc.)
|
|
340
|
+
- PDF compression/decompression (streams are preserved as-is)
|
|
341
|
+
- Full PDF rendering or display
|
|
342
|
+
- Digital signatures (though signature fields can be added)
|
|
343
|
+
- JavaScript or other interactive features
|
|
344
|
+
- Form submission/validation logic
|
|
345
|
+
|
|
346
|
+
## Dependencies
|
|
347
|
+
|
|
348
|
+
- **chunky_png** (~> 1.4): Required for PNG image processing in signature field appearances. JPEG images can be processed without this dependency, but PNG support requires it.
|
|
349
|
+
|
|
350
|
+
## Development
|
|
351
|
+
|
|
352
|
+
After checking out the repo, run `bundle install` to install dependencies. Then, run `bundle exec rspec` to run the tests.
|
|
353
|
+
|
|
354
|
+
## Contributing
|
|
355
|
+
|
|
356
|
+
Bug reports and pull requests are welcome on GitHub.
|
|
357
|
+
|
|
358
|
+
## License
|
|
359
|
+
|
|
360
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/gem_tasks"
|
|
4
|
+
require "rspec/core/rake_task"
|
|
5
|
+
|
|
6
|
+
RSpec::Core::RakeTask.new(:spec)
|
|
7
|
+
|
|
8
|
+
desc "Run RuboCop"
|
|
9
|
+
task :rubocop do
|
|
10
|
+
sh "bundle exec rubocop"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
desc "Run RuboCop with auto-correct"
|
|
14
|
+
task "rubocop:fix" do
|
|
15
|
+
sh "bundle exec rubocop --auto-correct"
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
task default: :spec
|
data/acro_that.gemspec
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/acro_that/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
|
|
6
|
+
spec.name = "acro_that"
|
|
7
|
+
spec.version = AcroThat::VERSION
|
|
8
|
+
spec.authors = ["Michael Wynkoop"]
|
|
9
|
+
spec.email = ["michaelwynkoop@corporatetools.com"]
|
|
10
|
+
|
|
11
|
+
spec.summary = "Pure Ruby PDF AcroForm editing library"
|
|
12
|
+
spec.description = "A minimal pure Ruby library for parsing and editing PDF AcroForm fields using only stdlib"
|
|
13
|
+
spec.homepage = "https://github.com/corporatetools/acro_that"
|
|
14
|
+
spec.license = "MIT"
|
|
15
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
|
|
16
|
+
|
|
17
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
|
18
|
+
spec.metadata["source_code_uri"] = "https://github.com/corporatetools/acro_that"
|
|
19
|
+
spec.metadata["changelog_uri"] = "https://github.com/corporatetools/acro_that/blob/main/CHANGELOG.md"
|
|
20
|
+
|
|
21
|
+
# Specify which files should be added to the gem when it is released.
|
|
22
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
|
23
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
24
|
+
end
|
|
25
|
+
spec.bindir = "exe"
|
|
26
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
27
|
+
spec.require_paths = ["lib"]
|
|
28
|
+
|
|
29
|
+
spec.add_runtime_dependency "chunky_png", "~> 1.4"
|
|
30
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
|
31
|
+
spec.add_development_dependency "pry", "~> 0.14"
|
|
32
|
+
spec.add_development_dependency "rubocop", "~> 1.50"
|
|
33
|
+
spec.add_development_dependency "rubocop-rspec", "~> 2.20"
|
|
34
|
+
end
|
data/docs/README.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# AcroThat Documentation
|
|
2
|
+
|
|
3
|
+
This directory contains detailed documentation about how `AcroThat` works, with a focus on explaining the text-based nature of PDFs and how the library uses simple text traversal to parse and modify them.
|
|
4
|
+
|
|
5
|
+
## Documentation Overview
|
|
6
|
+
|
|
7
|
+
### [PDF Structure](./pdf_structure.md)
|
|
8
|
+
|
|
9
|
+
Explains the fundamental structure of PDF files, including:
|
|
10
|
+
- PDFs as text-based files with structured syntax
|
|
11
|
+
- PDF dictionaries (`<< ... >>`)
|
|
12
|
+
- PDF objects, references, arrays, and strings
|
|
13
|
+
- Why PDF structure is parseable with text traversal
|
|
14
|
+
- Examples of PDF dictionary structure
|
|
15
|
+
|
|
16
|
+
**Key insight:** PDFs may contain binary data in streams, but their **structure**—dictionaries, arrays, strings, references—is all text-based syntax.
|
|
17
|
+
|
|
18
|
+
### [DictScan Explained](./dict_scan_explained.md)
|
|
19
|
+
|
|
20
|
+
A detailed walkthrough of the `DictScan` module:
|
|
21
|
+
- How each function works
|
|
22
|
+
- Why text traversal is the core approach
|
|
23
|
+
- Step-by-step algorithm explanations
|
|
24
|
+
- Common patterns for using `DictScan`
|
|
25
|
+
- Examples showing how text traversal parses PDF dictionaries
|
|
26
|
+
|
|
27
|
+
**Key insight:** Despite appearing complicated, `DictScan` is fundamentally **text traversal**—finding delimiters (`<<`, `>>`, `(`, `)`, etc.) and tracking depth to extract values.
|
|
28
|
+
|
|
29
|
+
### [Object Streams](./object_streams.md)
|
|
30
|
+
|
|
31
|
+
Explains how PDF object streams work and how `AcroThat` parses them:
|
|
32
|
+
- What object streams are and why they're used
|
|
33
|
+
- Object stream structure (header + data sections)
|
|
34
|
+
- How `ObjectResolver` identifies objects in streams
|
|
35
|
+
- The `ObjStm.parse` algorithm
|
|
36
|
+
- Stream decoding (compression, PNG predictor)
|
|
37
|
+
- Lazy loading and caching
|
|
38
|
+
|
|
39
|
+
**Key insight:** Object streams compress multiple objects together, but parsing them is still **text traversal**—once decompressed, it's just parsing space-separated numbers and extracting substrings by offset.
|
|
40
|
+
|
|
41
|
+
## Common Themes
|
|
42
|
+
|
|
43
|
+
Throughout all documentation, you'll see these recurring themes:
|
|
44
|
+
|
|
45
|
+
1. **PDFs are text-based**: Despite being "binary" files, PDF structure uses text syntax
|
|
46
|
+
2. **Text traversal works**: Simple character-by-character scanning can parse PDF dictionaries
|
|
47
|
+
3. **Depth tracking**: Nested structures (dictionaries, arrays, strings) use depth counting
|
|
48
|
+
4. **Position-based replacement**: Using exact byte positions is safer than regex replacement
|
|
49
|
+
5. **Minimal parsing**: We don't need a full PDF parser—just enough to find dictionaries and extract/replace values
|
|
50
|
+
|
|
51
|
+
## How to Read These Docs
|
|
52
|
+
|
|
53
|
+
**If you're new to PDFs:**
|
|
54
|
+
1. Start with [PDF Structure](./pdf_structure.md) to understand PDFs at a high level
|
|
55
|
+
2. Read [DictScan Explained](./dict_scan_explained.md) to see how text traversal works
|
|
56
|
+
3. Read [Object Streams](./object_streams.md) to understand compression features
|
|
57
|
+
|
|
58
|
+
**If you're debugging:**
|
|
59
|
+
- [DictScan Explained](./dict_scan_explained.md) has function-by-function walkthroughs
|
|
60
|
+
- [Object Streams](./object_streams.md) explains how object streams are parsed
|
|
61
|
+
|
|
62
|
+
**If you're contributing:**
|
|
63
|
+
- All docs include code examples and algorithm explanations
|
|
64
|
+
- Each document explains **why** the approach works, not just **how**
|
|
65
|
+
|
|
66
|
+
## Technical Details
|
|
67
|
+
|
|
68
|
+
### Why Text Traversal Works
|
|
69
|
+
|
|
70
|
+
PDF dictionaries use distinct delimiters:
|
|
71
|
+
- `<<` `>>` for dictionaries
|
|
72
|
+
- `[` `]` for arrays
|
|
73
|
+
- `(` `)` for literal strings
|
|
74
|
+
- `<` `>` for hex strings
|
|
75
|
+
- `/` for names
|
|
76
|
+
|
|
77
|
+
These distinct delimiters allow pattern-matching on the leading character(s) to determine value types (two characters are needed to tell a `<<` dictionary from a `<` hex string). Depth tracking (counting `<<`/`>>`, `[`/`]`, etc.) handles nested structures.
|
|
78
|
+
|
|
79
|
+
### Performance
|
|
80
|
+
|
|
81
|
+
**Why text traversal is fast:**
|
|
82
|
+
- No AST construction
|
|
83
|
+
- No full PDF parsing
|
|
84
|
+
- Direct string manipulation
|
|
85
|
+
- Minimal memory allocation
|
|
86
|
+
|
|
87
|
+
**Trade-offs:**
|
|
88
|
+
- Doesn't validate entire PDF structure
|
|
89
|
+
- Assumes dictionaries are well-formed
|
|
90
|
+
- Some preprocessing needed (stream stripping)
|
|
91
|
+
|
|
92
|
+
### Safety
|
|
93
|
+
|
|
94
|
+
**Position-based replacement** (using exact byte positions) avoids regex edge cases and preserves formatting. The code verifies dictionaries remain valid after modification.
|
|
95
|
+
|
|
96
|
+
## Questions?
|
|
97
|
+
|
|
98
|
+
If you have questions about how `AcroThat` works, these docs should answer them. The code is also well-commented, so reading the source alongside the docs is recommended.
|
|
99
|
+
|