corp_pdf 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CorpPdf
4
# A single page of a PDF document.
#
# Bundles the page number, its dimensions, its object reference and a hash of
# page-level metadata, together with the owning document so that fields can
# be added straight onto this page.
class Page
  attr_reader :page, :width, :height, :ref, :metadata, :document

  # page     - page number (1-indexed)
  # width    - page width (may be nil)
  # height   - page height (may be nil)
  # ref      - [obj_num, gen_num] reference of the page object
  # metadata - Hash with :rotate, :media_box, :crop_box, etc.
  # document - object that receives #add_field calls made through this page
  def initialize(page, width, height, ref, metadata, document)
    @document = document
    @metadata = metadata
    @ref = ref
    @height = height
    @width = width
    @page = page
  end

  # Adds a field to this page by delegating to the document's add_field.
  # The caller's options hash is left untouched; the forwarded copy always
  # carries this page's number under :page, replacing any :page passed in.
  def add_field(name, options = {})
    @document.add_field(name, options.merge(page: @page))
  end

  # Page number; same value as #page.
  alias page_number page

  # Page reference [obj_num, gen_num]; same value as #ref.
  alias page_ref ref

  # True when the metadata carries a :rotate value other than nil or 0.
  def rotated?
    angle = @metadata[:rotate]
    !(angle.nil? || angle == 0)
  end

  # Rotation stored under :rotate in the metadata, or 0 when none is set.
  def rotation
    @metadata[:rotate] || 0
  end

  # Box readers (media_box, crop_box, art_box, bleed_box, trim_box): each one
  # returns the metadata entry stored under the key of the same name, or nil
  # when the metadata has no such entry.
  %i[media_box crop_box art_box bleed_box trim_box].each do |box_key|
    define_method(box_key) { @metadata[box_key] }
  end

  # Debug-friendly description. Dimensions appear only when both width and
  # height are set; the rotation note appears only when the page is rotated.
  def to_s
    pieces = ["#<CorpPdf::Page page=#{page}"]
    pieces << " #{width}x#{height}" if width && height
    pieces << " (rotated #{rotation}°)" if rotated?
    pieces << " ref=#{ref.inspect}>"
    pieces.join
  end

  alias inspect to_s

  # Hash form kept for backward compatibility; the document is not included.
  def to_h
    { page: page, width: width, height: height, ref: ref, metadata: metadata }
  end
end
90
+ end
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CorpPdf
4
# PDFWriter - Clean PDF writer for flattening documents.
# Writes a complete PDF from parsed objects, consolidating incremental updates.
#
# The writer accumulates everything in one binary string. #write_xref relies on
# the byte offsets recorded by #write_object, and #write_trailer relies on the
# table position recorded by #write_xref, so the calls are expected in the
# order: write_header, write_object (per object), write_xref, write_trailer.
class PDFWriter
  # Cross-reference entry for object 0 (free-list head) and for every object
  # number that was never written. Each entry is exactly 20 bytes long.
  FREE_XREF_ENTRY = "0000000000 65535 f \n".b.freeze
  private_constant :FREE_XREF_ENTRY

  def initialize
    # Work entirely in binary encoding to avoid UTF-8/ASCII-8BIT conflicts
    @buffer = "".b
    @offsets = [] # [obj_num, gen, offset] for every object written so far
    @xref_offset = 0
  end

  # Appends the PDF version line followed by a comment line of high-bit bytes
  # (helps PDF readers identify binary content).
  def write_header
    @buffer << "%PDF-1.6\n".b
    @buffer << "%\xE2\xE3\xCF\xD3\n".b
  end

  # Appends one indirect object and records its byte offset for the xref table.
  #
  # ref  - [obj_num, gen]
  # body - serialized object body; a trailing newline is added when missing so
  #        that "endobj" always starts on its own line
  def write_object(ref, body)
    obj_num, gen = ref
    @offsets << [obj_num, gen, @buffer.bytesize]

    @buffer << "#{obj_num} #{gen} obj\n".b
    @buffer << body.b
    @buffer << "\n".b unless body.end_with?("\n")
    @buffer << "endobj\n".b
  end

  # Appends the cross-reference table and remembers where it starts.
  #
  # The table is a single subsection covering object 0 through the highest
  # object number written. Because every number in that range gets an entry
  # (in-use or free), the range is contiguous and needs no further
  # subsections. Each object number therefore appears exactly once; the
  # previous implementation listed object 0 twice.
  #
  # Object 0 is always the free-list head. When the same object number was
  # written more than once, the most recent write wins.
  def write_xref
    @xref_offset = @buffer.bytesize

    # obj_num => [offset, gen]; later writes overwrite earlier ones.
    in_use = {}
    @offsets.each do |obj_num, gen, offset|
      in_use[obj_num] = [offset, gen] if obj_num.positive?
    end
    max_obj_num = in_use.keys.max || 0

    xref = "xref\n".b
    xref << "0 #{max_obj_num + 1}\n".b
    xref << FREE_XREF_ENTRY

    1.upto(max_obj_num) do |obj_num|
      entry = in_use[obj_num]
      xref << (entry ? format("%010d %05d n \n", *entry).b : FREE_XREF_ENTRY)
    end

    @buffer << xref
  end

  # Appends the trailer dictionary, the startxref pointer and the EOF marker.
  #
  # size     - value written as /Size; not validated here (presumably the
  #            highest object number plus one - confirm against the caller)
  # root_ref - [obj_num, gen] written as /Root
  # info_ref - optional [obj_num, gen] written as /Info
  def write_trailer(size, root_ref, info_ref = nil)
    trailer = "trailer\n".b
    trailer << "<<".b
    trailer << " /Size #{size}".b
    trailer << " /Root #{root_ref[0]} #{root_ref[1]} R".b
    trailer << " /Info #{info_ref[0]} #{info_ref[1]} R".b if info_ref
    trailer << " >>".b
    trailer << "\n".b
    trailer << "startxref\n".b
    trailer << "#{@xref_offset}\n".b
    trailer << "%%EOF\n".b

    @buffer << trailer
  end

  # Returns the binary string holding everything written so far.
  def output
    @buffer
  end
end
133
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module CorpPdf
  # Version of the gem.
  #
  # The publish script reads and rewrites this line by matching the text
  # `VERSION = "..."`, so keep the assignment on one line with a
  # double-quoted literal.
  VERSION = "1.0.5"
end
data/lib/corp_pdf.rb ADDED
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "strscan"
4
+ require "stringio"
5
+ require "zlib"
6
+ require "base64"
7
+ require "set"
8
+ require "i18n"
9
+
10
+ require_relative "corp_pdf/dict_scan"
11
+ require_relative "corp_pdf/object_resolver"
12
+ require_relative "corp_pdf/objstm"
13
+ require_relative "corp_pdf/pdf_writer"
14
+ require_relative "corp_pdf/incremental_writer"
15
+ require_relative "corp_pdf/field"
16
+ require_relative "corp_pdf/page"
17
+ require_relative "corp_pdf/document"
18
+
19
+ # Load actions base first (needed by fields)
20
+ require_relative "corp_pdf/actions/base"
21
+
22
+ # Load fields
23
+ require_relative "corp_pdf/fields/base"
24
+ require_relative "corp_pdf/fields/radio"
25
+ require_relative "corp_pdf/fields/text"
26
+ require_relative "corp_pdf/fields/checkbox"
27
+ require_relative "corp_pdf/fields/signature"
28
+
29
+ # Load actions
30
+ require_relative "corp_pdf/actions/add_field"
31
+ require_relative "corp_pdf/actions/update_field"
32
+ require_relative "corp_pdf/actions/remove_field"
33
+
34
+ module CorpPdf
35
+ end
data/publish ADDED
@@ -0,0 +1,183 @@
1
#!/bin/bash

# Abort as soon as any command fails.
set -o errexit

# Names and paths used throughout the script. The paths are relative, so the
# script is expected to be started from the directory that contains them.
GEM_NAME="corp_pdf"
GEMSPEC_FILE="corp_pdf.gemspec"
VERSION_FILE="lib/corp_pdf/version.rb"
8
+
9
# Print the help text on stdout and terminate the script.
#
# Arguments:
#   $1 - exit status to terminate with (optional). Defaults to 1, so callers
#        that pass nothing keep signalling failure; a help handler can pass 0.
usage() {
    local status="${1:-1}"

    cat << EOF
Usage: $0 [OPTIONS]

Publish corp_pdf gem to RubyGems.

OPTIONS:
-b, --bump TYPE Bump version before publishing (major|minor|patch)
-k, --key KEY RubyGems API key name (optional, uses credentials if not provided)
-h, --help Show this help message

EXAMPLES:
$0 # Publish current version without bumping
$0 -b patch # Bump patch version (0.1.2 -> 0.1.3)
$0 -b minor # Bump minor version (0.1.2 -> 0.2.0)
$0 -b major # Bump major version (0.1.2 -> 1.0.0)
$0 -b patch -k mykey # Bump patch and use specific API key

EOF
    exit "$status"
}
31
+
32
# Print the version string declared in $VERSION_FILE on stdout.
#
# The version is whatever sits between the first pair of double quotes on the
# line containing "VERSION =". Exits with status 1, after a message on stderr,
# when $VERSION_FILE is not a regular file.
get_current_version() {
    [[ -f "$VERSION_FILE" ]] || {
        echo "Error: $VERSION_FILE not found!" >&2
        exit 1
    }

    # awk behaves the same on macOS and Linux for this extraction.
    awk -F'"' '/VERSION =/ {print $2}' "$VERSION_FILE"
}
42
+
43
# Bump the version stored in $VERSION_FILE and print the new version on stdout.
#
# Arguments:
#   $1 - which component to bump: major, minor or patch.
#
# Only the first three dot-separated components are used; missing ones count
# as 0. Exits with status 1 (message on stderr) when the current version
# cannot be read, is empty or non-numeric, when the bump type is invalid, or
# when the file cannot be rewritten. Every failure is checked explicitly
# because this function is called through command substitution, where bash
# does not apply `set -e`.
bump_version() {
    local bump_type=$1

    # Declared and assigned separately: `local var=$(cmd)` returns the status
    # of `local`, which would hide a failing get_current_version.
    local current_version
    current_version=$(get_current_version) || exit 1

    if [[ -z "$current_version" ]]; then
        echo "Error: No version found in $VERSION_FILE" >&2
        exit 1
    fi

    IFS='.' read -ra VERSION_PARTS <<< "$current_version"
    local major=${VERSION_PARTS[0]:-0}
    local minor=${VERSION_PARTS[1]:-0}
    local patch=${VERSION_PARTS[2]:-0}

    local part
    for part in "$major" "$minor" "$patch"; do
        if [[ ! "$part" =~ ^[0-9]+$ ]]; then
            echo "Error: Cannot bump non-numeric version: $current_version" >&2
            exit 1
        fi
    done

    # The 10# prefix forces base 10 so a component such as "08" is not
    # rejected as an invalid octal number.
    case "$bump_type" in
        major)
            major=$((10#$major + 1))
            minor=0
            patch=0
            ;;
        minor)
            minor=$((10#$minor + 1))
            patch=0
            ;;
        patch)
            patch=$((10#$patch + 1))
            ;;
        *)
            echo "Error: Invalid bump type: $bump_type" >&2
            echo "Must be one of: major, minor, patch" >&2
            exit 1
            ;;
    esac

    local new_version="${major}.${minor}.${patch}"

    # `-i.bak` (suffix attached to the flag) is accepted by both BSD sed on
    # macOS and GNU sed on Linux; the backup copy is removed afterwards.
    if ! sed -i.bak "s/VERSION = \".*\"/VERSION = \"$new_version\"/" "$VERSION_FILE"; then
        echo "Error: Failed to update $VERSION_FILE" >&2
        exit 1
    fi
    rm -f "${VERSION_FILE}.bak"

    echo "$new_version"
}
86
+
87
# Parse arguments
BUMP_TYPE=""
API_KEY=""

while [[ $# -gt 0 ]]; do
    case $1 in
        -b|--bump)
            # Without this guard `shift 2` fails when the value is missing
            # and `set -e` ends the script without any message.
            if [[ $# -lt 2 ]]; then
                echo "Error: Option $1 requires an argument" >&2
                usage
            fi
            BUMP_TYPE="$2"
            shift 2
            ;;
        -k|--key)
            if [[ $# -lt 2 ]]; then
                echo "Error: Option $1 requires an argument" >&2
                usage
            fi
            API_KEY="$2"
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        *)
            echo "Error: Unknown option: $1" >&2
            usage
            ;;
    esac
done

# Validate bump type if provided
if [[ -n "$BUMP_TYPE" ]]; then
    if [[ ! "$BUMP_TYPE" =~ ^(major|minor|patch)$ ]]; then
        echo "Error: Invalid bump type: $BUMP_TYPE" >&2
        echo "Must be one of: major, minor, patch" >&2
        exit 1
    fi
fi

# Get version (after potential bump)
if [[ -n "$BUMP_TYPE" ]]; then
    echo "Bumping $BUMP_TYPE version..."
    VERSION=$(bump_version "$BUMP_TYPE")
    echo "New version: $VERSION"
else
    VERSION=$(get_current_version)
    echo "Using current version: $VERSION"
fi

# Build the gem
echo "Building gem..."
GEM_FILE="${GEM_NAME}-${VERSION}.gem"

# The build runs as an `if` condition so that a failure reaches the error
# message below; as a plain command `set -e` would abort the script first.
if ! gem build "$GEMSPEC_FILE" || [[ ! -f "$GEM_FILE" ]]; then
    echo "Error: Failed to build gem file: $GEM_FILE" >&2
    exit 1
fi

echo "Gem built successfully: $GEM_FILE"

# Push to RubyGems
echo "Pushing to RubyGems..."
PUSH_ARGS=("$GEM_FILE")
if [[ -n "$API_KEY" ]]; then
    PUSH_ARGS+=(--key "$API_KEY")
fi

# Same reasoning as for the build: checking `$?` on a later line never ran,
# because `set -e` had already ended the script when the push failed.
if ! gem push "${PUSH_ARGS[@]}"; then
    echo "Error: Failed to push gem to RubyGems" >&2
    exit 1
fi

echo "Gem pushed to RubyGems successfully"

# Commit and push to git (only if version was bumped or if there are changes)
if [[ -n "$BUMP_TYPE" ]] || ! git diff --quiet "$VERSION_FILE"; then
    echo "Committing version change..."
    git add "$VERSION_FILE"
    git commit -m "v${VERSION}"

    echo "Creating and pushing tag..."
    git tag -a "v${VERSION}" -m "Release version ${VERSION}"

    echo "Pushing to origin..."
    git push origin main
    git push origin "v${VERSION}"

    echo "Git operations completed successfully"
else
    echo "No version changes to commit"
fi

echo ""
echo "✅ Successfully published ${GEM_NAME} v${VERSION}!"
echo " - Gem built: ${GEM_FILE}"
echo " - Pushed to RubyGems"
if [[ -n "$BUMP_TYPE" ]]; then
    echo " - Version bumped and committed"
    echo " - Tagged as v${VERSION}"
fi
183
+
metadata ADDED
@@ -0,0 +1,169 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: corp_pdf
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Michael Wynkoop
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2025-11-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: chunky_png
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.4'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: i18n
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.14'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.14'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.14'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.14'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.50'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.50'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop-rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.20'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.20'
97
+ description: A minimal pure Ruby library for parsing and editing PDF AcroForm fields
98
+ using only stdlib
99
+ email:
100
+ - michaelwynkoop@corporatetools.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".rubocop.yml"
107
+ - CHANGELOG.md
108
+ - Gemfile
109
+ - Gemfile.lock
110
+ - README.md
111
+ - Rakefile
112
+ - corp_pdf.gemspec
113
+ - docs/README.md
114
+ - docs/clear_fields.md
115
+ - docs/dict_scan_explained.md
116
+ - docs/object_streams.md
117
+ - docs/pdf_structure.md
118
+ - issues/README.md
119
+ - issues/memory-benchmark-results.md
120
+ - issues/memory-improvements.md
121
+ - issues/memory-optimization-summary.md
122
+ - issues/refactoring-opportunities.md
123
+ - lib/corp_pdf.rb
124
+ - lib/corp_pdf/actions/add_field.rb
125
+ - lib/corp_pdf/actions/base.rb
126
+ - lib/corp_pdf/actions/remove_field.rb
127
+ - lib/corp_pdf/actions/update_field.rb
128
+ - lib/corp_pdf/dict_scan.rb
129
+ - lib/corp_pdf/document.rb
130
+ - lib/corp_pdf/field.rb
131
+ - lib/corp_pdf/fields/base.rb
132
+ - lib/corp_pdf/fields/checkbox.rb
133
+ - lib/corp_pdf/fields/radio.rb
134
+ - lib/corp_pdf/fields/signature.rb
135
+ - lib/corp_pdf/fields/text.rb
136
+ - lib/corp_pdf/incremental_writer.rb
137
+ - lib/corp_pdf/object_resolver.rb
138
+ - lib/corp_pdf/objstm.rb
139
+ - lib/corp_pdf/page.rb
140
+ - lib/corp_pdf/pdf_writer.rb
141
+ - lib/corp_pdf/version.rb
142
+ - publish
143
+ homepage: https://github.com/corporatetools/corp_pdf
144
+ licenses:
145
+ - MIT
146
+ metadata:
147
+ homepage_uri: https://github.com/corporatetools/corp_pdf
148
+ source_code_uri: https://github.com/corporatetools/corp_pdf
149
+ changelog_uri: https://github.com/corporatetools/corp_pdf/blob/main/CHANGELOG.md
150
+ post_install_message:
151
+ rdoc_options: []
152
+ require_paths:
153
+ - lib
154
+ required_ruby_version: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: 3.1.0
159
+ required_rubygems_version: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - ">="
162
+ - !ruby/object:Gem::Version
163
+ version: '0'
164
+ requirements: []
165
+ rubygems_version: 3.4.12
166
+ signing_key:
167
+ specification_version: 4
168
+ summary: Pure Ruby PDF AcroForm editing library
169
+ test_files: []