acro_that 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +1 -1
- data/lib/acro_that/document.rb +30 -5
- data/lib/acro_that/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d1d3505b27f84c80388049dd3d167c73c88859f076cb0f02e42ead53e8a9c63a
|
|
4
|
+
data.tar.gz: 5bfa90b434d7f35722879ad0e3b6414d8160e4267281a433500de1e496cb5a2d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '09ea0329b659add9744960363196db63715e99cb873ef75bc6a0162ad6a33c6295211aadd1e3dcbdb8d0d857571f1f32b7fb0a2e2442599b1fb65628302922ca'
|
|
7
|
+
data.tar.gz: a458bb09017f0b01af6b6f6f4923e8b8ae8a41e4c5c0660d97874769c9eaf8141b2dbb6ed0a3de62a4878cff738354bd5655269d5c216e1dcc78aba72432e4f6
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.8] - 2025-11-04
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Fixed PDF parsing error when PDFs are wrapped in multipart form data. PDFs uploaded via web forms (with boundary markers like `------WebKitFormBoundary...`) are now automatically extracted before processing, ensuring correct offset calculations.
|
|
12
|
+
- Fixed xref stream parsing to properly validate objects are actually xref streams before attempting to parse them. Added fallback logic to find classic xref tables nearby when xref stream parsing fails.
|
|
13
|
+
- Fixed annotation removal to preserve non-widget annotations (such as highlighting, comments, etc.) when clearing fields. Only widget annotations associated with form fields are now removed.
|
|
14
|
+
- Improved PDF trailer Size calculation to handle object number gaps correctly.
|
|
15
|
+
|
|
8
16
|
## [0.1.5] - 2025-11-01
|
|
9
17
|
|
|
10
18
|
### Fixed
|
data/Gemfile.lock
CHANGED
data/lib/acro_that/document.rb
CHANGED
|
@@ -18,11 +18,14 @@ module AcroThat
|
|
|
18
18
|
|
|
19
19
|
# Load a PDF from a filesystem path or from an IO-like object.
#
# @param path_or_io [String, #read] a path (read in binary mode) or an
#   object responding to +read+; when it also responds to +binmode+ it is
#   switched to binary mode before reading
def initialize(path_or_io)
  from_path = path_or_io.is_a?(String)
  @path = from_path ? path_or_io : nil

  raw_bytes =
    if from_path
      File.binread(path_or_io)
    else
      # Not every IO-like object implements binmode; only switch modes when
      # the object supports it instead of raising NoMethodError.
      path_or_io.binmode if path_or_io.respond_to?(:binmode)
      path_or_io.read
    end

  # Extract PDF content if wrapped in multipart form data
  @raw = extract_pdf_from_form_data(raw_bytes).freeze
  @resolver = AcroThat::ObjectResolver.new(@raw)
  @patches = []
end
|
|
@@ -634,6 +637,28 @@ module AcroThat
|
|
|
634
637
|
|
|
635
638
|
private
|
|
636
639
|
|
|
640
|
+
# Extract PDF content from multipart form data if present.
#
# Some PDFs arrive still wrapped in a multipart/form-data body: the bytes
# start with a delimiter line ("--" followed by the boundary token, e.g.
# "------WebKitFormBoundary...") and the PDF sits between the part headers
# and the closing delimiter. In that case only the span from the first
# "%PDF" through the last "%%EOF" is kept.
#
# @param bytes [String] raw bytes as read from the file or IO
# @return [String] the embedded PDF bytes, or +bytes+ itself when the input
#   does not start with a multipart delimiter or no "%PDF" ... "%%EOF" span
#   can be located
def extract_pdf_from_form_data(bytes)
  # A multipart delimiter is "--" plus the boundary token, and clients pad
  # the token with differing numbers of dashes, so accept any run of two or
  # more dashes rather than exactly six.
  return bytes unless bytes.match?(/\A-{2,}\w+/)

  eof_marker = "%%EOF"
  pdf_start = bytes.index("%PDF")
  # The last marker is used so every "%%EOF" in the payload is retained.
  pdf_end = bytes.rindex(eof_marker)

  # Without a header followed by a trailer there is nothing sensible to
  # slice out; hand the input back untouched rather than an empty string.
  return bytes unless pdf_start && pdf_end && pdf_end > pdf_start

  bytes[pdf_start...(pdf_end + eof_marker.length)]
end
|
|
661
|
+
|
|
637
662
|
def collect_pages_from_tree(pages_ref, page_objects)
|
|
638
663
|
pages_body = @resolver.object_body(pages_ref)
|
|
639
664
|
return unless pages_body
|
data/lib/acro_that/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: acro_that
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.7
|
|
4
|
+
version: 0.1.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Michael Wynkoop
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11-
|
|
11
|
+
date: 2025-11-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: chunky_png
|