qpdf_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +4 -0
- data/.rspec +3 -0
- data/.rubocop.yml +88 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +129 -0
- data/Rakefile +25 -0
- data/docker/Dockerfile +46 -0
- data/exe/qpdf_ruby +4 -0
- data/ext/qpdf_ruby/array_node.cpp +17 -0
- data/ext/qpdf_ruby/document_handle.cpp +102 -0
- data/ext/qpdf_ruby/document_handle.hpp +60 -0
- data/ext/qpdf_ruby/extconf.rb +25 -0
- data/ext/qpdf_ruby/figure_node.cpp +109 -0
- data/ext/qpdf_ruby/mcid_node.cpp +15 -0
- data/ext/qpdf_ruby/mcr_node.cpp +20 -0
- data/ext/qpdf_ruby/pdf_image_mapper.cpp +294 -0
- data/ext/qpdf_ruby/pdf_image_mapper.hpp +66 -0
- data/ext/qpdf_ruby/pdf_struct_walker.cpp +46 -0
- data/ext/qpdf_ruby/pdf_struct_walker.hpp +34 -0
- data/ext/qpdf_ruby/qpdf_ruby.cpp +204 -0
- data/ext/qpdf_ruby/qpdf_ruby.hpp +10 -0
- data/ext/qpdf_ruby/stream_node.cpp +10 -0
- data/ext/qpdf_ruby/struct_elem_node.cpp +203 -0
- data/ext/qpdf_ruby/struct_node.cpp +51 -0
- data/ext/qpdf_ruby/struct_node.hpp +115 -0
- data/ext/qpdf_ruby/unknown_node.cpp +10 -0
- data/lib/qpdf_ruby/version.rb +5 -0
- data/lib/qpdf_ruby.rb +9 -0
- data/sig/qpdf_ruby.rbs +4 -0
- metadata +191 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c5f6da89731f271f230b6784f82f01bb4f1dfe4060d4caae6d956fa73d8fc13e
|
4
|
+
data.tar.gz: c3d2e9dc8baf1975acbca704b8b76dab210c0f959ce78b8655d1c3c295585d10
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c850b48c57e89e431b3d7340b1cd4819f28887402cdc03af7e108533249a4e8742351732c22217b0fed011e4799b9b00268219ec2b1ea17d6576ef50125ffbbb
|
7
|
+
data.tar.gz: 60f89a46753d7caa4d70a3832c2dc68bd01470528c5c6c6939df10920608f3e451ed7c11d42874fd6944df1a06bdd4847020e1289fb1b07c100fc502cc48e5ce
|
data/.clang-format
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
AllCops:
|
2
|
+
NewCops: enable
|
3
|
+
TargetRubyVersion: 3.3
|
4
|
+
Exclude:
|
5
|
+
- 'ext/qpdf_ruby/extconf.rb'
|
6
|
+
- 'tmp/**/*'
|
7
|
+
- 'vendor/**/*'
|
8
|
+
|
9
|
+
Style/StringLiterals:
|
10
|
+
EnforcedStyle: double_quotes
|
11
|
+
|
12
|
+
Style/StringLiteralsInInterpolation:
|
13
|
+
EnforcedStyle: double_quotes
|
14
|
+
|
15
|
+
Style/Documentation:
|
16
|
+
Enabled: false
|
17
|
+
|
18
|
+
RSpec/SubjectStub:
|
19
|
+
Enabled: false
|
20
|
+
|
21
|
+
RSpec/ExampleLength:
|
22
|
+
Enabled: false
|
23
|
+
|
24
|
+
Layout/MultilineMethodCallIndentation:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Layout/FirstArgumentIndentation:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Layout/ClosingParenthesisIndentation:
|
31
|
+
Enabled: false
|
32
|
+
|
33
|
+
Layout/FirstHashElementIndentation:
|
34
|
+
EnforcedStyle: consistent
|
35
|
+
|
36
|
+
Layout/FirstArrayElementIndentation:
|
37
|
+
Enabled: false
|
38
|
+
|
39
|
+
Layout/MultilineOperationIndentation:
|
40
|
+
Enabled: false
|
41
|
+
|
42
|
+
Layout/BeginEndAlignment:
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Layout/ArrayAlignment:
|
46
|
+
Enabled: false
|
47
|
+
|
48
|
+
Layout/LineLength:
|
49
|
+
Enabled: false
|
50
|
+
|
51
|
+
Layout/LineEndStringConcatenationIndentation:
|
52
|
+
Enabled: false
|
53
|
+
|
54
|
+
RSpec/MultipleMemoizedHelpers:
|
55
|
+
Max: 10
|
56
|
+
|
57
|
+
Metrics/MethodLength:
|
58
|
+
Enabled: false
|
59
|
+
|
60
|
+
Metrics/ClassLength:
|
61
|
+
Enabled: false
|
62
|
+
|
63
|
+
Metrics/ParameterLists:
|
64
|
+
Max: 10
|
65
|
+
|
66
|
+
|
67
|
+
Gemspec/DevelopmentDependencies:
|
68
|
+
EnforcedStyle: gemspec
|
69
|
+
|
70
|
+
RSpec/InstanceVariable:
|
71
|
+
Enabled: false
|
72
|
+
|
73
|
+
RSpec/BeforeAfterAll:
|
74
|
+
Enabled: false
|
75
|
+
|
76
|
+
RSpec/SpecFilePathFormat:
|
77
|
+
Enabled: true
|
78
|
+
Exclude:
|
79
|
+
- 'spec/acceptance/**/*_spec.rb'
|
80
|
+
|
81
|
+
RSpec/DescribeClass:
|
82
|
+
Enabled: true
|
83
|
+
Exclude:
|
84
|
+
- 'spec/acceptance/**/*_spec.rb'
|
85
|
+
|
86
|
+
plugins:
|
87
|
+
- rubocop-rake
|
88
|
+
- rubocop-rspec
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
qpdf_ruby
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-3.3.4
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2025 Dieter S.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# QpdfRuby
|
2
|
+
|
3
|
+
> **Patch & polish PDFs so that PAC 2024 finally turns green.**
|
4
|
+
|
5
|
+
QpdfRuby is a very small Ruby wrapper around the battle‑tested
|
6
|
+
[QPDF \>= 12](https://qpdf.sourceforge.net/) C++ library. Right now the
|
7
|
+
library focuses on only **three specialised tasks** that are needed when
|
8
|
+
PDFs are printed from Chromium‑based browsers and subsequently audited
|
9
|
+
with the PAC 2024 accessibility checker:
|
10
|
+
|
11
|
+
1. **Export the structure tree as XML** – handy for debugging.
|
12
|
+
2. **Mark vector path objects as `/Artifact`** so that decorative lines,
|
13
|
+
boxes, etc. are ignored by assistive technologies.
|
14
|
+
3. **Add missing `/BBox` entries to every `/Figure` element** (derived
|
15
|
+
from the page’s graphic operators) so that screen readers know the
|
16
|
+
physical extent of each image.
|
17
|
+
|
18
|
+
Together these tweaks eliminate the most common complaints PAC 2024 has
|
19
|
+
about browser‑generated PDFs.
|
20
|
+
|
21
|
+
---
|
22
|
+
|
23
|
+
## Features in Detail
|
24
|
+
|
25
|
+
| Feature | Ruby API |
|
26
|
+
| ---------------------------------------------- | ---------------------------------------- |
|
27
|
+
| Dump structure tree as XML | `doc.show_structure` |
|
28
|
+
| Mark path objects ( `re … S/s/f/F/B/b` ) | `doc.mark_paths_as_artifacts` |
|
29
|
+
| Ensure `/Figure` elements have a layout BBox¹ | `doc.ensure_bbox` |
|
30
|
+
|
31
|
+
_¹Internally the gem parses each page’s content stream, maps image
|
32
|
+
`/MCID`s to their transformation matrix, computes the bounding box
|
33
|
+
(courtesy of a little linear algebra) and finally writes the result into
|
34
|
+
the structure tree._
|
35
|
+
|
36
|
+
---
|
37
|
+
|
38
|
+
## Installation
|
39
|
+
|
40
|
+
### Requirements
|
41
|
+
|
42
|
+
* **Ruby** \>= 3.1
|
43
|
+
* **QPDF** \>= 12.0.0 (headers & libs)
|
44
|
+
|
45
|
+
### macOS
|
46
|
+
```bash
|
47
|
+
brew install qpdf
|
48
|
+
bundle config set --local build.qpdf_ruby "--with-qpdf-dir=$(brew --prefix qpdf)"
|
49
|
+
```
|
50
|
+
|
51
|
+
### Debian/Ubuntu
|
52
|
+
```bash
|
53
|
+
# on Debian 11/Ubuntu 20.04 you may need newer packages from testing
|
54
|
+
sudo apt-get update && sudo apt-get install -y libqpdf-dev qpdf
|
55
|
+
```
|
56
|
+
If `apt` cannot provide QPDF ≥ 12 you can compile it yourself or pull the
|
57
|
+
package from *testing/unstable* – see the [Dockerfile](./docker/Dockerfile) for a working
|
58
|
+
`apt preferences` snippet.
|
59
|
+
|
60
|
+
### Add the gem
|
61
|
+
```bash
|
62
|
+
bundle add qpdf_ruby
|
63
|
+
# …or without bundler:
|
64
|
+
# gem install qpdf_ruby -- --with-qpdf-include=/usr/local/include/qpdf --with-qpdf-lib=/usr/local/lib
|
65
|
+
```
|
66
|
+
|
67
|
+
---
|
68
|
+
|
69
|
+
## Quick Start
|
70
|
+
```ruby
|
71
|
+
require "qpdf_ruby"
|
72
|
+
|
73
|
+
pdf = QpdfRuby::Document.new("input.pdf")
|
74
|
+
|
75
|
+
# 1. tag decorative paths
|
76
|
+
pdf.mark_paths_as_artifacts
|
77
|
+
|
78
|
+
# 2. add BBox to every <Figure>
|
79
|
+
pdf.ensure_bbox
|
80
|
+
|
81
|
+
# 3. introspect structure tree (optional)
|
82
|
+
File.write("structure.xml", pdf.show_structure)
|
83
|
+
|
84
|
+
# 4. save 🎉
|
85
|
+
pdf.write("fixed.pdf")
|
86
|
+
```
|
87
|
+
|
88
|
+
Run PAC 2024 on `fixed.pdf` – it should report far fewer (or zero!)
|
89
|
+
errors compared to the original browser output.
|
90
|
+
|
91
|
+
---
|
92
|
+
|
93
|
+
## Development
|
94
|
+
```bash
|
95
|
+
git clone https://github.com/dieter-medium/qpdf_ruby.git
|
96
|
+
cd qpdf_ruby
|
97
|
+
bin/setup # install gem + test deps
|
98
|
+
autotest # guard & RSpec
|
99
|
+
```
|
100
|
+
* Bump **version.rb** → `bundle exec rake release` to push a new gem.
|
101
|
+
|
102
|
+
### Testing with local QPDF builds
|
103
|
+
If you tinker with QPDF itself, point Bundler to your custom prefix:
|
104
|
+
```bash
|
105
|
+
bundle config set --local build.qpdf_ruby "--with-qpdf-include=$HOME/opt/qpdf/include --with-qpdf-lib=$HOME/opt/qpdf/lib"
|
106
|
+
```
|
107
|
+
|
108
|
+
---
|
109
|
+
|
110
|
+
## Roadmap
|
111
|
+
|
112
|
+
TBD
|
113
|
+
|
114
|
+
---
|
115
|
+
|
116
|
+
## Contributing
|
117
|
+
Bug reports & pull requests are welcome at
|
118
|
+
<https://github.com/dieter-medium/qpdf_ruby>.
|
119
|
+
|
120
|
+
### Code Style
|
121
|
+
* C++ 17, clang‑format enforced
|
122
|
+
* Ruby 3.3 (see `.ruby-version`), RuboCop with the project's `.rubocop.yml`
|
123
|
+
|
124
|
+
---
|
125
|
+
|
126
|
+
## License
|
127
|
+
|
128
|
+
[MIT](https://opensource.org/licenses/MIT) – see `LICENSE.txt` for full
|
129
|
+
text.
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rspec/core/rake_task"
|
5
|
+
|
6
|
+
RSpec::Core::RakeTask.new(:spec)
|
7
|
+
|
8
|
+
require "rubocop/rake_task"
|
9
|
+
|
10
|
+
RuboCop::RakeTask.new
|
11
|
+
|
12
|
+
require "rake/extensiontask"
|
13
|
+
|
14
|
+
desc "Build the gem including native extensions"
|
15
|
+
task build: :compile
|
16
|
+
|
17
|
+
GEMSPEC = Gem::Specification.load("qpdf_ruby.gemspec")
|
18
|
+
|
19
|
+
Rake::ExtensionTask.new("qpdf_ruby", GEMSPEC) do |ext|
|
20
|
+
ext.lib_dir = "lib/qpdf_ruby"
|
21
|
+
end
|
22
|
+
|
23
|
+
Dir.glob("tasks/*.rake").each { |r| load r }
|
24
|
+
|
25
|
+
task default: %i[clobber compile spec rubocop]
|
data/docker/Dockerfile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
FROM ruby:3.3
|
2
|
+
|
3
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
4
|
+
|
5
|
+
RUN set -eux; \
|
6
|
+
echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list.d/extra.list; \
|
7
|
+
echo "deb http://deb.debian.org/debian unstable main" >> /etc/apt/sources.list.d/extra.list; \
|
8
|
+
\
|
9
|
+
printf "Package: *\nPin: release a=testing\nPin-Priority: 100\n\n" > /etc/apt/preferences.d/99-pin-testing; \
|
10
|
+
printf "Package: *\nPin: release a=unstable\nPin-Priority: 100\n" >> /etc/apt/preferences.d/99-pin-testing; \
|
11
|
+
\
|
12
|
+
apt-get update; \
|
13
|
+
apt-get -y --no-install-recommends install qpdf libqpdf-dev -t testing; \
|
14
|
+
apt-mark hold qpdf libqpdf-dev; \
|
15
|
+
\
|
16
|
+
apt-get clean; rm -rf /var/lib/apt/lists/*;
|
17
|
+
|
18
|
+
# Smoke‑test (shown in build log)
|
19
|
+
RUN qpdf --version # should print "qpdf version 12.x"
|
20
|
+
|
21
|
+
|
22
|
+
COPY ./pkg/qpdf_ruby-*.gem ./
|
23
|
+
COPY ./spec/fixtures/example_accessibility.pdf /tmp/dummy.pdf
|
24
|
+
|
25
|
+
RUN gem install ./qpdf_ruby-*.gem -- --with-qpdf-include=/usr/local/include/qpdf --with-qpdf-lib=/usr/local/lib
|
26
|
+
|
27
|
+
RUN ruby -r qpdf_ruby -e 'puts "QpdfRuby version: #{QpdfRuby::VERSION}";doc = QpdfRuby::Document.new "/tmp/dummy.pdf"; puts doc.show_structure'
|
28
|
+
|
29
|
+
# Create a non-root user
|
30
|
+
RUN set -eux; \
|
31
|
+
groupadd -r appuser && useradd -r -g appuser -m -d /home/appuser appuser
|
32
|
+
|
33
|
+
|
34
|
+
RUN set -eux; \
|
35
|
+
mkdir -p /workspace
|
36
|
+
|
37
|
+
# Set working directory
|
38
|
+
WORKDIR /workspace
|
39
|
+
|
40
|
+
RUN chown -R appuser:appuser /workspace
|
41
|
+
|
42
|
+
# Switch to non-root user
|
43
|
+
USER appuser
|
44
|
+
|
45
|
+
CMD ["/usr/bin/bash"]
|
46
|
+
|
data/exe/qpdf_ruby
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#include "struct_node.hpp"
|
2
|
+
|
3
|
+
// Takes ownership of `child` and appends it to the end of this node's
// child list.
void ArrayNode::addChild(std::unique_ptr<StructNode> child) {
  children.emplace_back(std::move(child));
}
|
4
|
+
|
5
|
+
std::string ArrayNode::to_string(int level, PDFStructWalker& walker) {
|
6
|
+
std::ostringstream oss;
|
7
|
+
for (const auto& child : children) {
|
8
|
+
oss << child->to_string(level, walker);
|
9
|
+
}
|
10
|
+
return oss.str();
|
11
|
+
}
|
12
|
+
|
13
|
+
// Forwards the request to each child in order; the array node has nothing
// to fix on itself.
void ArrayNode::ensureLayoutBBox(PDFStructWalker& walker) {
  for (auto it = children.begin(), last = children.end(); it != last; ++it) {
    (*it)->ensureLayoutBBox(walker);
  }
}
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#include "document_handle.hpp"
|
2
|
+
|
3
|
+
#include <qpdf/QPDFWriter.hh>
|
4
|
+
#include <stdexcept>
|
5
|
+
#include <system_error>
|
6
|
+
#include <cerrno>
|
7
|
+
|
8
|
+
using namespace qpdf_ruby;
|
9
|
+
|
10
|
+
std::unique_ptr<DocumentHandle> DocumentHandle::open(const std::string& filename) {
|
11
|
+
auto qpdf = std::make_shared<QPDF>();
|
12
|
+
try {
|
13
|
+
// Use empty password → owner & user pwd same.
|
14
|
+
qpdf->processFile(filename.c_str());
|
15
|
+
} catch (const std::exception& ex) {
|
16
|
+
throw std::runtime_error(std::string("qpdf_ruby: failed to open “") + filename + "”: " + ex.what());
|
17
|
+
}
|
18
|
+
return std::unique_ptr<DocumentHandle>(new DocumentHandle(qpdf));
|
19
|
+
}
|
20
|
+
|
21
|
+
std::unique_ptr<DocumentHandle> DocumentHandle::open_memory(std::string const& desc, std::vector<unsigned char> buf,
|
22
|
+
std::string const& pwd) {
|
23
|
+
auto qpdf = std::make_shared<QPDF>();
|
24
|
+
try {
|
25
|
+
qpdf->processMemoryFile(desc.c_str(), reinterpret_cast<char const*>(buf.data()), buf.size(),
|
26
|
+
pwd.empty() ? nullptr : pwd.c_str());
|
27
|
+
} catch (std::exception const& ex) {
|
28
|
+
throw std::runtime_error("qpdf_ruby: open_memory failed: " + std::string(ex.what()));
|
29
|
+
}
|
30
|
+
|
31
|
+
auto h = std::unique_ptr<DocumentHandle>(new DocumentHandle(qpdf));
|
32
|
+
h->m_owned_buf = std::move(buf); // keep bytes alive
|
33
|
+
return h;
|
34
|
+
}
|
35
|
+
|
36
|
+
DocumentHandle::DocumentHandle(std::shared_ptr<QPDF> qpdf) : m_qpdf(std::move(qpdf)) {}
|
37
|
+
|
38
|
+
void DocumentHandle::write(const std::string& out_filename) {
|
39
|
+
try {
|
40
|
+
// honour original file’s extension-level features (linearized? encrypted? …)
|
41
|
+
QPDFWriter w(*m_qpdf, out_filename.c_str());
|
42
|
+
w.setStaticID(true); // deterministic IDs – helps tests
|
43
|
+
w.write();
|
44
|
+
} catch (const std::exception& ex) {
|
45
|
+
throw std::runtime_error(std::string("qpdf_ruby: failed to write “") + out_filename + "”: " + ex.what());
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
std::string DocumentHandle::write_to_memory() {
|
50
|
+
try {
|
51
|
+
QPDFWriter w(*m_qpdf, nullptr);
|
52
|
+
w.setStaticID(true);
|
53
|
+
w.setOutputMemory();
|
54
|
+
w.write();
|
55
|
+
|
56
|
+
auto b = w.getBuffer();
|
57
|
+
return std::string(reinterpret_cast<char const*>(b->getBuffer()), b->getSize());
|
58
|
+
} catch (std::exception const& ex) {
|
59
|
+
throw std::runtime_error("qpdf_ruby: write_to_memory failed: " + std::string(ex.what()));
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
// ------------------------- C bridge impl --------------------------------
|
64
|
+
|
65
|
+
extern "C" {
|
66
|
+
// C entry point: opens the PDF at `filename`.
//
// Returns a heap-allocated handle that the caller must release with
// qpdf_ruby_close(), or nullptr on failure with errno set:
//   EINVAL - `filename` is null
//   EIO    - the document could not be opened
//
// No exception is allowed to cross the C boundary, so everything is caught.
DocumentHandle* qpdf_ruby_open(const char* filename) {
  if (filename == nullptr) {
    // Constructing std::string from a null pointer is undefined behaviour.
    errno = EINVAL;
    return nullptr;
  }
  try {
    return DocumentHandle::open(filename).release();
  } catch (...) {
    errno = EIO;
    return nullptr;
  }
}
|
74
|
+
|
75
|
+
// C entry point: opens a PDF from `len` bytes starting at `buf`.
//
// The bytes are copied, so the caller keeps ownership of `buf`. `pwd` may be
// null, which is treated like an empty password.
//
// Returns a heap-allocated handle that the caller must release with
// qpdf_ruby_close(), or nullptr on failure with errno set:
//   EINVAL - `desc` is null, or `buf` is null while `len` is non-zero
//   EIO    - the document could not be opened
//
// No exception is allowed to cross the C boundary, so everything is caught.
DocumentHandle* qpdf_ruby_open_memory(char const* desc, unsigned char const* buf, size_t len, char const* pwd) {
  if (desc == nullptr || (buf == nullptr && len != 0)) {
    errno = EINVAL;
    return nullptr;
  }
  try {
    std::vector<unsigned char> copy;
    if (len != 0) {
      copy.assign(buf, buf + len);  // simple ownership
    }
    return DocumentHandle::open_memory(desc, std::move(copy), pwd ? pwd : "").release();
  } catch (...) {
    errno = EIO;
    return nullptr;
  }
}
|
84
|
+
|
85
|
+
// C entry point: writes the document behind `handle` to `out_filename`.
//
// Returns 0 on success, or -1 on failure with errno set:
//   EBADF  - `handle` is null
//   EINVAL - `out_filename` is null
//   EIO    - writing failed
//
// No exception is allowed to cross the C boundary, so everything is caught.
int qpdf_ruby_write(DocumentHandle* handle, const char* out_filename) {
  if (!handle) {
    errno = EBADF;
    return -1;
  }
  if (out_filename == nullptr) {
    // Constructing std::string from a null pointer is undefined behaviour.
    errno = EINVAL;
    return -1;
  }
  try {
    handle->write(out_filename);
    return 0;
  } catch (...) {
    errno = EIO;
    return -1;
  }
}
|
98
|
+
|
99
|
+
// C entry point: destroys a handle returned by one of the open functions.
// A null handle is accepted and ignored.
void qpdf_ruby_close(DocumentHandle* handle) {
  if (handle == nullptr) {
    return;
  }
  delete handle;
}
|
102
|
+
} // extern "C"
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <memory>
#include <string>
#include <vector>

#include <qpdf/QPDF.hh>
|
6
|
+
|
7
|
+
namespace qpdf_ruby {
|
8
|
+
|
9
|
+
/**
|
10
|
+
* A thin RAII wrapper around std::shared_ptr<QPDF>.
|
11
|
+
*
|
12
|
+
* - Only one QPDF parse per PDF file.
|
13
|
+
* - Shared among Ruby proxy objects (StructureProxy, PathsProxy, …).
|
14
|
+
* - Non-copyable but movable.
|
15
|
+
*/
|
16
|
+
class DocumentHandle final {
|
17
|
+
public:
|
18
|
+
// ---- factory ----------------------------------------------------------
|
19
|
+
static std::unique_ptr<DocumentHandle> open(const std::string& filename);
|
20
|
+
static std::unique_ptr<DocumentHandle> open_memory(std::string const& description, std::vector<unsigned char> data,
|
21
|
+
std::string const& password = "");
|
22
|
+
|
23
|
+
// ---- public API -------------------------------------------------------
|
24
|
+
/** Write the (possibly-modified) PDF to disk. */
|
25
|
+
void write(const std::string& out_filename);
|
26
|
+
|
27
|
+
std::string write_to_memory();
|
28
|
+
|
29
|
+
/** Direct access for C++ helpers that need the raw QPDF. */
|
30
|
+
QPDF& qpdf() { return *m_qpdf; }
|
31
|
+
const QPDF& qpdf() const { return *m_qpdf; }
|
32
|
+
|
33
|
+
// ---- rule of five -----------------------------------------------------
|
34
|
+
~DocumentHandle() = default;
|
35
|
+
DocumentHandle(const DocumentHandle&) = delete;
|
36
|
+
DocumentHandle& operator=(const DocumentHandle&) = delete;
|
37
|
+
DocumentHandle(DocumentHandle&&) noexcept = default;
|
38
|
+
DocumentHandle& operator=(DocumentHandle&&) noexcept = default;
|
39
|
+
|
40
|
+
private:
|
41
|
+
explicit DocumentHandle(std::shared_ptr<QPDF> qpdf);
|
42
|
+
|
43
|
+
std::shared_ptr<QPDF> m_qpdf;
|
44
|
+
std::vector<unsigned char> m_owned_buf;
|
45
|
+
};
|
46
|
+
|
47
|
+
extern "C" {
|
48
|
+
/** Returns a freshly allocated handle or nullptr on error (see errno). */
|
49
|
+
DocumentHandle* qpdf_ruby_open(const char* filename);
|
50
|
+
|
51
|
+
DocumentHandle* qpdf_ruby_open_memory(char const* desc, unsigned char const* buf, size_t len, char const* pwd);
|
52
|
+
|
53
|
+
/** Writes PDF; returns 0 on success, -1 on error (see errno). */
|
54
|
+
int qpdf_ruby_write(DocumentHandle* handle, const char* out_filename);
|
55
|
+
|
56
|
+
/** Deletes the handle; passing nullptr is a no-op. A handle must not be used or closed again after this call. */
|
57
|
+
void qpdf_ruby_close(DocumentHandle* handle);
|
58
|
+
}
|
59
|
+
|
60
|
+
} // namespace qpdf_ruby
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true

require "mkmf"

# Makes all symbols private by default to avoid unintended conflicts with
# other gems. To explicitly export symbols you can use RUBY_FUNC_EXPORTED
# selectively, or remove this flag entirely.
append_cflags("-fvisibility=hidden")

# Always register the qpdf location options. dir_config handles
# --with-qpdf-dir as well as --with-qpdf-include / --with-qpdf-lib and adds
# the resulting directories to the include flags and library search path
# itself. It used to be called only when --with-qpdf-dir was given, so the
# include/lib form documented in the README was silently ignored.
dir_config("qpdf")
$LDFLAGS << " -lqpdf"

if RbConfig::CONFIG["host_os"] =~ /darwin/
  $LDFLAGS << " -Wl,-search_paths_first -Wl,-headerpad_max_install_names -Wl,-multiply_defined,suppress"
  $LDFLAGS << " -Wl,-undefined,dynamic_lookup"
end

$CXXFLAGS << " -std=c++17"

create_makefile("qpdf_ruby/qpdf_ruby")
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#include "struct_node.hpp"
|
2
|
+
|
3
|
+
// Adds a /Layout attribute object with a /BBox to the structure element
// `node`.
//
// The box comes from the walker's MCID -> bbox map when `node`'s /K is an
// integer MCID with an entry there; otherwise the crop box of the element's
// page is used. The page is taken from the first /Pg found on `node` or one
// of its /P ancestors, falling back to a /Pg on one of the direct /K kids.
// When no indirect /Pg can be found, a message is printed to stderr and the
// element is left untouched.
//
// Existing /A content is preserved: the new attribute object is appended to
// a fresh array instead of replacing whatever was there, and existing
// attribute objects are never modified in place.
static void add_bbox(PDFStructWalker& walker, QPDFObjectHandle node) {
  // Sentinel for "no integer /K". MCIDs are non-negative, so -1 can never
  // match a map entry. The former -numeric_limits<int>::infinity() evaluates
  // to 0 for int and therefore collided with the valid MCID 0.
  constexpr int kNoMcid = -1;
  // Upper bound on /P hops so a malformed, cyclic parent chain terminates.
  constexpr int kMaxParentHops = 10000;

  QPDFObjectHandle kids = node.getKey("/K");
  int mcid = kNoMcid;
  if (kids.isInteger()) {
    mcid = static_cast<int>(kids.getIntValue());
  }

  QPDFObjectHandle pageObj;
  bool found = false;

  // Try to find /Pg by walking up the parent chain.
  QPDFObjectHandle current = node;
  for (int hops = 0; hops < kMaxParentHops && current.isDictionary(); ++hops) {
    if (current.hasKey("/Pg")) {
      pageObj = current.getKey("/Pg");
      found = true;
      break;
    }
    if (!current.hasKey("/P")) {
      break;  // No more parents to check
    }
    current = current.getKey("/P");
  }

  // Fall back to the first direct kid that names its page.
  if (!found && kids.isArray()) {
    const int kid_count = kids.getArrayNItems();
    for (int i = 0; i < kid_count; ++i) {
      QPDFObjectHandle kid = kids.getArrayItem(i);
      if (kid.isDictionary() && kid.hasKey("/Pg")) {
        pageObj = kid.getKey("/Pg");
        found = true;
        break;
      }
    }
  }

  if (!found || !pageObj.isIndirect()) {
    std::cerr << "No /Pg key found for MCID " << mcid << ", cannot add BBox." << std::endl;
    return;
  }

  // Default to the page's crop box, then prefer the box recorded for the MCID.
  auto crop_box = walker.getPageCropBoxFor(pageObj);
  double llx = crop_box[0], lly = crop_box[1], urx = crop_box[2], ury = crop_box[3];

  auto const& bbox_map = walker.getMcidBboxMap();
  auto const entry = bbox_map.find(mcid);
  if (entry != bbox_map.end()) {
    auto const& b = entry->second;
    llx = b[0];
    lly = b[1];
    urx = b[2];
    ury = b[3];
  }

  QPDFObjectHandle bbox = QPDFObjectHandle::newArray();
  bbox.appendItem(QPDFObjectHandle::newReal(llx));
  bbox.appendItem(QPDFObjectHandle::newReal(lly));
  bbox.appendItem(QPDFObjectHandle::newReal(urx));
  bbox.appendItem(QPDFObjectHandle::newReal(ury));

  QPDFObjectHandle layout = QPDFObjectHandle::newDictionary();
  layout.replaceKey("/O", QPDFObjectHandle::newName("/Layout"));
  layout.replaceKey("/BBox", bbox);

  // getKey() yields a null object when /A is absent, which matches neither
  // branch below and ends in the plain assignment.
  QPDFObjectHandle existing = node.getKey("/A");
  if (existing.isDictionary() || existing.isStream()) {
    // Single attribute object: keep it and add ours after it.
    QPDFObjectHandle combined = QPDFObjectHandle::newArray();
    combined.appendItem(existing);
    combined.appendItem(layout);
    node.replaceKey("/A", combined);
  } else if (existing.isArray()) {
    // Copy into a new array so a possibly shared array is not mutated.
    QPDFObjectHandle combined = QPDFObjectHandle::newArray();
    const int item_count = existing.getArrayNItems();
    for (int i = 0; i < item_count; ++i) {
      combined.appendItem(existing.getArrayItem(i));
    }
    combined.appendItem(layout);
    node.replaceKey("/A", combined);
  } else {
    node.replaceKey("/A", layout);
  }
}
|
83
|
+
|
84
|
+
// Makes sure this figure carries a /Layout attribute object with a /BBox.
//
// The base-class implementation runs first. If /A already holds such an
// attribute object - either directly or as an element of an /A array -
// nothing else is done; otherwise add_bbox() creates one.
void FigureNode::ensureLayoutBBox(PDFStructWalker& walker) {
  StructElemNode::ensureLayoutBBox(walker);

  // True when `candidate` is a dictionary owned by /Layout that has a /BBox.
  // /O is checked with isName() before getName() so that a malformed owner
  // entry is simply treated as "not /Layout" instead of triggering a QPDF
  // type warning or error.
  auto is_layout_with_bbox = [](QPDFObjectHandle candidate) {
    if (!candidate.isDictionary() || !candidate.hasKey("/BBox")) {
      return false;
    }
    QPDFObjectHandle owner = candidate.getKey("/O");
    return owner.isName() && owner.getName() == "/Layout";
  };

  if (node.hasKey("/A")) {
    QPDFObjectHandle attrs = node.getKey("/A");

    if (attrs.isDictionary()) {
      if (is_layout_with_bbox(attrs)) {
        return;  // nothing to do
      }
    } else if (attrs.isArray()) {
      const int item_count = attrs.getArrayNItems();
      for (int i = 0; i < item_count; ++i) {
        if (is_layout_with_bbox(attrs.getArrayItem(i))) {
          return;  // nothing to do
        }
      }
    }
  }

  add_bbox(walker, node);
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#include "struct_node.hpp"
|
2
|
+
|
3
|
+
int McidNode::getMcid() const { return mcid; }
|
4
|
+
|
5
|
+
void McidNode::setPage(int page) { pageNumber = page; }
|
6
|
+
|
7
|
+
// Renders this node as a single line of the form "[MCID: <id>]", preceded by
// whatever IndentHelper::indent emits for `level` and followed by a newline.
// `walker` is not used here.
std::string McidNode::to_string(int level, PDFStructWalker& walker) {
  std::ostringstream line;
  IndentHelper::indent(line, level);
  line << "[MCID: " << mcid << "]" << '\n';
  return line.str();
}
|