qpdf_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c5f6da89731f271f230b6784f82f01bb4f1dfe4060d4caae6d956fa73d8fc13e
4
+ data.tar.gz: c3d2e9dc8baf1975acbca704b8b76dab210c0f959ce78b8655d1c3c295585d10
5
+ SHA512:
6
+ metadata.gz: c850b48c57e89e431b3d7340b1cd4819f28887402cdc03af7e108533249a4e8742351732c22217b0fed011e4799b9b00268219ec2b1ea17d6576ef50125ffbbb
7
+ data.tar.gz: 60f89a46753d7caa4d70a3832c2dc68bd01470528c5c6c6939df10920608f3e451ed7c11d42874fd6944df1a06bdd4847020e1289fb1b07c100fc502cc48e5ce
data/.clang-format ADDED
@@ -0,0 +1,4 @@
1
+ BasedOnStyle: Google
2
+ SortIncludes: false
3
+ IndentWidth: 2
4
+ ColumnLimit: 120
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,88 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ TargetRubyVersion: 3.3
4
+ Exclude:
5
+ - 'ext/qpdf_ruby/extconf.rb'
6
+ - 'tmp/**/*'
7
+ - 'vendor/**/*'
8
+
9
+ Style/StringLiterals:
10
+ EnforcedStyle: double_quotes
11
+
12
+ Style/StringLiteralsInInterpolation:
13
+ EnforcedStyle: double_quotes
14
+
15
+ Style/Documentation:
16
+ Enabled: false
17
+
18
+ RSpec/SubjectStub:
19
+ Enabled: false
20
+
21
+ RSpec/ExampleLength:
22
+ Enabled: false
23
+
24
+ Layout/MultilineMethodCallIndentation:
25
+ Enabled: false
26
+
27
+ Layout/FirstArgumentIndentation:
28
+ Enabled: false
29
+
30
+ Layout/ClosingParenthesisIndentation:
31
+ Enabled: false
32
+
33
+ Layout/FirstHashElementIndentation:
34
+ EnforcedStyle: consistent
35
+
36
+ Layout/FirstArrayElementIndentation:
37
+ Enabled: false
38
+
39
+ Layout/MultilineOperationIndentation:
40
+ Enabled: false
41
+
42
+ Layout/BeginEndAlignment:
43
+ Enabled: false
44
+
45
+ Layout/ArrayAlignment:
46
+ Enabled: false
47
+
48
+ Layout/LineLength:
49
+ Enabled: false
50
+
51
+ Layout/LineEndStringConcatenationIndentation:
52
+ Enabled: false
53
+
54
+ RSpec/MultipleMemoizedHelpers:
55
+ Max: 10
56
+
57
+ Metrics/MethodLength:
58
+ Enabled: false
59
+
60
+ Metrics/ClassLength:
61
+ Enabled: false
62
+
63
+ Metrics/ParameterLists:
64
+ Max: 10
65
+
66
+
67
+ Gemspec/DevelopmentDependencies:
68
+ EnforcedStyle: gemspec
69
+
70
+ RSpec/InstanceVariable:
71
+ Enabled: false
72
+
73
+ RSpec/BeforeAfterAll:
74
+ Enabled: false
75
+
76
+ RSpec/SpecFilePathFormat:
77
+ Enabled: true
78
+ Exclude:
79
+ - 'spec/acceptance/**/*_spec.rb'
80
+
81
+ RSpec/DescribeClass:
82
+ Enabled: true
83
+ Exclude:
84
+ - 'spec/acceptance/**/*_spec.rb'
85
+
86
+ plugins:
87
+ - rubocop-rake
88
+ - rubocop-rspec
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ qpdf_ruby
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-3.3.4
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2025-06-11
4
+
5
+ - Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Dieter S.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,129 @@
1
+ # QpdfRuby
2
+
3
+ > **Patch & polish PDFs so that PAC 2024 finally turns green.**
4
+
5
+ QpdfRuby is a very small Ruby wrapper around the battle‑tested
6
+ [QPDF \>= 12](https://qpdf.sourceforge.net/) C++ library. Right now the
7
+ library focuses on only **three specialised tasks** that are needed when
8
+ PDFs are printed from Chromium‑based browsers and subsequently audited
9
+ with the PAC 2024 accessibility checker:
10
+
11
+ 1. **Export the structure tree as XML** – handy for debugging.
12
+ 2. **Mark vector path objects as `/Artifact`** so that decorative lines,
13
+ boxes, etc. are ignored by assistive technologies.
14
+ 3. **Add missing `/BBox` entries to every `/Figure` element** (derived
15
+ from the page’s graphic operators) so that screen readers know the
16
+ physical extent of each image.
17
+
18
+ Together these tweaks eliminate the most common complaints PAC 2024 has
19
+ about browser‑generated PDFs.
20
+
21
+ ---
22
+
23
+ ## Features in Detail
24
+
25
+ | Feature | Ruby API |
26
+ | ---------------------------------------------- | ---------------------------------------- |
27
+ | Dump structure tree as XML | `doc.show_structure` |
28
+ | Mark path objects ( `re … S/s/f/F/B/b` ) | `doc.mark_paths_as_artifacts` |
29
+ | Ensure `/Figure` elements have a layout BBox¹ | `doc.ensure_bbox` |
30
+
31
+ _¹Internally the gem parses each page’s content stream, maps image
32
+ `/MCID`s to their transformation matrix, computes the bounding box
33
+ (courtesy of a little linear algebra) and finally writes the result into
34
+ the structure tree._
35
+
36
+ ---
37
+
38
+ ## Installation
39
+
40
+ ### Requirements
41
+
42
+ * **Ruby** \>= 3.1
43
+ * **QPDF** \>= 12.0.0 (headers & libs)
44
+
45
+ ### macOS
46
+ ```bash
47
+ brew install qpdf
48
+ bundle config set --local build.qpdf_ruby "--with-qpdf-dir=$(brew --prefix qpdf)"
49
+ ```
50
+
51
+ ### Debian/Ubuntu
52
+ ```bash
53
+ # on Debian 11/Ubuntu 20.04 you may need newer packages from testing
54
+ sudo apt-get update && sudo apt-get install -y libqpdf-dev qpdf
55
+ ```
56
+ If `apt` cannot provide QPDF ≥ 12 you can compile it yourself or pull the
57
+ package from *testing/unstable* – see the [Dockerfile](./docker/Dockerfile) for a working
58
+ `apt preferences` snippet.
59
+
60
+ ### Add the gem
61
+ ```bash
62
+ bundle add qpdf_ruby
63
+ # …or without bundler:
64
+ # gem install qpdf_ruby -- --with-qpdf-include=/usr/local/include/qpdf --with-qpdf-lib=/usr/local/lib
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Quick Start
70
+ ```ruby
71
+ require "qpdf_ruby"
72
+
73
+ pdf = QpdfRuby::Document.new("input.pdf")
74
+
75
+ # 1. tag decorative paths
76
+ pdf.mark_paths_as_artifacts
77
+
78
+ # 2. add BBox to every <Figure>
79
+ pdf.ensure_bbox
80
+
81
+ # 3. introspect structure tree (optional)
82
+ File.write("structure.xml", pdf.show_structure)
83
+
84
+ # 4. save 🎉
85
+ pdf.write("fixed.pdf")
86
+ ```
87
+
88
+ Run PAC 2024 on `fixed.pdf` – it should report far fewer (or zero!)
89
+ errors compared to the original browser output.
90
+
91
+ ---
92
+
93
+ ## Development
94
+ ```bash
95
+ git clone https://github.com/dieter-medium/qpdf_ruby.git
96
+ cd qpdf_ruby
97
+ bin/setup # install gem + test deps
98
+ autotest # guard & RSpec
99
+ ```
100
+ * To release, bump the version in **version.rb** and run `bundle exec rake release` to push a new gem.
101
+
102
+ ### Testing with local QPDF builds
103
+ If you tinker with QPDF itself, point Bundler to your custom prefix:
104
+ ```bash
105
+ bundle config set --local build.qpdf_ruby "--with-qpdf-include=$HOME/opt/qpdf/include --with-qpdf-lib=$HOME/opt/qpdf/lib"
106
+ ```
107
+
108
+ ---
109
+
110
+ ## Roadmap
111
+
112
+ TBD
113
+
114
+ ---
115
+
116
+ ## Contributing
117
+ Bug reports & pull requests are welcome at
118
+ <https://github.com/dieter-medium/qpdf_ruby>.
119
+
120
+ ### Code Style
121
+ * C++ 17, clang‑format enforced
122
+ * Ruby 3.2, rubocop default rules
123
+
124
+ ---
125
+
126
+ ## License
127
+
128
+ [MIT](https://opensource.org/licenses/MIT) – see `LICENSE.txt` for full
129
+ text.
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ require "rake/extensiontask"
13
+
14
+ desc "Build the gem including native extensions"
15
+ task build: :compile
16
+
17
+ GEMSPEC = Gem::Specification.load("qpdf_ruby.gemspec")
18
+
19
+ Rake::ExtensionTask.new("qpdf_ruby", GEMSPEC) do |ext|
20
+ ext.lib_dir = "lib/qpdf_ruby"
21
+ end
22
+
23
+ Dir.glob("tasks/*.rake").each { |r| load r }
24
+
25
+ task default: %i[clobber compile spec rubocop]
data/docker/Dockerfile ADDED
@@ -0,0 +1,46 @@
1
+ FROM ruby:3.3
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+
5
+ RUN set -eux; \
6
+ echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list.d/extra.list; \
7
+ echo "deb http://deb.debian.org/debian unstable main" >> /etc/apt/sources.list.d/extra.list; \
8
+ \
9
+ printf "Package: *\nPin: release a=testing\nPin-Priority: 100\n\n" > /etc/apt/preferences.d/99-pin-testing; \
10
+ printf "Package: *\nPin: release a=unstable\nPin-Priority: 100\n" >> /etc/apt/preferences.d/99-pin-testing; \
11
+ \
12
+ apt-get update; \
13
+ apt-get -y --no-install-recommends install qpdf libqpdf-dev -t testing; \
14
+ apt-mark hold qpdf libqpdf-dev; \
15
+ \
16
+ apt-get clean; rm -rf /var/lib/apt/lists/*;
17
+
18
+ # Smoke‑test (shown in build log)
19
+ RUN qpdf --version # should print "qpdf version 12.x"
20
+
21
+
22
+ COPY ./pkg/qpdf_ruby-*.gem ./
23
+ COPY ./spec/fixtures/example_accessibility.pdf /tmp/dummy.pdf
24
+
25
+ RUN gem install ./qpdf_ruby-*.gem -- --with-qpdf-include=/usr/local/include/qpdf --with-qpdf-lib=/usr/local/lib
26
+
27
+ RUN ruby -r qpdf_ruby -e 'puts "QpdfRuby version: #{QpdfRuby::VERSION}";doc = QpdfRuby::Document.new "/tmp/dummy.pdf"; puts doc.show_structure'
28
+
29
+ # Create a non-root user
30
+ RUN set -eux; \
31
+ groupadd -r appuser && useradd -r -g appuser -m -d /home/appuser appuser
32
+
33
+
34
+ RUN set -eux; \
35
+ mkdir -p /workspace
36
+
37
+ # Set working directory
38
+ WORKDIR /workspace
39
+
40
+ RUN chown -R appuser:appuser /workspace
41
+
42
+ # Switch to non-root user
43
+ USER appuser
44
+
45
+ CMD ["/usr/bin/bash"]
46
+
data/exe/qpdf_ruby ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "qpdf_ruby"
@@ -0,0 +1,17 @@
1
+ #include "struct_node.hpp"
2
+
3
+ void ArrayNode::addChild(std::unique_ptr<StructNode> child) { children.push_back(std::move(child)); }
4
+
5
+ std::string ArrayNode::to_string(int level, PDFStructWalker& walker) {
6
+ std::ostringstream oss;
7
+ for (const auto& child : children) {
8
+ oss << child->to_string(level, walker);
9
+ }
10
+ return oss.str();
11
+ }
12
+
13
+ void ArrayNode::ensureLayoutBBox(PDFStructWalker& walker) {
14
+ for (const auto& child : children) {
15
+ child->ensureLayoutBBox(walker);
16
+ }
17
+ }
@@ -0,0 +1,102 @@
1
+ #include "document_handle.hpp"
2
+
3
+ #include <qpdf/QPDFWriter.hh>
4
+ #include <stdexcept>
5
+ #include <system_error>
6
+ #include <cerrno>
7
+
8
+ using namespace qpdf_ruby;
9
+
10
+ std::unique_ptr<DocumentHandle> DocumentHandle::open(const std::string& filename) {
11
+ auto qpdf = std::make_shared<QPDF>();
12
+ try {
13
+ // Use empty password → owner & user pwd same.
14
+ qpdf->processFile(filename.c_str());
15
+ } catch (const std::exception& ex) {
16
+ throw std::runtime_error(std::string("qpdf_ruby: failed to open “") + filename + "”: " + ex.what());
17
+ }
18
+ return std::unique_ptr<DocumentHandle>(new DocumentHandle(qpdf));
19
+ }
20
+
21
+ std::unique_ptr<DocumentHandle> DocumentHandle::open_memory(std::string const& desc, std::vector<unsigned char> buf,
22
+ std::string const& pwd) {
23
+ auto qpdf = std::make_shared<QPDF>();
24
+ try {
25
+ qpdf->processMemoryFile(desc.c_str(), reinterpret_cast<char const*>(buf.data()), buf.size(),
26
+ pwd.empty() ? nullptr : pwd.c_str());
27
+ } catch (std::exception const& ex) {
28
+ throw std::runtime_error("qpdf_ruby: open_memory failed: " + std::string(ex.what()));
29
+ }
30
+
31
+ auto h = std::unique_ptr<DocumentHandle>(new DocumentHandle(qpdf));
32
+ h->m_owned_buf = std::move(buf); // keep bytes alive
33
+ return h;
34
+ }
35
+
36
+ DocumentHandle::DocumentHandle(std::shared_ptr<QPDF> qpdf) : m_qpdf(std::move(qpdf)) {}
37
+
38
+ void DocumentHandle::write(const std::string& out_filename) {
39
+ try {
40
+ // honour original file’s extension-level features (linearized? encrypted? …)
41
+ QPDFWriter w(*m_qpdf, out_filename.c_str());
42
+ w.setStaticID(true); // deterministic IDs – helps tests
43
+ w.write();
44
+ } catch (const std::exception& ex) {
45
+ throw std::runtime_error(std::string("qpdf_ruby: failed to write “") + out_filename + "”: " + ex.what());
46
+ }
47
+ }
48
+
49
+ std::string DocumentHandle::write_to_memory() {
50
+ try {
51
+ QPDFWriter w(*m_qpdf, nullptr);
52
+ w.setStaticID(true);
53
+ w.setOutputMemory();
54
+ w.write();
55
+
56
+ auto b = w.getBuffer();
57
+ return std::string(reinterpret_cast<char const*>(b->getBuffer()), b->getSize());
58
+ } catch (std::exception const& ex) {
59
+ throw std::runtime_error("qpdf_ruby: write_to_memory failed: " + std::string(ex.what()));
60
+ }
61
+ }
62
+
63
+ // ------------------------- C bridge impl --------------------------------
64
+
65
+ extern "C" {
66
+ DocumentHandle* qpdf_ruby_open(const char* filename) {
67
+ try {
68
+ return DocumentHandle::open(filename).release();
69
+ } catch (const std::exception&) {
70
+ errno = EIO;
71
+ return nullptr;
72
+ }
73
+ }
74
+
75
+ DocumentHandle* qpdf_ruby_open_memory(char const* desc, unsigned char const* buf, size_t len, char const* pwd) {
76
+ try {
77
+ std::vector<unsigned char> copy(buf, buf + len); // simple ownership
78
+ return DocumentHandle::open_memory(desc, std::move(copy), pwd ? pwd : "").release();
79
+ } catch (...) {
80
+ errno = EIO;
81
+ return nullptr;
82
+ }
83
+ }
84
+
85
+ int qpdf_ruby_write(DocumentHandle* handle, const char* out_filename) {
86
+ if (!handle) {
87
+ errno = EBADF;
88
+ return -1;
89
+ }
90
+ try {
91
+ handle->write(out_filename);
92
+ return 0;
93
+ } catch (const std::exception&) {
94
+ errno = EIO;
95
+ return -1;
96
+ }
97
+ }
98
+
99
+ void qpdf_ruby_close(DocumentHandle* handle) {
100
+ delete handle; // ok on nullptr
101
+ }
102
+ } // extern "C"
@@ -0,0 +1,60 @@
1
+ #pragma once
2
+
3
+ #include <memory>
4
+ #include <string>
5
+ #include <qpdf/QPDF.hh>
6
+
7
+ namespace qpdf_ruby {
8
+
9
+ /**
10
+ * A thin RAII wrapper around std::shared_ptr<QPDF>.
11
+ *
12
+ * - Only one QPDF parse per PDF file.
13
+ * - Shared among Ruby proxy objects (StructureProxy, PathsProxy, …).
14
+ * - Non-copyable but movable.
15
+ */
16
+ class DocumentHandle final {
17
+ public:
18
+ // ---- factory ----------------------------------------------------------
19
+ static std::unique_ptr<DocumentHandle> open(const std::string& filename);
20
+ static std::unique_ptr<DocumentHandle> open_memory(std::string const& description, std::vector<unsigned char> data,
21
+ std::string const& password = "");
22
+
23
+ // ---- public API -------------------------------------------------------
24
+ /** Write the (possibly-modified) PDF to disk. */
25
+ void write(const std::string& out_filename);
26
+
27
+ std::string write_to_memory();
28
+
29
+ /** Direct access for C++ helpers that need the raw QPDF. */
30
+ QPDF& qpdf() { return *m_qpdf; }
31
+ const QPDF& qpdf() const { return *m_qpdf; }
32
+
33
+ // ---- rule of five -----------------------------------------------------
34
+ ~DocumentHandle() = default;
35
+ DocumentHandle(const DocumentHandle&) = delete;
36
+ DocumentHandle& operator=(const DocumentHandle&) = delete;
37
+ DocumentHandle(DocumentHandle&&) noexcept = default;
38
+ DocumentHandle& operator=(DocumentHandle&&) noexcept = default;
39
+
40
+ private:
41
+ explicit DocumentHandle(std::shared_ptr<QPDF> qpdf);
42
+
43
+ std::shared_ptr<QPDF> m_qpdf;
44
+ std::vector<unsigned char> m_owned_buf;
45
+ };
46
+
47
+ extern "C" {
48
+ /** Returns a freshly allocated handle or nullptr on error (see errno). */
49
+ DocumentHandle* qpdf_ruby_open(const char* filename);
50
+
51
+ DocumentHandle* qpdf_ruby_open_memory(char const* desc, unsigned char const* buf, size_t len, char const* pwd);
52
+
53
+ /** Writes PDF; returns 0 on success, -1 on error (see errno). */
54
+ int qpdf_ruby_write(DocumentHandle* handle, const char* out_filename);
55
+
56
+ /** Deletes the handle (idempotent). */
57
+ void qpdf_ruby_close(DocumentHandle* handle);
58
+ }
59
+
60
+ } // namespace qpdf_ruby
@@ -0,0 +1,25 @@
1
# frozen_string_literal: true

require "mkmf"

# Makes all symbols private by default to avoid unintended conflict
# with other gems. To explicitly export symbols you can use RUBY_FUNC_EXPORTED
# selectively, or entirely remove this flag.
append_cflags("-fvisibility=hidden")

# dir_config registers --with-qpdf-dir, --with-qpdf-include and --with-qpdf-lib
# and adds whichever directories were supplied to the header and library search
# paths. It is called unconditionally so that all three options take effect,
# and no -I/-L flags are appended by hand.
dir_config("qpdf")
$LDFLAGS << " -lqpdf"

if RbConfig::CONFIG["host_os"] =~ /darwin/
  $LDFLAGS << " -Wl,-search_paths_first -Wl,-headerpad_max_install_names -Wl,-multiply_defined,suppress"
  $LDFLAGS << " -Wl,-undefined,dynamic_lookup"
end

# The extension sources are written against C++17.
$CXXFLAGS << " -std=c++17"

create_makefile("qpdf_ruby/qpdf_ruby")
@@ -0,0 +1,109 @@
1
+ #include "struct_node.hpp"
2
+
3
+ static void add_bbox(PDFStructWalker& walker, QPDFObjectHandle node) {
4
+ int mcid = -std::numeric_limits<int>::infinity();
5
+
6
+ if (node.getKey("/K").isInteger()) {
7
+ mcid = node.getKey("/K").getIntValue();
8
+ }
9
+
10
+ // Get the bbox from the walker or use defaults
11
+
12
+ QPDFObjectHandle pageObj;
13
+ QPDFObjectHandle currentNode = node;
14
+ bool found = false;
15
+
16
+ // Try to find /Pg by walking up the parent chain
17
+ while (currentNode.isDictionary()) {
18
+ if (currentNode.hasKey("/Pg")) {
19
+ pageObj = currentNode.getKey("/Pg");
20
+ found = true;
21
+ break;
22
+ }
23
+
24
+ if (currentNode.hasKey("/P")) {
25
+ currentNode = currentNode.getKey("/P");
26
+ } else {
27
+ break; // No more parents to check
28
+ }
29
+ }
30
+
31
+ if (!found && node.hasKey("/K") && node.getKey("/K").isArray()) {
32
+ QPDFObjectHandle kids = node.getKey("/K");
33
+ for (int i = 0; i < kids.getArrayNItems(); ++i) {
34
+ QPDFObjectHandle kid = kids.getArrayItem(i);
35
+ if (kid.isDictionary() && kid.hasKey("/Pg")) {
36
+ pageObj = kid.getKey("/Pg");
37
+ found = true;
38
+ break;
39
+ }
40
+ }
41
+ }
42
+
43
+ if (!found || !pageObj.isIndirect()) {
44
+ std::cerr << "No /Pg key found for MCID " << mcid << ", cannot add BBox." << std::endl;
45
+ return;
46
+ }
47
+
48
+ auto media_box = walker.getPageCropBoxFor(pageObj);
49
+
50
+ double llx = media_box[0], lly = media_box[1], urx = media_box[2], ury = media_box[3];
51
+ auto const& bbox_map = walker.getMcidBboxMap();
52
+
53
+ // Use a try/catch to prevent failures when accessing the map
54
+ try {
55
+ if (bbox_map.count(mcid) > 0) {
56
+ auto const& b = bbox_map.at(mcid);
57
+ llx = b[0];
58
+ lly = b[1];
59
+ urx = b[2];
60
+ ury = b[3];
61
+ }
62
+ } catch (const std::exception& e) {
63
+ std::cerr << "Error accessing mcid " << mcid << " in bbox map: " << e.what() << std::endl;
64
+ // Continue with default values
65
+ }
66
+
67
+ QPDFObjectHandle attrs;
68
+ attrs = QPDFObjectHandle::newDictionary();
69
+ attrs.replaceKey("/O", QPDFObjectHandle::newName("/Layout"));
70
+
71
+ node.replaceKey("/A", attrs);
72
+
73
+ if (!attrs.hasKey("/BBox")) {
74
+ QPDFObjectHandle arr = QPDFObjectHandle::newArray();
75
+ arr.appendItem(QPDFObjectHandle::newReal(llx));
76
+ arr.appendItem(QPDFObjectHandle::newReal(lly));
77
+ arr.appendItem(QPDFObjectHandle::newReal(urx));
78
+ arr.appendItem(QPDFObjectHandle::newReal(ury));
79
+
80
+ attrs.replaceKey("/BBox", arr);
81
+ }
82
+ }
83
+
84
+ void FigureNode::ensureLayoutBBox(PDFStructWalker& walker) {
85
+ StructElemNode::ensureLayoutBBox(walker);
86
+
87
+ // Bail if a BBox is already present *anywhere* in /A
88
+ if (node.hasKey("/A")) {
89
+ QPDFObjectHandle A = node.getKey("/A");
90
+ auto has_bbox = [](QPDFObjectHandle const& dict) {
91
+ QPDFObjectHandle non_const_dict = dict;
92
+ return non_const_dict.isDictionary() && non_const_dict.hasKey("/O") &&
93
+ non_const_dict.getKey("/O").getName() == "/Layout" && non_const_dict.hasKey("/BBox");
94
+ };
95
+
96
+ if ((A.isDictionary() && has_bbox(A)) || (A.isArray() && [&]() -> bool {
97
+ for (int i = 0; i < A.getArrayNItems(); ++i) {
98
+ if (has_bbox(A.getArrayItem(i))) {
99
+ return true;
100
+ }
101
+ }
102
+ return false;
103
+ }())) {
104
+ return; // nothing to do
105
+ }
106
+ }
107
+
108
+ add_bbox(walker, node);
109
+ }
@@ -0,0 +1,15 @@
1
+ #include "struct_node.hpp"
2
+
3
+ int McidNode::getMcid() const { return mcid; }
4
+
5
+ void McidNode::setPage(int page) { pageNumber = page; }
6
+
7
+ std::string McidNode::to_string(int level, PDFStructWalker& walker) {
8
+ std::ostringstream oss;
9
+ IndentHelper::indent(oss, level);
10
+ oss << "[MCID: " << mcid;
11
+
12
+ oss << "]" << std::endl;
13
+
14
+ return oss.str();
15
+ }