metaclean 1.0.2 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # ───────────────────────────────────────────────────────────────────────────
4
3
  # Wrapper around `qpdf` — a PDF structural cleaner.
5
4
  #
6
- # Why qpdf in addition to mat2/ExifTool? PDFs can carry metadata in places
7
- # those two don't always reach: orphaned objects, unused image streams,
8
- # old revisions kept in the file. qpdf rebuilds the PDF from scratch using
9
- # only the objects actually referenced by the document. That's a great
10
- # final pass after the other tools have stripped the obvious metadata.
11
- # ───────────────────────────────────────────────────────────────────────────
5
+ # Why qpdf on top of mat2/ExifTool? PDFs hide metadata in places those two
6
+ # don't always reach: orphaned objects, unused image streams, old revisions.
7
+ # qpdf rebuilds the PDF using only referenced objects — a final pass after the
8
+ # other tools have stripped the obvious metadata.
12
9
 
13
10
  require 'open3'
14
11
  require 'fileutils'
12
+ require 'securerandom'
15
13
 
16
14
  module Metaclean
17
15
  module Qpdf
@@ -20,21 +18,21 @@ module Metaclean
20
18
  def available?
21
19
  return @available if defined?(@available)
22
20
 
23
- _out, _err, status = Open3.capture3('qpdf', '--version')
21
+ out, _err, status = Open3.capture3('qpdf', '--version')
24
22
  @available = status.success?
23
+ # `qpdf --version` prints "qpdf version 11.9.0" on its first line. We
24
+ # keep just the bare number (`.split.last`) so callers don't each have
25
+ # to post-process it — matching Exiftool.version / Mat2.version. Captured
26
+ # here so `version` reuses it instead of re-spawning the binary.
27
+ @version = @available ? out.lines.first.to_s.strip.split.last : nil
28
+ @available
25
29
  rescue Errno::ENOENT
30
+ @version = nil
26
31
  @available = false
27
32
  end
28
33
 
29
34
  def version
30
- return nil unless available?
31
-
32
- out, _err, status = Open3.capture3('qpdf', '--version')
33
- # `qpdf --version` prints multiple lines starting with the version line.
34
- # `.lines.first` grabs only that line.
35
- status.success? ? out.lines.first.to_s.strip : nil
36
- rescue Errno::ENOENT
37
- nil
35
+ available? ? @version : nil
38
36
  end
39
37
 
40
38
  # Rebuilds a PDF in place. The qpdf flags here:
@@ -51,25 +49,31 @@ module Metaclean
51
49
  def rebuild!(path)
52
50
  raise Error, 'qpdf not available' unless available?
53
51
 
54
- # Including `Process.pid` in the temp name avoids collisions if two
55
- # metaclean processes happen to run at the same time on shared storage.
56
- tmp = "#{path}.qpdf.tmp.#{Process.pid}"
52
+ src = Metaclean.safe_path(path)
53
+ tmp = tmp_path_for(path)
57
54
 
58
55
  _out, err, status = Open3.capture3(
59
56
  'qpdf', '--linearize', '--object-streams=generate',
60
- '--remove-unreferenced-resources=yes', path.to_s, tmp
57
+ '--remove-unreferenced-resources=yes', src, Metaclean.safe_path(tmp)
61
58
  )
62
59
 
63
60
  # qpdf has a quirk: exit code 3 means "succeeded with warnings" (output
64
61
  # is still produced and valid). We treat that the same as success.
65
62
  success = status.success? || status.exitstatus == 3
66
- unless success
67
- File.delete(tmp) if File.exist?(tmp)
68
- raise Error, "qpdf failed: #{err.strip}"
69
- end
63
+ raise Error, "qpdf failed: #{err.strip}" unless success && File.exist?(tmp)
70
64
 
71
- FileUtils.mv(tmp, path.to_s)
65
+ FileUtils.mv(tmp, src)
72
66
  true
67
+ ensure
68
+ # Interrupt-safety: drop the temp if we died (or failed) before the
69
+ # rename. On success it's already moved, so this is a no-op.
70
+ File.delete(tmp) if tmp && File.exist?(tmp)
71
+ end
72
+
73
+ # Short sibling temp in the same directory: same-fs rename, unpredictable
74
+ # name, and no risk of exceeding the filename length limit by appending a suffix to a long PDF name.
75
+ def tmp_path_for(path)
76
+ File.join(File.dirname(path), "#{Metaclean::TMP_MARKER}qpdf.#{Process.pid}.#{SecureRandom.hex(8)}.pdf")
73
77
  end
74
78
  end
75
79
  end