metaclean 1.0.2 → 4.0.1
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +134 -47
- data/bin/metaclean +1 -22
- data/lib/metaclean/cli.rb +42 -92
- data/lib/metaclean/display.rb +59 -40
- data/lib/metaclean/exiftool.rb +70 -89
- data/lib/metaclean/ffmpeg.rb +84 -0
- data/lib/metaclean/mat2.rb +43 -40
- data/lib/metaclean/qpdf.rb +29 -25
- data/lib/metaclean/runner.rb +317 -168
- data/lib/metaclean/strategy.rb +118 -39
- data/lib/metaclean/version.rb +1 -3
- data/lib/metaclean.rb +75 -26
- metadata +11 -8
data/lib/metaclean/qpdf.rb
CHANGED
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# ───────────────────────────────────────────────────────────────────────────
|
|
4
3
|
# Wrapper around `qpdf` — a PDF structural cleaner.
|
|
5
4
|
#
|
|
6
|
-
# Why qpdf
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
# final pass after the other tools have stripped the obvious metadata.
|
|
11
|
-
# ───────────────────────────────────────────────────────────────────────────
|
|
5
|
+
# Why qpdf on top of mat2/ExifTool? PDFs hide metadata in places those two
|
|
6
|
+
# don't always reach: orphaned objects, unused image streams, old revisions.
|
|
7
|
+
# qpdf rebuilds the PDF using only referenced objects — a final pass after the
|
|
8
|
+
# other tools have stripped the obvious metadata.
|
|
12
9
|
|
|
13
10
|
require 'open3'
|
|
14
11
|
require 'fileutils'
|
|
12
|
+
require 'securerandom'
|
|
15
13
|
|
|
16
14
|
module Metaclean
|
|
17
15
|
module Qpdf
|
|
@@ -20,21 +18,21 @@ module Metaclean
|
|
|
20
18
|
def available?
|
|
21
19
|
return @available if defined?(@available)
|
|
22
20
|
|
|
23
|
-
|
|
21
|
+
out, _err, status = Open3.capture3('qpdf', '--version')
|
|
24
22
|
@available = status.success?
|
|
23
|
+
# `qpdf --version` prints "qpdf version 11.9.0" on its first line. We
|
|
24
|
+
# keep just the bare number (`.split.last`) so callers don't each have
|
|
25
|
+
# to post-process it — matching Exiftool.version / Mat2.version. Captured
|
|
26
|
+
# here so `version` reuses it instead of re-spawning the binary.
|
|
27
|
+
@version = @available ? out.lines.first.to_s.strip.split.last : nil
|
|
28
|
+
@available
|
|
25
29
|
rescue Errno::ENOENT
|
|
30
|
+
@version = nil
|
|
26
31
|
@available = false
|
|
27
32
|
end
|
|
28
33
|
|
|
29
34
|
def version
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
out, _err, status = Open3.capture3('qpdf', '--version')
|
|
33
|
-
# `qpdf --version` prints multiple lines starting with the version line.
|
|
34
|
-
# `.lines.first` grabs only that line.
|
|
35
|
-
status.success? ? out.lines.first.to_s.strip : nil
|
|
36
|
-
rescue Errno::ENOENT
|
|
37
|
-
nil
|
|
35
|
+
available? ? @version : nil
|
|
38
36
|
end
|
|
39
37
|
|
|
40
38
|
# Rebuilds a PDF in place. The qpdf flags here:
|
|
@@ -51,25 +49,31 @@ module Metaclean
|
|
|
51
49
|
def rebuild!(path)
|
|
52
50
|
raise Error, 'qpdf not available' unless available?
|
|
53
51
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
tmp = "#{path}.qpdf.tmp.#{Process.pid}"
|
|
52
|
+
src = Metaclean.safe_path(path)
|
|
53
|
+
tmp = tmp_path_for(path)
|
|
57
54
|
|
|
58
55
|
_out, err, status = Open3.capture3(
|
|
59
56
|
'qpdf', '--linearize', '--object-streams=generate',
|
|
60
|
-
'--remove-unreferenced-resources=yes',
|
|
57
|
+
'--remove-unreferenced-resources=yes', src, Metaclean.safe_path(tmp)
|
|
61
58
|
)
|
|
62
59
|
|
|
63
60
|
# qpdf has a quirk: exit code 3 means "succeeded with warnings" (output
|
|
64
61
|
# is still produced and valid). We treat that the same as success.
|
|
65
62
|
success = status.success? || status.exitstatus == 3
|
|
66
|
-
unless success
|
|
67
|
-
File.delete(tmp) if File.exist?(tmp)
|
|
68
|
-
raise Error, "qpdf failed: #{err.strip}"
|
|
69
|
-
end
|
|
63
|
+
raise Error, "qpdf failed: #{err.strip}" unless success && File.exist?(tmp)
|
|
70
64
|
|
|
71
|
-
FileUtils.mv(tmp,
|
|
65
|
+
FileUtils.mv(tmp, src)
|
|
72
66
|
true
|
|
67
|
+
ensure
|
|
68
|
+
# Interrupt-safety: drop the temp if we died (or failed) before the
|
|
69
|
+
# rename. On success it's already moved, so this is a no-op.
|
|
70
|
+
File.delete(tmp) if tmp && File.exist?(tmp)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Short sibling temp in the same directory: same-fs rename, unpredictable
|
|
74
|
+
# name, and no risk of exceeding filename length by appending to a long PDF.
|
|
75
|
+
def tmp_path_for(path)
|
|
76
|
+
File.join(File.dirname(path), "#{Metaclean::TMP_MARKER}qpdf.#{Process.pid}.#{SecureRandom.hex(8)}.pdf")
|
|
73
77
|
end
|
|
74
78
|
end
|
|
75
79
|
end
|