metaclean 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,451 @@
1
+ # frozen_string_literal: true
2
+
3
+ # ───────────────────────────────────────────────────────────────────────────
4
+ # The orchestrator. Given a list of paths and parsed CLI options, this class:
5
+ #
6
+ # 1. Expands paths into a flat list of files (handling directories,
7
+ # recursion, symlinks, type filters).
8
+ # 2. Asks the user for confirmation (unless --force).
9
+ # 3. For each file, runs the strategy pipeline (mat2 / exiftool / qpdf)
10
+ # using the "atomic write" pattern so a crash never leaves a
11
+ # half-cleaned file.
12
+ # 4. Prints a before/after diff and a final summary.
13
+ # ───────────────────────────────────────────────────────────────────────────
14
+
15
+ require 'fileutils'
16
+ require 'json'
17
+ require 'set'
18
+ require 'tmpdir'
19
+
20
+ module Metaclean
21
+ class Runner
22
# Builds a runner around the options Hash prepared by the CLI. The Hash is
# stored untouched; every other method reads its flags from `@options`.
#
# @param options [Hash] parsed command-line options
def initialize(options)
  @options = options
end
26
+
27
+ # ─────────────────────────────────────────────────────────────────
28
+ # Public entry points: one for `--inspect`, one for the cleaning flow.
29
+ # ─────────────────────────────────────────────────────────────────
30
+
31
# Read-only entry point (`--inspect`): prints the metadata of every file
# reachable from `paths` without modifying anything.
#
# With `@options[:format] == :json` a single JSON array of
# `{ file:, metadata: }` objects is written to stdout; otherwise each file
# gets a header, a tag-count section and a metadata table via Display.
#
# @param paths [Array<String>] files and/or directories given by the user
def inspect_paths(paths)
  targets = expand_files(paths)
  return Display.warning('No files to inspect.') if targets.empty?

  # Human-readable output is the default; handle it first and bail out.
  unless @options[:format] == :json
    return targets.each do |path|
      Display.header "📄 #{path}"
      tags = Exiftool.read(path)
      Display.section "Metadata (#{Display.count_embedded(tags)} embedded tags)"
      Display.metadata_table(tags)
    end
  end

  # Machine output: one document for the whole batch, suitable for piping.
  report = targets.map { |path| { file: path, metadata: Exiftool.read(path) } }
  puts JSON.pretty_generate(report)
  nil
end
50
+
51
# Cleaning entry point: expands `paths`, asks for confirmation, cleans each
# file and prints a summary.
#
# The confirmation prompt is skipped for `--force` and `--dry-run`.
# Terminates the process with exit status 1 when any file failed, or when
# `--strict-verify` is set and privacy-relevant tags survived in any file.
#
# @param paths [Array<String>] files and/or directories given by the user
def clean_paths(paths)
  files = expand_files(paths)
  return Display.warning('No files to process.') if files.empty?

  announce_tools

  unless @options[:force] || @options[:dry_run]
    action = @options[:in_place] ? 'OVERWRITE' : 'create cleaned copies of'
    puts Display.c("About to #{action} #{files.size} file(s).", :yellow)
    if @options[:in_place] && !@options[:no_backup]
      puts Display.c('Backups will be saved alongside as <file>.bak.', :gray)
    end
    print Display.c('Proceed? [y/N] ', :bold)
    # `gets` returns nil on EOF (Ctrl-D); safe navigation turns that into a
    # nil answer, which is treated as "no".
    ans = $stdin.gets&.strip&.downcase
    return Display.warning('Aborted.') unless %w[y yes].include?(ans)
  end

  summary = { cleaned: 0, failed: 0, removed_total: 0, residual_files: 0 }

  # The 1-based position is passed along so `clean_one` can render
  # "[3/47]" in batch mode.
  files.each_with_index do |file, idx|
    result = clean_one(file, index: idx + 1, total: files.size)
    summary[result[:status]] += 1
    summary[:removed_total] += result[:removed].to_i
    summary[:residual_files] += 1 if result[:residual].to_i.positive?
  rescue Error, SystemCallError, IOError => e
    # One bad file must not stop the rest of the batch. Besides our own
    # Error hierarchy this also covers operating-system failures
    # (Errno::EACCES, Errno::ENOSPC, …) raised while copying or renaming.
    warn Display.error("#{file}: #{e.message}")
    summary[:failed] += 1
  end

  print_summary(summary)

  # Non-zero exit code so CI pipelines can detect failures.
  exit 1 if @options[:strict_verify] && summary[:residual_files].positive?
  exit 1 if summary[:failed].positive?
end
95
+
96
+ private
97
+
98
+ # ─────────────────────────────────────────────────────────────────
99
+ # Output helpers
100
+ # ─────────────────────────────────────────────────────────────────
101
+
102
# Prints one line listing the external tools that are available, with the
# version each reports, plus a dry-run notice when that mode is active.
# For qpdf only the last whitespace-separated word of its version string
# is shown.
def announce_tools
  detected = [
    (Exiftool.available? ? "exiftool #{Exiftool.version}" : nil),
    (Mat2.available? ? "mat2 #{Mat2.version}" : nil),
    (Qpdf.available? ? "qpdf #{Qpdf.version&.split&.last}" : nil)
  ].compact

  Display.info "Tools detected: #{detected.join(', ')}"
  Display.info '(dry-run — no files will be modified)' if @options[:dry_run]
end
110
+
111
+ # ─────────────────────────────────────────────────────────────────
112
+ # Cleaning a single file — the heart of the program.
113
+ # ─────────────────────────────────────────────────────────────────
114
+
115
# Cleans a single file: shows its metadata, runs the tool pipeline on a
# staging copy, shows the result and commits the staging copy.
#
# Nothing is written to the destination when
#   * `--dry-run` is active, or
#   * no tool ran successfully — the staging copy would then be an
#     un-cleaned duplicate, and publishing it as `<name>_clean.<ext>` (or
#     overwriting the original with it) would only suggest a clean happened.
#
# @param file  [String]  path of the file to clean
# @param index [Integer] 1-based position in the batch (for the "[i/n]" prefix)
# @param total [Integer] batch size
# @return [Hash] `{ status:, removed:, residual: }`, plus `tools:` when the
#   pipeline ran
def clean_one(file, index:, total:)
  prefix = total > 1 ? "[#{index}/#{total}] " : ''
  Display.header "#{prefix}📄 #{file}"

  # Capture the "before" state first; it is the baseline for the diff.
  before = Exiftool.read(file)
  Display.section "Before (#{Display.count_embedded(before)} embedded tags)"
  Display.metadata_table(before, only_embedded: true)

  tools = Strategy.tools_for(file, prefer: tool_prefs)
  if tools.empty?
    Display.warning 'No applicable tools — skipping.'
    return { status: :failed, removed: 0, residual: 0 }
  end
  Display.info "Pipeline: #{tools.join(' → ')}"

  # All tools mutate `staging`; only a successful run is renamed onto
  # `final_path`, so a crash never leaves a half-cleaned destination.
  final_path = resolve_final_path(file)
  staging = staging_path_for(final_path)

  begin
    # The copy happens inside the protected region so that a partially
    # written staging file is removed by the `ensure` below.
    FileUtils.cp(file, staging)
    tool_results = tools.map { |tool| run_tool(tool, staging) }

    after = Exiftool.read(staging)
    Display.section "After (#{Display.count_embedded(after)} embedded tags)"
    Display.metadata_table(after, only_embedded: true)

    Display.section 'Diff'
    Display.diff(before, after)

    # Loud warning if anything privacy-relevant survived.
    residual = Strategy.privacy_residual(after)
    if residual.any?
      Display.warning "Privacy-relevant tags still present (#{residual.size}):"
      residual.each { |k, v| puts " #{Display.c(k, :yellow)} = #{Display.truncate(Display.format_value(v), 60)}" }
    end

    result = finalize_result(tool_results, before, after, residual)

    if @options[:dry_run]
      Display.info '(dry-run: nothing was written)'
    elsif tool_results.none? { |r| r[:ok] && !r[:skipped] }
      Display.warning 'No tool ran successfully — nothing was written.'
    else
      commit!(file, staging, final_path)
      Display.success "→ #{final_path}"
    end

    result
  ensure
    # After a commit the staging file has been moved away and this is a
    # no-op. The path comparison guards against ever deleting the final
    # file should staging and destination resolve to the same path.
    File.delete(staging) if File.exist?(staging) && File.expand_path(staging) != File.expand_path(final_path)
  end
end
185
+
186
# Runs one tool of the pipeline against `path` and reports the outcome.
#
# @param tool [Symbol] :exiftool, :mat2 or :qpdf
# @param path [String] file to mutate (the staging copy)
# @return [Hash, nil] `{ tool:, ok: }` with optional `skipped:`, `note:` or
#   `error:` entries; nil for a symbol that matches none of the branches
#
# An `Error` raised by a wrapper is converted into an `ok: false` entry so
# the remaining tools still get their turn.
def run_tool(tool, path)
  case tool
  when :exiftool
    Exiftool.strip!(path,
                    keep_orientation: @options[:keep_orientation],
                    keep_color_profile: @options[:keep_color_profile])
    Display.info " ✓ exiftool"
    { tool: :exiftool, ok: true }
  when :qpdf
    Qpdf.rebuild!(path)
    Display.info ' ✓ qpdf'
    { tool: :qpdf, ok: true }
  when :mat2
    outcome = Mat2.strip!(path)
    report = { tool: :mat2, ok: true, note: outcome }
    case outcome
    when :unsupported
      # mat2 never touched the file, so this must not count as a
      # successful pass.
      Display.info ' · mat2 (unsupported file type, skipped)'
      report = { tool: :mat2, ok: false, skipped: true, note: outcome }
    when :no_metadata
      Display.info ' · mat2 (no metadata to strip)'
    else
      Display.info ' ✓ mat2'
    end
    report
  end
rescue Error => e
  Display.warning " ✗ #{tool}: #{e.message} — continuing"
  { tool: tool, ok: false, error: e.message }
end
225
+
226
# Condenses the per-tool results and the before/after metadata into the
# per-file result Hash.
#
# A file is :cleaned only when at least one tool reported `ok` without
# being `skipped` AND the residual is empty; every other combination is
# :failed.
#
# @return [Hash] `{ status:, removed:, residual:, tools: }`
def finalize_result(tool_results, before, after, residual)
  effective_pass = tool_results.any? { |entry| entry[:ok] && !entry[:skipped] }
  verdict = if effective_pass && residual.empty?
              :cleaned
            else
              :failed
            end

  {
    status: verdict,
    removed: removed_embedded_count(before, after),
    residual: residual.size,
    tools: tool_results
  }
end
240
+
241
# Counts the keys of `before` that are missing from `after`, ignoring the
# 'SourceFile' key and every key whose group (per `Display.group_of`) is
# listed in `Display::NON_METADATA_GROUPS`.
#
# @param before [Hash] metadata read before cleaning
# @param after  [Hash] metadata read after cleaning
# @return [Integer]
def removed_embedded_count(before, after)
  surviving = after.keys.to_set
  before.each_key.count do |tag|
    tag != 'SourceFile' &&
      !Display::NON_METADATA_GROUPS.include?(Display.group_of(tag)) &&
      !surviving.include?(tag)
  end
end
250
+
251
+ # ─────────────────────────────────────────────────────────────────
252
+ # Path helpers — figuring out where to stage and where to commit.
253
+ # ─────────────────────────────────────────────────────────────────
254
+
255
# Publishes the staging file at `final_path`.
#
# With `--in-place` (and without `--no-backup`) the original is first
# copied to a collision-free `<original>.bak`. The backup is taken BEFORE
# the move so it still exists if the move fails. For a symlinked source
# the backup is made of, and placed next to, the link's target.
#
# @param source     [String] path the user asked to clean
# @param staging    [String] cleaned temporary file
# @param final_path [String] destination of the cleaned file
def commit!(source, staging, final_path)
  backup_wanted = @options[:in_place] && !@options[:no_backup]
  if backup_wanted
    original = source
    original = File.realpath(source) if File.symlink?(source)
    FileUtils.cp(original, collision_safe("#{original}.bak"))
  end

  FileUtils.mv(staging, final_path)
end
268
+
269
# Decides where the cleaned file will be written.
#
# * default      → a collision-free `<name>_clean.<ext>` beside the original
# * `--in-place` → the original path itself; for a symlink, the link's
#   resolved target, so the link is not replaced by a regular file
#
# @param file [String]
# @return [String]
def resolve_final_path(file)
  return collision_safe(build_clean_path(file)) unless @options[:in_place]

  File.symlink?(file) ? File.realpath(file) : file
end
280
+
281
# Inserts `_clean` between the file's base name and its (last) extension,
# keeping the directory: `dir/photo.jpg` → `dir/photo_clean.jpg`.
#
# @param file [String]
# @return [String]
def build_clean_path(file)
  suffix = File.extname(file)
  stem = File.basename(file, suffix)
  File.join(File.dirname(file), format('%s_clean%s', stem, suffix))
end
286
+
287
# Derives the temporary path the tools work on.
#
# The staging file sits beside the destination, which keeps the final
# move within one directory. The process id and a random number keep
# simultaneous runs apart, and the original extension stays the LAST
# segment so extension-sensitive tools still see the real type.
#
# @param final_path [String] destination of the cleaned file
# @return [String] e.g. `photo.metaclean.tmp.<pid>.<rand>.jpg`
def staging_path_for(final_path)
  suffix = File.extname(final_path)
  stem = final_path[0, final_path.length - suffix.length]
  [stem, '.metaclean.tmp.', Process.pid, '.', rand(1_000_000), suffix].join
end
297
+
298
# Returns `path` when nothing exists there; otherwise the first free
# variant with `_1`, `_2`, … inserted before the (last) extension.
#
# @param path [String]
# @return [String] a path that did not exist at the time of the check
def collision_safe(path)
  return path unless File.exist?(path)

  suffix = File.extname(path)
  stem = File.basename(path, suffix)
  folder = File.dirname(path)

  # `1.step` counts upwards without bound; the method returns from inside
  # the block as soon as a free name is found.
  1.step do |n|
    attempt = File.join(folder, "#{stem}_#{n}#{suffix}")
    return attempt unless File.exist?(attempt)
  end
end
314
+
315
# Converts the CLI opt-out flags into the `prefer:` Hash handed to
# `Strategy.tools_for`. `--exiftool-only` switches off both mat2 and qpdf.
#
# @return [Hash{Symbol => Boolean}] keys :mat2, :qpdf, :exiftool
def tool_prefs
  exiftool_only = @options[:exiftool_only]

  {
    mat2: !(@options[:no_mat2] || exiftool_only),
    qpdf: !(@options[:no_qpdf] || exiftool_only),
    exiftool: !@options[:no_exiftool]
  }
end
324
+
325
# Prints the closing report of a batch. The failure line and the
# privacy-residual line only appear when their counters are above zero.
#
# @param summary [Hash] counters :cleaned, :failed, :removed_total and
#   :residual_files
def print_summary(summary)
  failures = summary[:failed]
  leftovers = summary[:residual_files]

  Display.header 'Summary'
  Display.success "Cleaned: #{summary[:cleaned]} file(s)"
  puts Display.error("Failed: #{failures}") if failures.positive?
  Display.info "Total embedded tags removed: #{summary[:removed_total]}"
  return unless leftovers.positive?

  Display.warning "Files with privacy residual: #{leftovers}"
end
334
+
335
+ # ─────────────────────────────────────────────────────────────────
336
+ # File discovery — turning the user's paths into a flat list.
337
+ # ─────────────────────────────────────────────────────────────────
338
+
339
# Turns the user's path arguments into the flat list of files to process.
#
# * Symlink arguments are skipped with a warning unless
#   `--follow-symlinks` is set.
# * Directories are scanned via `collect_dir`; files found that way go
#   through `skip?`.
# * Files named explicitly are never subject to `skip?`.
# * With `--types`, both kinds are filtered by `type_allowed?`.
# * The result is de-duplicated by real path; explicit files come first.
#
# @param paths [Array<String>]
# @return [Array<String>]
def expand_files(paths)
  named = []
  scanned = []

  paths.each do |entry|
    if File.symlink?(entry) && !@options[:follow_symlinks]
      Display.warning "Skipping symlink: #{entry} (use --follow-symlinks to include)"
    elsif File.directory?(entry)
      collect_dir(entry, scanned)
    elsif File.file?(entry)
      named << entry
    else
      Display.warning "Not found: #{entry}"
    end
  end

  candidates = named + scanned.reject { |found| skip?(found) }
  candidates = candidates.select { |found| type_allowed?(found) } if @options[:types]
  dedupe_by_realpath(candidates)
end
365
+
366
# Drops entries that resolve to a file already seen earlier in the list,
# so a file reachable via two spellings (or via a symlink and directly) is
# cleaned once. The first spelling encountered is the one kept. When
# `File.realpath` raises, the raw path is used as the identity.
#
# @param paths [Array<String>]
# @return [Array<String>] new array in the original order
def dedupe_by_realpath(paths)
  paths.uniq do |candidate|
    File.realpath(candidate)
  rescue StandardError
    candidate
  end
end
383
+
384
# Appends the files found in `dir` to `out`.
#
# With `--recursive` the whole tree is walked via `walk_recursive`;
# otherwise only the immediate, non-hidden children are considered.
# Symlinked children are ignored unless `--follow-symlinks` is set.
#
# @param dir [String]        directory to scan
# @param out [Array<String>] accumulator that receives the file paths
def collect_dir(dir, out)
  return walk_recursive(dir, out, Set.new) if @options[:recursive]

  # The pattern is only '*' and the directory is passed via `base:`, so
  # glob metacharacters in the directory's own name ("shots [2020]",
  # "{raw}", …) are taken literally instead of being read as pattern
  # syntax, which used to make such directories look empty.
  Dir.glob('*', base: dir).each do |entry|
    sub = File.join(dir, entry)
    next if File.symlink?(sub) && !@options[:follow_symlinks]

    out << sub if File.file?(sub)
  end
end
396
+
397
# Depth-first walk that appends every regular file below `dir` to `out`.
#
# Hand-rolled rather than `Find.find` so that symlinked directories can be
# entered when `--follow-symlinks` is set. `visited` holds the real path of
# every directory already entered, which stops a symlink that points back
# at an ancestor from causing endless recursion. A directory that cannot
# be read (EACCES) or has vanished (ENOENT) is reported and skipped.
#
# @param dir     [String]        directory to walk
# @param out     [Array<String>] accumulator that receives the file paths
# @param visited [Set<String>]   real paths of directories already entered
def walk_recursive(dir, out, visited)
  canonical = begin
    File.realpath(dir)
  rescue StandardError
    dir
  end
  # `Set#add?` returns nil when the element was already present.
  return unless visited.add?(canonical)

  Dir.each_child(dir) do |name|
    child = File.join(dir, name)
    next if File.symlink?(child) && !@options[:follow_symlinks]

    if File.directory?(child)
      walk_recursive(child, out, visited)
    elsif File.file?(child)
      out << child
    end
  end
rescue Errno::EACCES, Errno::ENOENT => e
  Display.warning "Skipping #{dir}: #{e.message}"
end
430
+
431
# Decides whether a file DISCOVERED by directory scanning is left alone.
# Not applied to explicit CLI arguments — a path the user typed is always
# honoured.
#
# Skipped are:
#   * hidden (dot-prefixed) files,
#   * `.bak` backups,
#   * previous outputs `<name>_clean` / `<name>_clean_<n>`, with or without
#     an extension (an extensionless input such as `README` produces
#     `README_clean`, which must not be picked up again on a re-run),
#   * `.metaclean.tmp.<pid>.<rand>` staging files.
#
# @param file [String]
# @return [Boolean]
def skip?(file)
  base = File.basename(file)
  return true if base.start_with?('.')
  return true if base.end_with?('.bak')
  return true if base.match?(/_clean(_\d+)?(\.[^.]+)?\z/)
  return true if base.match?(/\.metaclean\.tmp\.\d+\.\d+/)

  false
end
445
+
446
# True when the file's extension (lower-cased, without the dot) is one of
# the values in `@options[:types]`.
#
# @param file [String]
def type_allowed?(file)
  wanted = @options[:types]
  wanted.include?(File.extname(file).delete('.').downcase)
end
450
+ end
451
+ end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ # ───────────────────────────────────────────────────────────────────────────
4
+ # The "policy" module: which tools to run for which file, and what counts as
5
+ # privacy-relevant if it survives a clean.
6
+ #
7
+ # Keeping this logic in its own file means the runner doesn't need to know
8
+ # about formats — it just asks Strategy.tools_for(path) and runs whatever
9
+ # comes back.
10
+ # ───────────────────────────────────────────────────────────────────────────
11
+
12
module Metaclean
  # Policy layer: decides which tools run for which file and which tags
  # count as privacy-relevant when they are still present after a clean.
  module Strategy
    # Tag GROUPS whose mere presence after cleaning is reported to the user.
    PRIVACY_GROUPS = %w[GPS MakerNotes XMP-dc XMP-photoshop IPTC ICC-header].freeze

    # Tag NAMES that are reported whatever group they appear under
    # (e.g. "EXIF:Artist" is flagged because of "Artist").
    PRIVACY_TAGS = %w[
      Artist Author Creator Copyright Rights
      By-line By-lineTitle Credit Source Contact OwnerName
      CameraOwnerName SerialNumber InternalSerialNumber LensSerialNumber
      Software HostComputer ProcessingSoftware
      ImageDescription UserComment
      LastModifiedBy LastSavedBy LastAuthor
    ].freeze

    # Extensions for which mat2 runs before ExifTool. ('pdf' is listed here
    # but PDFs take their own branch in `tools_for`.)
    MAT2_PREFERRED = %w[
      pdf docx xlsx pptx odt ods odp odg epub png svg
      mp4 avi mkv mov webm
    ].freeze

    module_function

    # Ordered pipeline of tool symbols for `path`.
    #
    # `prefer` carries the user's opt-outs: only an explicit `false`
    # disables a tool; `nil` (absent) and `true` both mean "use it".
    #
    #   PDF               → mat2 (if available), exiftool, qpdf (if available)
    #   mat2-preferred    → mat2, exiftool   (only when mat2 is wanted and
    #                       available; otherwise the generic rule applies)
    #   everything else   → exiftool, then mat2 if `Mat2.supports?(path)`
    #
    # @param path   [String]
    # @param prefer [Hash{Symbol => Boolean}] keys :mat2, :exiftool, :qpdf
    # @return [Array<Symbol>]
    def tools_for(path, prefer: {})
      ext = File.extname(path).downcase.delete('.')
      mat2_wanted = prefer[:mat2] != false
      exiftool_wanted = prefer[:exiftool] != false
      pipeline = []

      if ext == 'pdf'
        pipeline << :mat2 if mat2_wanted && Mat2.available?
        pipeline << :exiftool if exiftool_wanted
        pipeline << :qpdf if prefer[:qpdf] != false && Qpdf.available?
        return pipeline
      end

      if MAT2_PREFERRED.include?(ext) && mat2_wanted && Mat2.available?
        pipeline << :mat2
        pipeline << :exiftool if exiftool_wanted
        return pipeline
      end

      pipeline << :exiftool if exiftool_wanted
      pipeline << :mat2 if mat2_wanted && Mat2.supports?(path)
      pipeline
    end

    # Entries of `meta` (read AFTER cleaning) that still look
    # privacy-relevant.
    #
    # Keys are split on the first ":" into group and tag. Keys in
    # `Display::NON_METADATA_GROUPS` and the 'SourceFile' key are ignored.
    # A key without a group prefix is matched against PRIVACY_TAGS as a
    # whole; otherwise either a PRIVACY_GROUPS or a PRIVACY_TAGS hit counts.
    #
    # @param meta [Hash] tag key → value
    # @return [Hash] the flagged subset of `meta`
    def privacy_residual(meta)
      meta.select do |key, _value|
        next false if key == 'SourceFile'

        group, tag = key.to_s.split(':', 2)
        next false if Display::NON_METADATA_GROUPS.include?(group)
        next PRIVACY_TAGS.include?(group.to_s) if tag.nil?

        PRIVACY_GROUPS.include?(group) || PRIVACY_TAGS.include?(tag)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ # ───────────────────────────────────────────────────────────────────────────
4
+ # Single source of truth for the program's version.
5
+ # Both the gemspec and `metaclean --version` read from here, so we only have
6
+ # one place to bump.
7
+ # ───────────────────────────────────────────────────────────────────────────
8
+
9
module Metaclean
  # The one place where the release number is written down.
  VERSION = '1.0.2'.freeze
end
data/lib/metaclean.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # ───────────────────────────────────────────────────────────────────────────
4
+ # lib/metaclean.rb — the library's "front door".
5
+ #
6
+ # In Ruby, a module is a namespace. We put everything inside `Metaclean::*`
7
+ # so we don't pollute the global namespace and so it's obvious where each
8
+ # piece belongs.
9
+ #
10
+ # The `require` order matters: a file can only reference constants from
11
+ # files already loaded. We load the smallest pieces first, then the bigger
12
+ # ones that depend on them.
13
+ # ───────────────────────────────────────────────────────────────────────────
14
+
15
+ require 'metaclean/version' # just defines VERSION
16
+ require 'metaclean/display' # ANSI colors and formatters (no deps)
17
+ require 'metaclean/exiftool' # ExifTool wrapper
18
+ require 'metaclean/mat2' # mat2 wrapper
19
+ require 'metaclean/qpdf' # qpdf wrapper
20
+ require 'metaclean/strategy' # picks which tools run for each file type
21
+ require 'metaclean/runner' # orchestrates a clean across many files
22
+ require 'metaclean/cli' # parses ARGV and calls Runner
23
+
24
module Metaclean
  # Root of the library's exception hierarchy. It descends from
  # StandardError, so `rescue Metaclean::Error` catches every error raised
  # by this library without also catching NoMemoryError or SystemExit.
  Error = Class.new(StandardError)

  # Raised specifically when ExifTool itself is missing, so the CLI can
  # show a tailored install hint.
  ExiftoolMissing = Class.new(Error)
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: metaclean
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ platform: ruby
6
+ authors:
7
+ - 26zl
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ description: |
13
+ metaclean is a small Ruby CLI that wraps ExifTool, mat2 and qpdf to strip
14
+ removable embedded tags (EXIF, IPTC, XMP, GPS, MakerNotes, ID3, document
15
+ properties, etc.) from images, audio, video, PDFs and Office documents —
16
+ and shows a before/after diff of what was removed.
17
+ executables:
18
+ - metaclean
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - LICENSE
23
+ - README.md
24
+ - bin/metaclean
25
+ - lib/metaclean.rb
26
+ - lib/metaclean/cli.rb
27
+ - lib/metaclean/display.rb
28
+ - lib/metaclean/exiftool.rb
29
+ - lib/metaclean/mat2.rb
30
+ - lib/metaclean/qpdf.rb
31
+ - lib/metaclean/runner.rb
32
+ - lib/metaclean/strategy.rb
33
+ - lib/metaclean/version.rb
34
+ homepage: https://github.com/26zl/metaclean
35
+ licenses:
36
+ - MIT
37
+ metadata:
38
+ allowed_push_host: https://rubygems.org
39
+ bug_tracker_uri: https://github.com/26zl/metaclean/issues
40
+ changelog_uri: https://github.com/26zl/metaclean/releases
41
+ source_code_uri: https://github.com/26zl/metaclean
42
+ rubygems_mfa_required: 'true'
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '3.2'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements:
57
+ - ExifTool (https://exiftool.org) on PATH
58
+ rubygems_version: 3.7.2
59
+ specification_version: 4
60
+ summary: Cross-platform CLI that strips file metadata with ExifTool, mat2 and qpdf.
61
+ test_files: []