stacked-pdf-generator 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56ae6f60c86521d34dfb181497a9d9b97a0796780fa5bed10e137ddb48f5a2c7
4
- data.tar.gz: ee2f9c06fde0a77814e88f9f65015acccd73c6c2e0fb6131811e6326a60a98fd
3
+ metadata.gz: ea9f2232d17d099af488808839dddb8c3fab9f5c987fce3a82413d21654f8107
4
+ data.tar.gz: 34cf1e0945f1630d3f16e9256fafd9ecd55eec655fd4daea1b55a1140eb9190e
5
5
  SHA512:
6
- metadata.gz: c46b93d4b0074c76f0733e153b4a46e50a010332f76ca3a243ee042c9e7b4c90ff444a9cebf4bac874d1bd5ab28f13729aeb02ac73cbcad0969149e3c53311fb
7
- data.tar.gz: b58b49aea0905b0faa3cdc6ac2a89e35d6862e536aa781c06c9e57361ad17f09b02de3d431170634647b80798e033534f97e73e2bd42c971e70b1b34ec430892
6
+ metadata.gz: 046cef8c86c81630b03057805daed45301249533adfe7c4fb894880a62dbb2da43bc7c1ef41e641ad24a14f87b3d7e2671f60135c090573817660e5e8080ef37
7
+ data.tar.gz: 26e2f24a82a8ea91abe5d342ca9e4ac72c352be36ebbd54a39034ceab5ab7cca1d34845b0ea40abc1994df1bf7d20a6eef0bd41e3171d83eb36ad78b49811490
@@ -7,7 +7,8 @@ lib_path = File.expand_path('../lib', __dir__)
7
7
  $LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
8
8
  require 'stacked_pdf_generator'
9
9
 
10
- options = { portrait: false, two_sided_flipped: false }
10
+ options = { portrait: false, two_sided_flipped: false, crop_from_marks: false }
11
+ crop_box = {}
11
12
  parser = OptionParser.new do |opts|
12
13
  opts.banner = 'Usage: stacked-pdf-generator --input INPUT --output OUTPUT [options]'
13
14
 
@@ -26,6 +27,28 @@ parser = OptionParser.new do |opts|
26
27
  options[:two_sided_flipped] = true
27
28
  end
28
29
 
30
+ opts.separator ''
31
+ opts.separator 'Crop-from-marks options (input PDF carries printer crop marks):'
32
+
33
+ opts.on('--crop-from-marks',
34
+ 'Auto-detect crop marks on the first page and crop every page to the area they delimit') do
35
+ options[:crop_from_marks] = true
36
+ end
37
+ opts.on('--crop-top MM', Float, 'Manual crop: Y of the top edge in mm (from page bottom)') { |v| crop_box[:top] = v }
38
+ opts.on('--crop-bottom MM', Float, 'Manual crop: Y of the bottom edge in mm (from page bottom)') { |v| crop_box[:bottom] = v }
39
+ opts.on('--crop-left MM', Float, 'Manual crop: X of the left edge in mm (default 0)') { |v| crop_box[:left] = v }
40
+ opts.on('--crop-right MM', Float, 'Manual crop: X of the right edge in mm (default page width)') { |v| crop_box[:right] = v }
41
+ opts.on('--mark-max-length MM', Float,
42
+ 'Detection: maximum length of a crop-mark segment in mm (default ~10.6)') do |v|
43
+ options[:mark_max_length_mm] = v
44
+ end
45
+ opts.on('--mark-edge-tolerance MM', Float,
46
+ 'Detection: how close a mark must be to the page edge in mm (default ~0.35)') do |v|
47
+ options[:mark_edge_tolerance_mm] = v
48
+ end
49
+
50
+ opts.separator ''
51
+
29
52
  opts.on('-v', '--version', 'Print version') do
30
53
  puts StackedPdfGenerator::VERSION
31
54
  exit 0
@@ -59,6 +82,8 @@ if options[:pages_per_sheet].nil? && (options[:rows].nil? || options[:columns].n
59
82
  exit 1
60
83
  end
61
84
 
85
+ options[:crop_box] = crop_box unless crop_box.empty?
86
+
62
87
  result = StackedPdfGenerator.call(
63
88
  input_path: options[:input_path],
64
89
  output_path: options[:output_path],
@@ -69,7 +94,11 @@ result = StackedPdfGenerator.call(
69
94
  autoscale: options[:autoscale],
70
95
  portrait: options[:portrait],
71
96
  sheet_margins: options[:sheet_margins],
72
- two_sided_flipped: options[:two_sided_flipped]
97
+ two_sided_flipped: options[:two_sided_flipped],
98
+ crop_from_marks: options[:crop_from_marks],
99
+ crop_box: options[:crop_box],
100
+ mark_max_length_mm: options[:mark_max_length_mm],
101
+ mark_edge_tolerance_mm: options[:mark_edge_tolerance_mm]
73
102
  )
74
103
 
75
104
  if result.success?
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hexapdf'
4
+ require 'hexapdf/content/processor'
5
+
6
+ module StackedPdfGenerator
7
+ # Detects printer crop marks on the first page of a PDF and returns the
8
+ # rectangular region delimited by them.
9
+ #
10
+ # Pattern expected: 4 short horizontal segments (or rectangles) per page,
11
+ # 2 at the top (left + right edges) and 2 at the bottom. Marks must touch
12
+ # either the left edge (x ≈ 0) or the right edge (x ≈ page_width). Only the
13
+ # vertical extent is constrained by the marks; horizontally we keep the full
14
+ # page width.
15
+ module CropMarkDetector
16
+ DEFAULT_MARK_MAX_LENGTH_MM = 30.0 / 72.0 * 25.4 # ≈ 10.6 mm (30 pts)
17
+ DEFAULT_EDGE_TOLERANCE_MM = 1.0 / 72.0 * 25.4 # ≈ 0.35 mm (1 pt)
18
+ HORIZONTAL_TOLERANCE_PTS = 0.5
19
+ Y_CLUSTER_TOLERANCE_PTS = 2.0
20
+
21
+ DetectedBox = Struct.new(:left, :bottom, :right, :top, :page_width, :page_height,
22
+ keyword_init: true)
23
+
24
+ module_function
25
+
26
+ # Inspect the first page of +pdf_path+ and return a DetectedBox in PDF
27
+ # points, or raise ProcessingError with a descriptive message.
28
+ def call(pdf_path, mark_max_length_pts: DEFAULT_MARK_MAX_LENGTH_MM * 72.0 / 25.4,
29
+ edge_tolerance_pts: DEFAULT_EDGE_TOLERANCE_MM * 72.0 / 25.4)
30
+ doc = HexaPDF::Document.open(pdf_path)
31
+ page = doc.pages[0]
32
+ raise ProcessingError, 'PDF has no pages' unless page
33
+
34
+ page_w = page.box.width.to_f
35
+ page_h = page.box.height.to_f
36
+ collector = LineCollector.new
37
+ page.process_contents(collector)
38
+
39
+ candidates = filter_candidates(collector.lines, page_w,
40
+ mark_max_length_pts: mark_max_length_pts,
41
+ edge_tolerance_pts: edge_tolerance_pts)
42
+
43
+ if candidates.empty?
44
+ raise ProcessingError,
45
+ "No crop marks detected on page 1 (looked for short horizontal " \
46
+ "segments touching the left or right edge; mark_max_length=" \
47
+ "#{mark_max_length_pts.round(2)}pts, edge_tolerance=" \
48
+ "#{edge_tolerance_pts.round(2)}pts). Inspected " \
49
+ "#{collector.lines.size} path segments."
50
+ end
51
+
52
+ clusters = cluster_by_y(candidates)
53
+ validate_clusters!(clusters, page_w, edge_tolerance_pts)
54
+
55
+ ys = clusters.map { |c| median(c.map { |seg| seg[1] }) }.sort
56
+ DetectedBox.new(left: 0.0, bottom: ys.first, right: page_w, top: ys.last,
57
+ page_width: page_w, page_height: page_h)
58
+ end
59
+
60
+ def filter_candidates(lines, page_w, mark_max_length_pts:, edge_tolerance_pts:)
61
+ lines.select do |x0, y0, x1, y1|
62
+ next false unless (y0 - y1).abs < HORIZONTAL_TOLERANCE_PTS
63
+ next false if (x1 - x0).abs > mark_max_length_pts
64
+
65
+ touches_left = [x0, x1].min <= edge_tolerance_pts
66
+ touches_right = [x0, x1].max >= page_w - edge_tolerance_pts
67
+ touches_left || touches_right
68
+ end
69
+ end
70
+
71
+ def cluster_by_y(segments)
72
+ sorted = segments.sort_by { |seg| seg[1] }
73
+ clusters = []
74
+ sorted.each do |seg|
75
+ if clusters.last && (seg[1] - clusters.last.last[1]).abs <= Y_CLUSTER_TOLERANCE_PTS
76
+ clusters.last << seg
77
+ else
78
+ clusters << [seg]
79
+ end
80
+ end
81
+ clusters
82
+ end
83
+
84
+ def validate_clusters!(clusters, page_w, edge_tolerance_pts)
85
+ ys_summary = clusters.map { |c| median(c.map { |seg| seg[1] }).round(2) }
86
+
87
+ if clusters.size != 2
88
+ raise ProcessingError,
89
+ "Expected 2 Y-rows of crop marks (top + bottom), found " \
90
+ "#{clusters.size}. Y values: #{ys_summary.inspect}, " \
91
+ "marks per row: #{clusters.map(&:size).inspect}."
92
+ end
93
+
94
+ clusters.each_with_index do |cluster, idx|
95
+ has_left = cluster.any? { |x0, _, x1, _| [x0, x1].min <= edge_tolerance_pts }
96
+ has_right = cluster.any? { |x0, _, x1, _| [x0, x1].max >= page_w - edge_tolerance_pts }
97
+ unless has_left && has_right
98
+ raise ProcessingError,
99
+ "Crop mark row at y=#{ys_summary[idx]} is missing " \
100
+ "#{has_left ? '' : 'left'}#{has_left || has_right ? '' : ' & '}" \
101
+ "#{has_right ? '' : 'right'} mark. Found #{cluster.size} marks."
102
+ end
103
+ end
104
+ end
105
+
106
+ def median(values)
107
+ sorted = values.sort
108
+ n = sorted.length
109
+ n.odd? ? sorted[n / 2] : (sorted[n / 2 - 1] + sorted[n / 2]) / 2.0
110
+ end
111
+
112
+ # Walks the page content stream and records every line/rectangle edge
113
+ # in page coordinates (CTM applied).
114
+ class LineCollector < HexaPDF::Content::Processor
115
+ attr_reader :lines
116
+
117
+ def initialize
118
+ super
119
+ @lines = []
120
+ @current = nil
121
+ end
122
+
123
+ def move_to(x, y)
124
+ @current = [x, y]
125
+ end
126
+
127
+ def line_to(x, y)
128
+ if @current
129
+ tx0, ty0 = transform_point(*@current)
130
+ tx1, ty1 = transform_point(x, y)
131
+ @lines << [tx0, ty0, tx1, ty1]
132
+ end
133
+ @current = [x, y]
134
+ end
135
+
136
+ def append_rectangle(x, y, w, h)
137
+ edges = [
138
+ [x, y, x + w, y],
139
+ [x + w, y, x + w, y + h],
140
+ [x + w, y + h, x, y + h],
141
+ [x, y + h, x, y]
142
+ ]
143
+ edges.each do |x0, y0, x1, y1|
144
+ tx0, ty0 = transform_point(x0, y0)
145
+ tx1, ty1 = transform_point(x1, y1)
146
+ @lines << [tx0, ty0, tx1, ty1]
147
+ end
148
+ end
149
+
150
+ # Path-painting operators we don't care about, but Processor expects them.
151
+ %i[close_subpath end_path stroke_path close_and_stroke_path
152
+ fill_path_non_zero fill_path_even_odd
153
+ fill_and_stroke_path_non_zero close_fill_and_stroke_path_non_zero
154
+ fill_and_stroke_path_even_odd close_fill_and_stroke_path_even_odd
155
+ curve_to curve_to_no_first_control curve_to_no_second_control
156
+ clip_path_non_zero clip_path_even_odd].each do |op|
157
+ define_method(op) { |*| }
158
+ end
159
+
160
+ private
161
+
162
+ def transform_point(x, y)
163
+ ctm = graphics_state.ctm
164
+ [ctm.a * x + ctm.c * y + ctm.e, ctm.b * x + ctm.d * y + ctm.f]
165
+ end
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hexapdf'
4
+
5
+ module StackedPdfGenerator
6
+ # Applies a uniform crop rectangle (in PDF points) to every page of a source
7
+ # PDF, writing a new PDF whose CropBox and MediaBox match the requested
8
+ # region. Used as a pre-processing step before the imposition pipeline when
9
+ # the input contains printer crop marks.
10
+ module CropMarksProcessor
11
+ module_function
12
+
13
+ # Crop +input_path+ → +output_path+ using the given +box+ (a 4-element
14
+ # array [left, bottom, right, top] in PDF points).
15
+ def call(input_path, output_path, box)
16
+ validate_box!(box)
17
+ doc = HexaPDF::Document.open(input_path)
18
+ doc.pages.each do |page|
19
+ page[:CropBox] = box.dup
20
+ page[:MediaBox] = box.dup
21
+ page.delete(:BleedBox)
22
+ page.delete(:TrimBox)
23
+ page.delete(:ArtBox)
24
+ end
25
+ doc.write(output_path, optimize: true)
26
+ output_path
27
+ end
28
+
29
+ def validate_box!(box)
30
+ raise ProcessingError, 'Crop box must have 4 numeric values' unless box.is_a?(Array) && box.length == 4
31
+
32
+ left, bottom, right, top = box.map(&:to_f)
33
+ raise ProcessingError, "Crop box has zero or negative width (left=#{left}, right=#{right})" if right <= left
34
+ raise ProcessingError, "Crop box has zero or negative height (bottom=#{bottom}, top=#{top})" if top <= bottom
35
+ end
36
+ end
37
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module StackedPdfGenerator
4
- VERSION = '1.1.1'
4
+ VERSION = '1.2.0'
5
5
  end
@@ -6,6 +6,8 @@ require 'securerandom'
6
6
  require 'stacking_order'
7
7
 
8
8
  require_relative 'stacked_pdf_generator/version'
9
+ require_relative 'stacked_pdf_generator/crop_mark_detector'
10
+ require_relative 'stacked_pdf_generator/crop_marks_processor'
9
11
 
10
12
  # Provides library and CLI helpers for generating stack-cut friendly PDFs using
11
13
  # pdfjam/podofocrop tooling and stacking-order-based page sequencing.
@@ -14,6 +16,8 @@ module StackedPdfGenerator
14
16
 
15
17
  Result = Struct.new(:success?, :message, keyword_init: true)
16
18
 
19
+ PT_PER_MM = 72.0 / 25.4
20
+
17
21
  module_function
18
22
 
19
23
  def call(**kwargs)
@@ -24,10 +28,13 @@ module StackedPdfGenerator
24
28
  # and sequences pages via stacking-order to build the final PDF.
25
29
  class Generator
26
30
  attr_reader :input_path, :output_path, :paper_size, :autoscale, :portrait,
27
- :sheet_margins_raw, :rows, :columns, :pages_per_sheet, :two_sided_flipped
31
+ :sheet_margins_raw, :rows, :columns, :pages_per_sheet, :two_sided_flipped,
32
+ :crop_from_marks, :crop_box_mm, :mark_max_length_mm, :mark_edge_tolerance_mm
28
33
 
29
34
  def initialize(input_path:, output_path:, paper_size:, autoscale:, portrait:, rows: nil, columns: nil,
30
- pages_per_sheet: nil, sheet_margins: nil, two_sided_flipped: false)
35
+ pages_per_sheet: nil, sheet_margins: nil, two_sided_flipped: false,
36
+ crop_from_marks: false, crop_box: nil,
37
+ mark_max_length_mm: nil, mark_edge_tolerance_mm: nil)
31
38
  @input_path = input_path
32
39
  @output_path = output_path
33
40
  @paper_size = paper_size.to_s.upcase
@@ -38,18 +45,23 @@ module StackedPdfGenerator
38
45
  @columns = columns.nil? ? nil : Integer(columns)
39
46
  @pages_per_sheet = pages_per_sheet.nil? ? nil : Integer(pages_per_sheet)
40
47
  @two_sided_flipped = boolean_cast(two_sided_flipped)
48
+ @crop_from_marks = boolean_cast(crop_from_marks)
49
+ @crop_box_mm = crop_box # hash {top:, bottom:, left:, right:} in mm, optional
50
+ @mark_max_length_mm = mark_max_length_mm
51
+ @mark_edge_tolerance_mm = mark_edge_tolerance_mm
41
52
  normalize_layout_dimensions!
42
53
  end
43
54
 
44
55
  def call
45
56
  validate_arguments!
57
+ apply_crop_from_marks if crop_pre_processing?
46
58
  run_pdfjam
47
59
  finalize_output
48
60
  Result.new(success?: true, message: '')
49
61
  rescue ProcessingError => e
50
62
  Result.new(success?: false, message: e.message)
51
63
  ensure
52
- cleanup_tempfile
64
+ cleanup_tempfiles
53
65
  end
54
66
 
55
67
  private
@@ -60,6 +72,60 @@ module StackedPdfGenerator
60
72
  raise ProcessingError, 'pages_per_sheet must be positive' unless pages_per_sheet.positive?
61
73
  end
62
74
 
75
+ def crop_pre_processing?
76
+ crop_from_marks || crop_box_mm
77
+ end
78
+
79
+ def apply_crop_from_marks
80
+ box_pts = crop_box_mm ? manual_crop_box_pts : detect_crop_box_pts
81
+ CropMarksProcessor.call(input_path, cropped_input_path, box_pts)
82
+ @input_path = cropped_input_path
83
+ end
84
+
85
+ def detect_crop_box_pts
86
+ detector_args = {}
87
+ detector_args[:mark_max_length_pts] = mark_max_length_mm * PT_PER_MM if mark_max_length_mm
88
+ detector_args[:edge_tolerance_pts] = mark_edge_tolerance_mm * PT_PER_MM if mark_edge_tolerance_mm
89
+
90
+ detected = CropMarkDetector.call(input_path, **detector_args)
91
+ [detected.left, detected.bottom, detected.right, detected.top]
92
+ end
93
+
94
+ def manual_crop_box_pts
95
+ sym = crop_box_mm.transform_keys(&:to_sym)
96
+ missing = %i[top bottom].reject { |k| sym.key?(k) }
97
+ raise ProcessingError, "Manual crop_box missing keys: #{missing.inspect}" unless missing.empty?
98
+
99
+ page_w_pts, page_h_pts = first_page_dimensions_pts
100
+ left = (sym[:left] || 0.0).to_f * PT_PER_MM
101
+ right = sym[:right] ? sym[:right].to_f * PT_PER_MM : page_w_pts
102
+ # Manual values are given in mm from the bottom-left origin of the page,
103
+ # which matches the PDF coordinate system.
104
+ bottom = sym[:bottom].to_f * PT_PER_MM
105
+ top = sym[:top].to_f * PT_PER_MM
106
+
107
+ raise ProcessingError, "Manual crop top (#{top.round(2)}pts) exceeds page height (#{page_h_pts.round(2)}pts)" if top > page_h_pts + 0.5
108
+ raise ProcessingError, "Manual crop right (#{right.round(2)}pts) exceeds page width (#{page_w_pts.round(2)}pts)" if right > page_w_pts + 0.5
109
+
110
+ [left, bottom, right, top]
111
+ end
112
+
113
+ def first_page_dimensions_pts
114
+ doc = HexaPDF::Document.open(input_path)
115
+ page = doc.pages[0]
116
+ raise ProcessingError, 'PDF has no pages' unless page
117
+
118
+ [page.box.width.to_f, page.box.height.to_f]
119
+ end
120
+
121
+ def cropped_input_path
122
+ @cropped_input_path ||= begin
123
+ dirname = File.dirname(output_path)
124
+ FileUtils.mkdir_p(dirname)
125
+ File.join(dirname, "stacked_cropped_#{SecureRandom.hex(6)}.pdf")
126
+ end
127
+ end
128
+
63
129
  def run_pdfjam
64
130
  sequence = page_sequence
65
131
  cmd = [
@@ -93,8 +159,9 @@ module StackedPdfGenerator
93
159
  end
94
160
  end
95
161
 
96
- def cleanup_tempfile
162
+ def cleanup_tempfiles
97
163
  FileUtils.rm_f(temp_output_path) if defined?(@temp_output_path) && File.exist?(@temp_output_path)
164
+ FileUtils.rm_f(@cropped_input_path) if defined?(@cropped_input_path) && @cropped_input_path && File.exist?(@cropped_input_path)
98
165
  end
99
166
 
100
167
  def temp_output_path
metadata CHANGED
@@ -1,15 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stacked-pdf-generator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-11-26 00:00:00.000000000 Z
11
+ date: 2026-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hexapdf
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.0'
13
33
  - !ruby/object:Gem::Dependency
14
34
  name: stacking-order
15
35
  requirement: !ruby/object:Gem::Requirement
@@ -37,6 +57,8 @@ files:
37
57
  - README.md
38
58
  - exe/stacked-pdf-generator
39
59
  - lib/stacked_pdf_generator.rb
60
+ - lib/stacked_pdf_generator/crop_mark_detector.rb
61
+ - lib/stacked_pdf_generator/crop_marks_processor.rb
40
62
  - lib/stacked_pdf_generator/version.rb
41
63
  homepage: https://github.com/jeremy/stacked-pdf-generator
42
64
  licenses: