pdf-trim_detector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/CHANGELOG +2 -0
  2. data/README.markdown +28 -0
  3. data/lib/pdf/trim_detector.rb +162 -0
  4. metadata +115 -0
@@ -0,0 +1,2 @@
1
+ v0.1.0 (12th January 2013)
2
+ * initial release
@@ -0,0 +1,28 @@
1
+ = PDF::TrimDetector
2
+
3
+ Attempts to detect the trim marks on a PDF page and returns a rectangle
4
+ describing the box they indicate. Useful for when you have a PDF with trim
5
+ marks drawn on the page but no matching page box (TrimBox, ArtBox, etc.).
6
+
7
+ == Installation
8
+
9
+ gem install pdf-trim_detector
10
+
11
+ == Usage
12
+
13
+ require 'pdf-reader'
14
+ require 'pdf/trim_detector'
15
+
16
+ detector = PDF::TrimDetector.new
17
+
18
+ PDF::Reader.open("somefile.pdf") do |pdf|
19
+ pdf.pages.each do |page|
20
+ page.walk(detector)
21
+ puts detector.trim.inspect
22
+ end
23
+ end
24
+
25
+ == Licensing
26
+
27
+ This library is distributed under the terms of the MIT License. See the included file for
28
+ more detail.
@@ -0,0 +1,162 @@
1
+ # coding: utf-8
2
+
3
+ require 'forwardable'
4
+ require 'bigdecimal'
5
+
6
module PDF

  # Attempts to detect the trim marks on a PDF page and returns a rectangle
  # describing the box they indicate. Useful for when you have a PDF with trim
  # marks drawn on the page but no matching page box (TrimBox, ArtBox, etc.).
  #
  # An instance is meant to be used as a page-walk receiver: assign a page via
  # #page=, let the content stream callbacks (#begin_new_subpath, #append_line,
  # #stroke_path, #fill_stroke) feed it, then ask for #trim.
  #
  # See the project README for a usage overview.
  #
  class TrimDetector
    extend Forwardable

    ########## BEGIN FORWARDERS ##########
    # Graphics State Operators
    def_delegators :@state, :save_graphics_state, :restore_graphics_state

    # Matrix Operators
    def_delegators :@state, :concatenate_matrix

    # Builds a detector with no page attached. Until #page= is called there
    # are no recorded paths, so #trim returns nil instead of raising.
    def initialize
      @page  = nil
      @state = nil
      reset_paths
    end

    # Attaches the detector to +page+ and forgets everything recorded for any
    # previous page, so one detector can be reused across a whole document.
    #
    # page - the page object handed to PDF::Reader::PageState.new.
    def page=(page)
      @page  = page
      @state = PDF::Reader::PageState.new(page)
      reset_paths
    end

    # Returns the detected trim rectangle as
    # [lower_x, lower_y, upper_x, upper_y] (BigDecimal values rounded to one
    # decimal place), or nil when two distinct x positions and two distinct y
    # positions carrying trim marks could not be found.
    def trim
      xs = outer_x_candidates
      ys = outer_y_candidates
      return nil unless xs.size == 2 && ys.size == 2

      # both candidate lists are already sorted ascending
      [xs.first, ys.first, xs.last, ys.last]
    end

    # Callback for the "m" operator: starts a new subpath at (x, y). The
    # coordinates are passed through @state.ctm_transform before being stored.
    def begin_new_subpath(x, y) # m
      # transform first so a failure cannot leave an empty path behind
      x, y = @state.ctm_transform(x, y)
      path = Path.new
      path.add_point(x, y)
      @current_paths << path
    end

    # Callback for the "l" operator: extends the most recent subpath with a
    # line to (x, y). Ignored when no subpath has been started.
    def append_line(x, y) # l
      path = @current_paths.last
      return unless path

      x, y = @state.ctm_transform(x, y)
      path.add_point(x, y)
    end

    # Callback for the "B" operator: the pending subpaths have been painted,
    # so they become candidates for trim marks.
    def fill_stroke # B
      # TODO set the path colour
      commit_current_paths
    end

    # Callback for the "S" operator: the pending subpaths have been painted,
    # so they become candidates for trim marks.
    def stroke_path # S
      # TODO set the path colour
      commit_current_paths
    end

    private

    # Clears both the painted paths and the paths still under construction.
    def reset_paths
      @paths = []
      @current_paths = []
    end

    # Moves every pending subpath, in order, onto the list of painted paths.
    def commit_current_paths
      @paths.concat(@current_paths)
      @current_paths.clear
    end

    def horizontal_paths
      @paths.select(&:horizontal?)
    end

    def vertical_paths
      @paths.select(&:vertical?)
    end

    # Vertical marks keyed by the x coordinate they sit on.
    def grouped_vertical_paths
      vertical_paths.group_by { |path| path.start.x }
    end

    # Horizontal marks keyed by the y coordinate they sit on.
    def grouped_horizontal_paths
      horizontal_paths.group_by { |path| path.start.y }
    end

    # Sorted x coordinates that carry at least two vertical marks (a trim
    # edge normally has one mark at each end).
    def x_with_two_paths
      coordinates_with_two_paths(grouped_vertical_paths)
    end

    # Sorted y coordinates that carry at least two horizontal marks.
    def y_with_two_paths
      coordinates_with_two_paths(grouped_horizontal_paths)
    end

    # Returns the sorted keys of +groups+ whose value holds two or more paths.
    def coordinates_with_two_paths(groups)
      groups.map { |coordinate, paths|
        paths.size >= 2 ? coordinate : nil
      }.compact.sort
    end

    def outer_x_candidates
      extremes(x_with_two_paths)
    end

    def outer_y_candidates
      extremes(y_with_two_paths)
    end

    # Reduces a sorted list to its smallest and largest entries; lists with
    # fewer than two entries are returned untouched.
    def extremes(sorted)
      sorted.size >= 2 ? [sorted.first, sorted.last] : sorted
    end

    # A coordinate pair. Values are stored as BigDecimal rounded to one
    # decimal place so marks that differ only by tiny amounts compare (and
    # group) as equal.
    class Point
      attr_reader :x, :y

      def initialize(x, y)
        # Kernel#BigDecimal replaces BigDecimal.new, which was removed from
        # the bigdecimal library in 2.0.
        @x = BigDecimal(x.to_s).round(1)
        @y = BigDecimal(y.to_s).round(1)
      end
    end

    # An ordered list of points built from "m"/"l" operators.
    class Path

      def initialize
        @points = []
      end

      # The first point of the path, or nil when the path is empty.
      def start
        @points.first
      end

      def add_point(x, y)
        @points << Point.new(x, y)
      end

      # True for a two-point path whose points share the same y.
      def horizontal?
        @points.size == 2 && y_points.uniq.size == 1
      end

      # True for a two-point path whose points share the same x.
      def vertical?
        @points.size == 2 && x_points.uniq.size == 1
      end

      private

      def x_points
        @points.map { |point| point.x }
      end

      def y_points
        @points.map { |point| point.y }
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf-trim_detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - James Healy
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-12 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: pdf-reader
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.1.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.1.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '2.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rdoc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: Attempt to detect the region of a PDF page indicated by trim marks
79
+ email:
80
+ - james@yob.id.au
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - lib/pdf/trim_detector.rb
86
+ - README.markdown
87
+ - CHANGELOG
88
+ homepage:
89
+ licenses: []
90
+ post_install_message:
91
+ rdoc_options:
92
+ - --title
93
+ - PDF::TrimDetector
94
+ - --line-numbers
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: 1.8.7
103
+ required_rubygems_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: 1.3.2
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 1.8.23
112
+ signing_key:
113
+ specification_version: 3
114
+ summary: Attempt to detect the region of a PDF page indicated by trim marks
115
+ test_files: []