podoff 0.0.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.txt ADDED
@@ -0,0 +1,13 @@
1
+
2
+ = podoff CHANGELOG.txt
3
+
4
+
5
+ == podoff 0.9.0 released 2015-10-21
6
+
7
+ - beta release
8
+
9
+
10
+ == podoff 0.0.1 released 2015-10-20
11
+
12
+ - initial, empty, release
13
+
data/README.md CHANGED
@@ -1,6 +1,19 @@
1
1
 
2
2
  # podoff
3
3
 
4
+ A Ruby tool to deface PDF documents.
5
+
6
+ If you're looking for serious libraries, look at
7
+
8
+ * https://github.com/payrollhero/pdf_tempura
9
+ * https://github.com/prawnpdf/prawn-templates
10
+
11
+
12
+ ## disclaimer
13
+
14
+ The author of this tool/library has no link whatsoever with the authors of the sample PDF documents found under `pdfs/`. Those documents have been selected because they are representative of the PDF forms podoff is meant to ~~deface~~ fill.
15
+
16
+
4
17
  ## LICENSE
5
18
 
6
19
  MIT, see [LICENSE.txt](LICENSE.txt)
data/lib/podoff.rb CHANGED
@@ -26,6 +26,251 @@
26
26
 
27
27
  module Podoff
28
28
 
29
- VERSION = '0.0.1'
29
+ VERSION = '0.9.0'
30
+
31
# Reads the file at +path+ and wraps its content in a Podoff::Document.
#
# The file is opened in binary mode so that no newline translation is ever
# applied to the bytes, and the resulting string is tagged ISO-8859-1, which
# maps every byte to exactly one character.
#
# @param path [String] path of the PDF file to read
# @return [Podoff::Document]
# @raise [ArgumentError] (from Podoff::Document.new) when the content does
#   not start with a PDF header line
def self.load(path)

  Podoff::Document.new(
    File.open(path, 'rb:iso8859-1') { |f| f.read })
end
36
+
37
# Line-oriented, in-memory view of a PDF file.
#
# The source string is split on "\n" and each line is filed, in order of
# appearance, into one of three buckets:
#
# * +header+: the lines seen before the first "N G obj" line
# * +objs+:   a Hash mapping a "N G" reference to the Obj holding the lines
#             of that object; a reference seen twice replaces the earlier
#             entry
# * +footer+: the first line starting with "xref" and every line after it,
#             or nil when the source has no such line
class Document

  attr_reader :header
  attr_reader :objs
  attr_reader :footer

  # @param s [String] the whole content of a PDF file
  # @raise [ArgumentError] when +s+ does not start with "%PDF-x.y\n"
  def initialize(s)

    raise ArgumentError, 'not a PDF file' unless s =~ /\A%PDF-\d+\.\d+\n/

    @header = []
    @objs = {}
    @footer = nil

    current = nil # the Obj currently receiving lines

    s.split("\n").each do |line|

      if @footer
        # once the xref section has started, everything belongs to the footer
        @footer << line
      elsif (m = /^(\d+ \d+) obj\b/.match(line))
        current = (@objs[m[1]] = Obj.new(self, m[1]))
        current << line
      elsif line =~ /^xref\b/
        @footer = [ line ]
      elsif current
        current << line
      else
        @header << line
      end
    end
  end

  # @return [Array<Obj>] the objs answering true to #is_font?
  def fonts

    @objs.values.select(&:is_font?)
  end

  # @return [Array<Obj>] the objs answering true to #is_page?
  def pages

    @objs.values.select(&:is_page?)
  end

  # @param i [Integer] page number, starting at 1
  # @return [Obj, nil] the first obj whose #page_number equals +i+, nil when
  #   +i+ is below 1 or when no obj matches
  def page(i)

    return nil if i < 1

    @objs.values.find { |o| o.page_number == i }
  end

  # Returns a copy of this document: the header and footer arrays are
  # duplicated and every obj is duplicated (via Obj#dup) and bound to the
  # copy. The line strings themselves are shared with the original.
  #
  # #initialize is bypassed on purpose, nothing is parsed again.
  #
  # @return [Document]
  def dup

    copy = self.class.allocate

    copy.instance_variable_set(:@header, @header.dup)
    copy.instance_variable_set(:@footer, @footer ? @footer.dup : nil)
      # the footer is nil when the source had no xref line

    copy.instance_variable_set(
      :@objs,
      @objs.values.each_with_object({}) { |o, h| h[o.ref] = o.dup(copy) })

    copy
  end

  # Writes the header lines, then the lines of each obj (in insertion
  # order), then the footer lines to +path+, each line followed by "\n".
  #
  # A document without a footer is written without one instead of failing.
  #
  # @param path [String] destination file, opened in binary write mode
  def write(path)

    File.open(path, 'wb') do |f|

      print_lines(f, @header)
      @objs.each_value { |o| print_lines(f, o.lines) }
      print_lines(f, @footer || [])
    end
  end

  private

  # Prints each line to +io+, followed by a newline.
  def print_lines(io, lines)

    lines.each { |l| io.print(l); io.print("\n") }
  end
end
110
+
111
# One "N G obj" section of a PDF document, kept as an array of lines.
#
# All the accessors below work on a line-by-line basis: a dictionary entry
# is only seen when it sits at the start of its own line ("/Key value").
class Obj

  attr_reader :document
  attr_reader :ref
  attr_reader :lines

  # @param doc [Document] the document owning this obj
  # @param ref [String] the "N G" reference of this obj
  def initialize(doc, ref)

    @document = doc
    @ref = ref
    @lines = []
  end

  # Appends a line to this obj.
  def <<(l)

    @lines << l
  end

  # Returns what follows "/k " on the first line starting with "/k ".
  #
  # The key is matched literally (it is escaped before being placed in the
  # regular expression).
  #
  # @param k [String] key name, without the leading slash
  # @return [String, nil] the rest of the line, nil when no line matches
  def lookup(k)

    rex = /^\/#{Regexp.escape(k.to_s)} (.*)$/

    @lines.each do |line|
      m = line.match(rex)
      return m[1] if m
    end

    nil
  end

  # Returns the index of the first line, at or after +start+, that equals
  # +o+ (when +o+ is a String) or matches +o+ (otherwise).
  #
  # @param o [String, Regexp]
  # @param start [Integer] index at which the search starts
  # @return [Integer, nil] nil when nothing is found, including when +start+
  #   lies beyond the last line
  def index(o, start=0)

    # Array#[] returns nil for a range starting past the end of the array
    candidates = @lines[start..-1] || []

    candidates.each_with_index do |line, i|
      hit = o.is_a?(String) ? line == o : line.match(o)
      return start + i if hit
    end

    nil
  end

  # @return [String, nil] the "/Type" value without its leading slash
  def type

    t = lookup('Type')
    t ? t[1..-1] : nil
  end

  # @return [Integer, nil] the "/pdftk_PageNum" value, nil when absent
  def page_number

    r = lookup('pdftk_PageNum')
    r ? r.to_i : nil
  end

  # True when this obj carries a page number (see #page_number).
  def is_page?

    ! page_number.nil?
  end

  # True when the type of this obj is "Font".
  def is_font?

    type == 'Font'
  end

  # @return [String, nil] the "N G" reference found under "/Parent"
  def parent

    # /Parent 2 0 R

    r = lookup('Parent')

    r ? r[0..-2].strip : nil # drops the trailing "R"
  end

  # @return [Array<String>] the "N G" references found under "/Kids",
  #   an empty array when there is no such entry
  def kids

    # /Kids [1 0 R 16 0 R 33 0 R]

    r = lookup('Kids') || ''

    r.split(/[\[\]R]/).map(&:strip).reject(&:empty?)
  end

  # @return [String, nil] the "N G" reference found under "/Contents"
  def contents

    r = lookup('Contents')
    r ? r[0..-2].strip : nil # drops the trailing "R"
  end

  # Collects the names ("/Name ...") listed between the first line ending
  # with "/Font" and the next line that is exactly ">>".
  #
  # @return [Array<String>] the names without their leading slash; an empty
  #   array when there is no "/Font" line or when no ">>" line follows it
  def font_names

    names = nil # becomes an array once the "/Font" line has been seen

    @lines.each do |line|

      if names
        return names if line == '>>'
        m = line.match(/\/([^ ]+) /)
        names << m[1] if m
      elsif line.match(/\/Font\s*$/)
        names = []
      end
    end

    []
  end

  # Returns a copy of this obj bound to +new_doc+. The array of lines is
  # duplicated, the line strings are shared with the original.
  #
  # @param new_doc [Document] the document that will own the copy
  # @return [Obj]
  def dup(new_doc)

    copy = self.class.new(new_doc, @ref)
    copy.instance_variable_set(:@lines, @lines.dup)

    copy
  end

  # Returns the first obj for which the block returns a truthy value. This
  # obj is tried first, then the objs referenced by its #kids and #contents
  # (one level only). References unknown to the document are skipped.
  #
  # @param opts [Hash] currently unused
  # @yieldparam obj [Obj]
  # @return [Obj, nil]
  def find(opts={}, &block)

    return self if block.call(self)

    [ *kids, contents ].compact.each do |k|
      o = @document.objs[k]
      return o if o && block.call(o)
    end

    nil
  end

  # @return [Array<Float>, nil] the four numbers of "/CropBox" or, when
  #   absent, of "/MediaBox"; nil when neither is present
  def crop_box

    r = lookup('CropBox') || lookup('MediaBox')

    # "[0.0 0.0 612.0 792.0]" => [ 0.0, 0.0, 612.0, 792.0 ]
    r ? r.strip[1..-2].split(' ').map(&:to_f) : nil
  end

  # @return [Array<Float>, nil] the third number of #crop_box minus the
  #   first and the fourth minus the second; nil when there is no box
  def crop_dims

    x0, y0, x1, y1 = crop_box

    x0 ? [ x1 - x0, y1 - y0 ] : nil
  end

  # Inserts a one-line "BT ... ET" text block right before the first "BT"
  # line found via #find.
  #
  # Parentheses and backslashes in +text+ are backslash-escaped so that the
  # text stays a single PDF literal string. Only that one line is inserted;
  # nothing else in the obj or the document is touched.
  #
  # @param x [Numeric] first operand of "Td"
  # @param y [Numeric] second operand of "Td"
  # @param text [String] the text to show
  # @param opts [Hash] :font (defaults to the first of the target obj's
  #   #font_names, else "TT0"), :size (defaults to 10), :comment (appended
  #   after "ET" as a "%" comment)
  # @return [Obj] the obj in which the line got inserted
  # @raise [ArgumentError] when no obj with a "BT" line is found
  def prepend_text(x, y, text, opts={})

    target = find { |obj| obj.index('BT') }
    raise ArgumentError, 'found no BT in the tree' unless target

    font = opts[:font] || target.font_names.first || 'TT0'
    size = opts[:size] || 10
    comm = opts[:comment]

    bt = [
      'BT',
      "#{x} #{y} Td",
      "/#{font} #{size} Tf",
      "(#{escape_text(text)})Tj",
      'ET' ]
    bt << " % #{comm}" if comm

    target.lines.insert(target.index('BT'), bt.join(' '))

    target
  end

  private

  # Backslash-escapes the characters that delimit or escape within a PDF
  # literal string: "(", ")" and "\".
  def escape_text(text)

    text.to_s.gsub(/[\\()]/) { |c| "\\#{c}" }
  end
end
30
275
  end
31
276
 
data/spec/core_spec.rb ADDED
@@ -0,0 +1,30 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 13:11:38 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
# Specs for the module-level entry point, Podoff.load.
describe Podoff do

  describe '.load' do

    it 'loads a PDF document' do

      doc = Podoff.load('pdfs/udocument0.pdf')

      # exact class expected, not a subclass
      expect(doc).to be_an_instance_of(Podoff::Document)
    end

    it 'rejects items that are not PDF documents' do

      # a Ruby source file does not start with a "%PDF-x.y" line
      expect { Podoff.load('spec/spec_helper.rb') }
        .to raise_error(ArgumentError, 'not a PDF file')
    end
  end
end
30
+
@@ -0,0 +1,95 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 13:30:16 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
# Specs for Podoff::Document, run against the sample pdfs/udocument0.pdf.
describe Podoff::Document do

  before :all do

    # loaded once and shared (read-only) by all the examples
    @d = Podoff.load('pdfs/udocument0.pdf')
  end

  describe '#objs' do

    it 'returns a hash of PDF "obj"' do

      expect(@d.objs.class).to eq(Hash)
      expect(@d.objs.values.first.class).to eq(Podoff::Obj)
      expect(@d.objs.size).to eq(273)
    end
  end

  describe '#pages' do

    it 'returns the pages' do

      expect(@d.pages.size).to eq(3)
      expect(@d.pages.first.class).to eq(Podoff::Obj)
    end
  end

  describe '#page' do

    it 'returns a page given an index (starts at 1)' do

      p = @d.page(1)

      expect(p.class).to eq(Podoff::Obj)
      expect(p.type).to eq('Page')
    end

    it 'returns nil if the page doesn\'t exist' do

      expect(@d.page(0)).to eq(nil)
      expect(@d.page(9)).to eq(nil)
    end
  end

  describe '#fonts' do

    it 'returns the font obj' do

      expect(@d.fonts.size).to eq(35)
      expect(@d.fonts.first.class).to eq(Podoff::Obj)
    end
  end

  describe '#write' do

    it 'writes the document to a given path' do

      @d.write('tmp/out.pdf')

      s = File.open('tmp/out.pdf', 'r:iso8859-1') { |f| f.read }
      lines = s.split("\n")

      # the dot is escaped so that only a literal "1.7" is accepted
      expect(lines.first).to match(/^%PDF-1\.7$/)
      expect(lines.last).to match(/^%%EOF$/)
    end
  end

  describe '#dup' do

    it 'produces a shallow copy of the document' do

      d = @d.dup

      expect(d.class
        ).to eq(Podoff::Document)
      expect(d.objs.hash
        ).not_to eq(@d.objs.hash)
      expect(d.objs.values.first.hash
        ).not_to eq(@d.objs.values.first.hash)

      # each obj points back at the document that owns it
      expect(d.objs.values.first.document).to eq(d)
      expect(@d.objs.values.first.document).to eq(@d)
    end
  end
end
95
+
data/spec/obj_spec.rb ADDED
@@ -0,0 +1,183 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 15:08:59 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
# Specs for Podoff::Obj, run against the sample pdfs/udocument0.pdf.
describe Podoff::Obj do

  before :all do

    # loaded once and shared (read-only) by all the examples
    @d = Podoff.load('pdfs/udocument0.pdf')
  end

  describe '#document' do

    it 'points to the Podoff::Document owning this Obj' do

      expect(@d.objs.values.first.document).to eq(@d)
    end
  end

  describe '#type' do

    it 'returns the type of the obj' do

      expect(@d.objs['23 0'].type).to eq('Font')
    end

    it 'returns nil if there is no type' do

      expect(@d.objs['17 0'].type).to eq(nil)
    end
  end

  describe '#parent' do

    it 'returns the parent ref if any' do

      expect(@d.objs.values.first.parent).to eq('2 0')
    end

    it 'returns nil if there is no parent' do

      expect(@d.objs['2 0'].parent).to eq(nil)
    end
  end

  describe '#kids' do

    it 'returns a list of refs' do

      expect(@d.objs['2 0'].kids).to eq([ '1 0', '16 0', '33 0' ])
    end

    it 'returns an empty list if there are no kids' do

      expect(@d.objs['224 0'].kids).to eq([])
    end
  end

  describe '#contents' do

    it 'returns the Contents reference' do

      expect(@d.objs['1 0'].contents).to eq('3 0')
    end

    it 'returns nil if none' do

      expect(@d.objs['224 0'].contents).to eq(nil)
    end
  end

  describe '#font_names' do

    it 'returns a list of font names visible in this obj' do

      expect(
        @d.objs.values.first.font_names
      ).to eq(%w[
        C2_0 TT2 TT1 TT0 C2_2 C2_1 Helv
      ])
    end
  end

  describe '#index' do

    it 'returns the index for a given line' do

      o = @d.objs['3 0']

      expect(o.index('stream')).to eq(4)
      expect(o.index('BT')).to eq(14)
    end

    it 'returns nil when it doesn\'t find' do

      o = @d.objs['3 0']

      expect(o.index('nada')).to eq(nil)
    end

    it 'accepts regexes' do

      o = @d.objs['1 0']

      i = o.index(/^\/B.+Box /)

      expect(i).to eq(40)
      expect(o.lines[i]).to eq('/BleedBox [0.0 0.0 612.0 792.0]')
    end

    it 'accepts a start index' do

      o = @d.objs['1 0']

      i = o.index(/^\/.+Box /, 3)

      expect(i).to eq(5)
      expect(o.lines[i]).to eq('/TrimBox [0.0 0.0 612.0 792.0]')
    end
  end

  describe '#find' do

    it 'returns the first sub obj that matches the given block' do

      o = @d.objs['1 0']

      # block parameter named differently from the outer local
      o1 = o.find { |sub| sub.index('stream') }

      expect(o1).not_to eq(nil)
      expect(o1.lines.first).to eq('3 0 obj ')
    end

    # pending, no body yet
    it 'accepts a :skip_root option'
  end

  describe '#gather' do

    # pending, no bodies yet
    it 'returns a list of sub obj that match the given block'
    it 'accepts a :skip_root option'
  end

  describe '#crop_box' do

    it 'returns the [ x, y, w, h ] box for the obj' do

      o = @d.objs['1 0']

      expect(o.crop_box).to eq([ 0.0, 0.0, 612.0, 792.0 ])
    end

    it 'defaults to the MediaBox' do

      o = @d.objs['16 0']

      expect(o.crop_box).to eq([ 0.0, 0.0, 612.0, 792.0 ])
    end
  end

  describe '#crop_dims' do

    it 'returns [ w, h ]' do

      o = @d.objs['1 0']

      expect(o.crop_dims).to eq([ 612.0, 792.0 ])
    end

    it 'defaults to the MediaBox' do

      o = @d.objs['16 0']

      expect(o.crop_dims).to eq([ 612.0, 792.0 ])
    end
  end
end
183
+
@@ -0,0 +1,11 @@
1
+
2
+ #
3
+ # Specifying podoff
4
+ #
5
+ # Tue Oct 20 13:10:29 JST 2015
6
+ #
7
+
8
+ require 'pp'
9
+
10
+ require 'podoff'
11
+
@@ -0,0 +1,36 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 17:25:49 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
# Specs for the document-modifying operations, run against a copy of the
# sample pdfs/udocument0.pdf.
describe Podoff do

  before :all do

    # loaded once; each example works on its own #dup
    @d = Podoff.load('pdfs/udocument0.pdf')
  end

  describe 'Obj.prepend_text' do

    it 'adds text at the beginning of an obj' do

      d = @d.dup

      p1 = d.page(1)
      p1.prepend_text(0, 500, 'hello world! xxx', size: 35, font: 'Helv')
      #p1.prepend_text(0, 450, '"stuff NADA"', size: 35, font: 'Helv')

      d.write('tmp/out.pdf')

      # scan the written file from Ruby instead of shelling out to grep,
      # so the example does not depend on an external command
      written = File.open('tmp/out.pdf', 'rb:iso8859-1') { |f| f.read }
      s = written.split("\n").grep(/hello world!/).join("\n").strip

      expect(s).to eq('BT 0 500 Td /Helv 35 Tf (hello world! xxx)Tj ET')
    end
  end
end
36
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: podoff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.9.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-10-19 00:00:00.000000000 Z
12
+ date: 2015-10-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -52,7 +52,13 @@ extra_rdoc_files: []
52
52
  files:
53
53
  - Rakefile
54
54
  - lib/podoff.rb
55
+ - spec/core_spec.rb
56
+ - spec/document_spec.rb
57
+ - spec/obj_spec.rb
58
+ - spec/spec_helper.rb
59
+ - spec/update_spec.rb
55
60
  - podoff.gemspec
61
+ - CHANGELOG.txt
56
62
  - LICENSE.txt
57
63
  - README.md
58
64
  homepage: http://github.com/jmettraux/podoff