podoff 0.0.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.txt ADDED
@@ -0,0 +1,13 @@
1
+
2
+ = podoff CHANGELOG.txt
3
+
4
+
5
+ == podoff 0.9.0 released 2015-10-21
6
+
7
+ - beta release
8
+
9
+
10
+ == podoff 0.0.1 released 2015-10-20
11
+
12
+ - initial, empty, release
13
+
data/README.md CHANGED
@@ -1,6 +1,19 @@
1
1
 
2
2
  # podoff
3
3
 
4
+ A Ruby tool to deface PDF documents.
5
+
6
+ If you're looking for serious libraries, look at:
7
+
8
+ * https://github.com/payrollhero/pdf_tempura
9
+ * https://github.com/prawnpdf/prawn-templates
10
+
11
+
12
+ ## disclaimer
13
+
14
+ The author of this tool/library has no link whatsoever with the authors of the sample PDF documents found under `pdfs/`. Those documents have been selected because they are representative of the PDF forms podoff is meant to ~~deface~~ fill.
15
+
16
+
4
17
  ## LICENSE
5
18
 
6
19
  MIT, see [LICENSE.txt](LICENSE.txt)
data/lib/podoff.rb CHANGED
@@ -26,6 +26,251 @@
26
26
 
27
27
  module Podoff
28
28
 
29
- VERSION = '0.0.1'
29
+ VERSION = '0.9.0'
30
+
31
+ def self.load(path)
32
+
33
+ Podoff::Document.new(
34
+ File.open(path, 'r:iso8859-1') { |f| f.read })
35
+ end
36
+
37
+ class Document
38
+
39
+ attr_reader :header
40
+ attr_reader :objs
41
+ attr_reader :footer
42
+
43
+ def initialize(s)
44
+
45
+ fail ArgumentError.new('not a PDF file') \
46
+ unless s.match(/\A%PDF-\d+\.\d+\n/)
47
+
48
+ @header = []
49
+ #
50
+ @objs = {}
51
+ cur = nil
52
+ #
53
+ @footer = nil
54
+
55
+ s.split("\n").each do |l|
56
+
57
+ if @footer
58
+ @footer << l
59
+ elsif m = /^(\d+ \d+) obj\b/.match(l)
60
+ cur = (@objs[m[1]] = Obj.new(self, m[1]))
61
+ cur << l
62
+ elsif m = /^xref\b/.match(l)
63
+ @footer = []
64
+ @footer << l
65
+ elsif cur
66
+ cur << l
67
+ else
68
+ @header << l
69
+ end
70
+ end
71
+ end
72
+
73
+ def fonts; @objs.values.select(&:is_font?); end
74
+ def pages; @objs.values.select(&:is_page?); end
75
+
76
+ def page(i)
77
+
78
+ i < 1 ? nil : @objs.values.find { |o| o.page_number == i }
79
+ end
80
+
81
+ def dup
82
+
83
+ d0 = self
84
+
85
+ d = d0.class.allocate
86
+
87
+ d.instance_eval do
88
+ @header = d0.header.dup
89
+ @footer = d0.footer.dup
90
+ @objs = d0.objs.values.inject({}) { |h, v| h[v.ref] = v.dup(d); h }
91
+ end
92
+
93
+ d
94
+ end
95
+
96
+ def write(path)
97
+
98
+ File.open(path, 'wb') do |f|
99
+
100
+ @header.each { |l| f.print(l); f.print("\n") }
101
+
102
+ @objs.values.each do |o|
103
+ o.lines.each { |l| f.print(l); f.print("\n") }
104
+ end
105
+
106
+ @footer.each { |l| f.print(l); f.print("\n") }
107
+ end
108
+ end
109
+ end
110
+
111
+ class Obj
112
+
113
+ attr_reader :document
114
+ attr_reader :ref
115
+ attr_reader :lines
116
+
117
+ def initialize(doc, ref)
118
+
119
+ @document = doc
120
+ @ref = ref
121
+ @lines = []
122
+ end
123
+
124
+ def <<(l)
125
+
126
+ @lines << l
127
+ end
128
+
129
+ def lookup(k)
130
+
131
+ @lines.each do |l|
132
+
133
+ m = l.match(/^\/#{k} (.*)$/)
134
+ return m[1] if m
135
+ end
136
+
137
+ nil
138
+ end
139
+
140
+ def index(o, start=0)
141
+
142
+ @lines[start..-1].each_with_index do |l, i|
143
+
144
+ if o.is_a?(String)
145
+ return start + i if l == o
146
+ else
147
+ return start + i if l.match(o)
148
+ end
149
+ end
150
+
151
+ nil
152
+ end
153
+
154
+ def type
155
+
156
+ t = lookup('Type')
157
+ t ? t[1..-1] : nil
158
+ end
159
+
160
+ def page_number
161
+
162
+ r = lookup('pdftk_PageNum')
163
+ r ? r.to_i : nil
164
+ end
165
+
166
+ def is_page?
167
+
168
+ page_number != nil
169
+ end
170
+
171
+ def is_font?
172
+
173
+ type() == 'Font'
174
+ end
175
+
176
+ def parent
177
+
178
+ # /Parent 2 0 R
179
+
180
+ r = lookup('Parent')
181
+
182
+ r ? r[0..-2].strip : nil
183
+ end
184
+
185
+ def kids
186
+
187
+ # /Kids [1 0 R 16 0 R 33 0 R]
188
+
189
+ r = lookup('Kids')
190
+ (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
191
+ end
192
+
193
+ def contents
194
+
195
+ r = lookup('Contents')
196
+ r ? r[0..-2].strip : nil
197
+ end
198
+
199
+ def font_names
200
+
201
+ @lines.inject(nil) do |names, l|
202
+
203
+ if names
204
+ return names if l == '>>'
205
+ if m = l.match(/\/([^ ]+) /); names << m[1]; end
206
+ elsif l.match(/\/Font\s*$/)
207
+ names = []
208
+ end
209
+
210
+ names
211
+ end
212
+
213
+ []
214
+ end
215
+
216
+ def dup(new_doc)
217
+
218
+ o0 = self
219
+ o = o0.class.new(new_doc, @ref)
220
+ o.instance_eval { @lines = o0.lines.dup }
221
+
222
+ o
223
+ end
224
+
225
+ def find(opts={}, &block)
226
+
227
+ return self if block.call(self)
228
+
229
+ [ *kids, contents ].compact.each do |k|
230
+ o = @document.objs[k]
231
+ return o if o && block.call(o)
232
+ end
233
+
234
+ nil
235
+ end
236
+
237
+ def crop_box
238
+
239
+ r = lookup('CropBox') || lookup('MediaBox')
240
+
241
+ r ? r.strip[1..-2].split(' ').collect(&:strip).collect(&:to_f) : nil
242
+ end
243
+
244
+ def crop_dims
245
+
246
+ x, y, w, h = crop_box
247
+
248
+ x ? [ w - x, h - y ] : nil
249
+ end
250
+
251
+ def prepend_text(x, y, text, opts={})
252
+
253
+ o = find { |o| o.index('BT') }
254
+ fail ArgumentError.new('found no BT in the tree') unless o
255
+
256
+ font = opts[:font] || o.font_names.first || 'TT0'
257
+ size = opts[:size] || 10
258
+ comm = opts[:comment]
259
+
260
+ i = o.index('BT')
261
+ bt = []
262
+ bt << 'BT'
263
+ bt << "#{x} #{y} Td"
264
+ bt << "/#{font} #{size} Tf"
265
+ bt << "(#{text})Tj"
266
+ bt << 'ET'
267
+ bt << " % #{comm}" if comm
268
+ bt = bt.join(' ')
269
+
270
+ o.lines.insert(i, bt)
271
+
272
+ o
273
+ end
274
+ end
30
275
  end
31
276
 
data/spec/core_spec.rb ADDED
@@ -0,0 +1,30 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 13:11:38 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
+ describe Podoff do
12
+
13
+ describe '.load' do
14
+
15
+ it 'loads a PDF document' do
16
+
17
+ d = Podoff.load('pdfs/udocument0.pdf')
18
+
19
+ expect(d.class).to eq(Podoff::Document)
20
+ end
21
+
22
+ it 'rejects items that are not PDF documents' do
23
+
24
+ expect {
25
+ Podoff.load('spec/spec_helper.rb')
26
+ }.to raise_error(ArgumentError, 'not a PDF file')
27
+ end
28
+ end
29
+ end
30
+
@@ -0,0 +1,95 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 13:30:16 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
+ describe Podoff::Document do
12
+
13
+ before :all do
14
+
15
+ @d = Podoff.load('pdfs/udocument0.pdf')
16
+ end
17
+
18
+ describe '#objs' do
19
+
20
+ it 'returns a hash of PDF "obj"' do
21
+
22
+ expect(@d.objs.class).to eq(Hash)
23
+ expect(@d.objs.values.first.class).to eq(Podoff::Obj)
24
+ expect(@d.objs.size).to eq(273)
25
+ end
26
+ end
27
+
28
+ describe '#pages' do
29
+
30
+ it 'returns the pages' do
31
+
32
+ expect(@d.pages.size).to eq(3)
33
+ expect(@d.pages.first.class).to eq(Podoff::Obj)
34
+ end
35
+ end
36
+
37
+ describe '#page' do
38
+
39
+ it 'returns a page given an index (starts at 1)' do
40
+
41
+ p = @d.page(1)
42
+
43
+ expect(p.class).to eq(Podoff::Obj)
44
+ expect(p.type).to eq('Page')
45
+ end
46
+
47
+ it 'returns nil if the page doesn\'t exist' do
48
+
49
+ expect(@d.page(0)).to eq(nil)
50
+ expect(@d.page(9)).to eq(nil)
51
+ end
52
+ end
53
+
54
+ describe '#fonts' do
55
+
56
+ it 'returns the font obj' do
57
+
58
+ expect(@d.fonts.size).to eq(35)
59
+ expect(@d.fonts.first.class).to eq(Podoff::Obj)
60
+ end
61
+ end
62
+
63
+ describe '#write' do
64
+
65
+ it 'writes the document to a given path' do
66
+
67
+ @d.write('tmp/out.pdf')
68
+
69
+ s = File.open('tmp/out.pdf', 'r:iso8859-1') { |f| f.read }
70
+ lines = s.split("\n")
71
+
72
+ expect(lines.first).to match(/^%PDF-1.7$/)
73
+ expect(lines.last).to match(/^%%EOF$/)
74
+ end
75
+ end
76
+
77
+ describe '#dup' do
78
+
79
+ it 'produces a shallow copy of the document' do
80
+
81
+ d = @d.dup
82
+
83
+ expect(d.class
84
+ ).to eq(Podoff::Document)
85
+ expect(d.objs.hash
86
+ ).not_to eq(@d.objs.hash)
87
+ expect(d.objs.values.first.hash
88
+ ).not_to eq(@d.objs.values.first.hash)
89
+
90
+ expect(d.objs.values.first.document).to eq(d)
91
+ expect(@d.objs.values.first.document).to eq(@d)
92
+ end
93
+ end
94
+ end
95
+
data/spec/obj_spec.rb ADDED
@@ -0,0 +1,183 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 15:08:59 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
+ describe Podoff::Obj do
12
+
13
+ before :all do
14
+
15
+ @d = Podoff.load('pdfs/udocument0.pdf')
16
+ end
17
+
18
+ describe '#document' do
19
+
20
+ it 'points to the Podoff::Document owning this Obj' do
21
+
22
+ expect(@d.objs.values.first.document).to eq(@d)
23
+ end
24
+ end
25
+
26
+ describe '#type' do
27
+
28
+ it 'returns the type of the obj' do
29
+
30
+ expect(@d.objs['23 0'].type).to eq('Font')
31
+ end
32
+
33
+ it 'returns nil if there is no type' do
34
+
35
+ expect(@d.objs['17 0'].type).to eq(nil)
36
+ end
37
+ end
38
+
39
+ describe '#parent' do
40
+
41
+ it 'returns the parent ref if any' do
42
+
43
+ expect(@d.objs.values.first.parent).to eq('2 0')
44
+ end
45
+
46
+ it 'returns nil if there is no parent' do
47
+
48
+ expect(@d.objs['2 0'].parent).to eq(nil)
49
+ end
50
+ end
51
+
52
+ describe '#kids' do
53
+
54
+ it 'returns a list of refs' do
55
+
56
+ expect(@d.objs['2 0'].kids).to eq([ '1 0', '16 0', '33 0' ])
57
+ end
58
+
59
+ it 'returns an empty list if there are no kids' do
60
+
61
+ expect(@d.objs['224 0'].kids).to eq([])
62
+ end
63
+ end
64
+
65
+ describe '#contents' do
66
+
67
+ it 'returns the Contents reference' do
68
+
69
+ expect(@d.objs['1 0'].contents).to eq('3 0')
70
+ end
71
+
72
+ it 'returns nil if none' do
73
+
74
+ expect(@d.objs['224 0'].contents).to eq(nil)
75
+ end
76
+ end
77
+
78
+ describe '#font_names' do
79
+
80
+ it 'returns a list of font names visible in this obj' do
81
+
82
+ expect(
83
+ @d.objs.values.first.font_names
84
+ ).to eq(%w[
85
+ C2_0 TT2 TT1 TT0 C2_2 C2_1 Helv
86
+ ])
87
+ end
88
+ end
89
+
90
+ describe '#index' do
91
+
92
+ it 'returns the for a given line' do
93
+
94
+ o = @d.objs['3 0']
95
+
96
+ expect(o.index('stream')).to eq(4)
97
+ expect(o.index('BT')).to eq(14)
98
+ end
99
+
100
+ it 'returns nil when it doesn\'t find' do
101
+
102
+ o = @d.objs['3 0']
103
+
104
+ expect(o.index('nada')).to eq(nil)
105
+ end
106
+
107
+ it 'accepts regexes' do
108
+
109
+ o = @d.objs['1 0']
110
+
111
+ i = o.index(/^\/B.+Box /)
112
+
113
+ expect(i).to eq(40)
114
+ expect(o.lines[i]).to eq('/BleedBox [0.0 0.0 612.0 792.0]')
115
+ end
116
+
117
+ it 'accepts a start index' do
118
+
119
+ o = @d.objs['1 0']
120
+
121
+ i = o.index(/^\/.+Box /, 3)
122
+
123
+ expect(i).to eq(5)
124
+ expect(o.lines[i]).to eq('/TrimBox [0.0 0.0 612.0 792.0]')
125
+ end
126
+ end
127
+
128
+ describe '#find' do
129
+
130
+ it 'returns the first sub obj that matches the given block' do
131
+
132
+ o = @d.objs['1 0']
133
+
134
+ o1 = o.find { |o| o.index('stream') }
135
+
136
+ expect(o1).not_to eq(nil)
137
+ expect(o1.lines.first).to eq('3 0 obj ')
138
+ end
139
+
140
+ it 'accept a :skip_root option'
141
+ end
142
+
143
+ describe '#gather' do
144
+
145
+ it 'returns a list of sub obj that match the given block'
146
+ it 'accept a :skip_root option'
147
+ end
148
+
149
+ describe '#crop_box' do
150
+
151
+ it 'returns the [ x, y, w, h ] box for the obj' do
152
+
153
+ o = @d.objs['1 0']
154
+
155
+ expect(o.crop_box).to eq([ 0.0, 0.0, 612.0, 792.0 ])
156
+ end
157
+
158
+ it 'defaults to the MediaBox' do
159
+
160
+ o = @d.objs['16 0']
161
+
162
+ expect(o.crop_box).to eq([ 0.0, 0.0, 612.0, 792.0 ])
163
+ end
164
+ end
165
+
166
+ describe '#crop_dims' do
167
+
168
+ it 'returns [ w, h ]' do
169
+
170
+ o = @d.objs['1 0']
171
+
172
+ expect(o.crop_dims).to eq([ 612.0, 792.0 ])
173
+ end
174
+
175
+ it 'defaults to the MediaBox' do
176
+
177
+ o = @d.objs['16 0']
178
+
179
+ expect(o.crop_dims).to eq([ 612.0, 792.0 ])
180
+ end
181
+ end
182
+ end
183
+
@@ -0,0 +1,11 @@
1
+
2
+ #
3
+ # Specifying podoff
4
+ #
5
+ # Tue Oct 20 13:10:29 JST 2015
6
+ #
7
+
8
+ require 'pp'
9
+
10
+ require 'podoff'
11
+
@@ -0,0 +1,36 @@
1
+
2
+ #
3
+ # specifying podoff
4
+ #
5
+ # Tue Oct 20 17:25:49 JST 2015
6
+ #
7
+
8
+ require 'spec_helper'
9
+
10
+
11
+ describe Podoff do
12
+
13
+ before :all do
14
+
15
+ @d = Podoff.load('pdfs/udocument0.pdf')
16
+ end
17
+
18
+ describe 'Obj.prepend_text' do
19
+
20
+ it 'adds text at the beginning of an obj' do
21
+
22
+ d = @d.dup
23
+
24
+ p1 = d.page(1)
25
+ p1.prepend_text(0, 500, 'hello world! xxx', size: 35, font: 'Helv')
26
+ #p1.prepend_text(0, 450, '"stuff NADA"', size: 35, font: 'Helv')
27
+
28
+ d.write('tmp/out.pdf')
29
+
30
+ s = `grep -a "hello world!" tmp/out.pdf`.strip
31
+
32
+ expect(s).to eq('BT 0 500 Td /Helv 35 Tf (hello world! xxx)Tj ET')
33
+ end
34
+ end
35
+ end
36
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: podoff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.9.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-10-19 00:00:00.000000000 Z
12
+ date: 2015-10-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -52,7 +52,13 @@ extra_rdoc_files: []
52
52
  files:
53
53
  - Rakefile
54
54
  - lib/podoff.rb
55
+ - spec/core_spec.rb
56
+ - spec/document_spec.rb
57
+ - spec/obj_spec.rb
58
+ - spec/spec_helper.rb
59
+ - spec/update_spec.rb
55
60
  - podoff.gemspec
61
+ - CHANGELOG.txt
56
62
  - LICENSE.txt
57
63
  - README.md
58
64
  homepage: http://github.com/jmettraux/podoff