podoff 0.9.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.txt CHANGED
@@ -2,6 +2,16 @@
2
2
  = podoff CHANGELOG.txt
3
3
 
4
4
 
5
+ == podoff 1.0.0 released 2015-10-23
6
+
7
+ - leverage incremental updates
8
+
9
+
10
+ == podoff 0.9.1 not released
11
+
12
+ - ensure Obj#contents accepts arrays
13
+
14
+
5
15
  == podoff 0.9.0 released 2015-10-21
6
16
 
7
17
  - beta release
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
 
2
2
  # podoff
3
3
 
4
+ [![Build Status](https://secure.travis-ci.org/jmettraux/podoff.png)](http://travis-ci.org/jmettraux/podoff)
5
+ [![Gem Version](https://badge.fury.io/rb/podoff.png)](http://badge.fury.io/rb/podoff)
6
+
4
7
  A Ruby tool to deface PDF documents.
5
8
 
6
9
  If you're looking for serious libraries, look at
data/lib/podoff.rb CHANGED
@@ -26,250 +26,521 @@
26
26
 
27
27
  module Podoff
28
28
 
29
- VERSION = '0.9.0'
29
+ VERSION = '1.0.0'
30
30
 
31
- def self.load(path)
31
+ def self.load(path, encoding='iso-8859-1')
32
32
 
33
- Podoff::Document.new(
34
- File.open(path, 'r:iso8859-1') { |f| f.read })
33
+ Podoff::Document.load(path, encoding)
35
34
  end
36
35
 
36
+ def self.parse(s)
37
+
38
+ Podoff::Document.new(s)
39
+ end
40
+
41
+ #OBJ_ATTRIBUTES =
42
+ # { type: 'Type', subtype: 'Subtype',
43
+ # parent: 'Parent', kids: 'Kids', contents: 'Contents', annots: 'Annots',
44
+ # pagenum: 'pdftk_PageNum' }
45
+ OBJ_ATTRIBUTES =
46
+ { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
47
+
37
48
  class Document
38
49
 
39
- attr_reader :header
50
+ def self.load(path, encoding='iso-8859-1')
51
+
52
+ Podoff::Document.new(File.open(path, 'r:' + encoding) { |f| f.read })
53
+ end
54
+
55
+ def self.parse(s)
56
+
57
+ Podoff::Document.new(s)
58
+ end
59
+
60
+ attr_reader :source
61
+ attr_reader :xref
40
62
  attr_reader :objs
41
- attr_reader :footer
63
+ attr_reader :obj_counters
64
+ attr_reader :root
65
+ #
66
+ attr_reader :additions
42
67
 
43
68
  def initialize(s)
44
69
 
45
70
  fail ArgumentError.new('not a PDF file') \
46
71
  unless s.match(/\A%PDF-\d+\.\d+\n/)
47
72
 
48
- @header = []
49
- #
73
+ @source = s
74
+ @xref = nil
50
75
  @objs = {}
51
- cur = nil
76
+ @obj_counters = {}
77
+ @root = nil
78
+
79
+ @additions = {}
80
+
81
+ index = 0
82
+ matches = {}
52
83
  #
53
- @footer = nil
54
-
55
- s.split("\n").each do |l|
56
-
57
- if @footer
58
- @footer << l
59
- elsif m = /^(\d+ \d+) obj\b/.match(l)
60
- cur = (@objs[m[1]] = Obj.new(self, m[1]))
61
- cur << l
62
- elsif m = /^xref\b/.match(l)
63
- @footer = []
64
- @footer << l
65
- elsif cur
66
- cur << l
84
+ loop do
85
+
86
+ matches[:obj] ||= s.match(/^(\d+ \d+) obj\b/, index)
87
+ matches[:endobj] ||= s.match(/\bendobj\b/, index)
88
+ #
89
+ OBJ_ATTRIBUTES.each do |k, v|
90
+ matches[k] ||= s.match(/\/#{v} (\/?[^\/\n<>]+)/, index)
91
+ end
92
+ #
93
+ matches[:startxref] ||= s.match(/\bstartxref\s+(\d+)\s*%%EOF/, index)
94
+
95
+ objm = matches[:obj]
96
+ sxrm = matches[:startxref]
97
+
98
+ break unless sxrm || objm
99
+
100
+ fail ArgumentError.new('failed to find "startxref"') unless sxrm
101
+
102
+ @root = nil if @root && index > @root.offset(0).last
103
+ @root ||= s.match(/\/Root (\d+ \d+) R\b/, index)
104
+
105
+ sxri = sxrm.offset(0).first
106
+ obji = objm ? objm.offset(0).first : sxri + 1
107
+
108
+ if obji < sxri
109
+ obj = Podoff::Obj.extract(self, matches)
110
+ @objs[obj.ref] = obj
111
+ @obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1
112
+ index = obj.end_index + 1
67
113
  else
68
- @header << l
114
+ @xref = sxrm[1].to_i
115
+ index = sxrm.offset(0).last + 1
116
+ matches.delete(:startxref)
69
117
  end
70
118
  end
71
- end
72
119
 
73
- def fonts; @objs.values.select(&:is_font?); end
74
- def pages; @objs.values.select(&:is_page?); end
120
+ fail ArgumentError.new('found no /Root') unless @root
121
+ @root = @root[1]
122
+ end
75
123
 
76
- def page(i)
124
+ def updated?
77
125
 
78
- i < 1 ? nil : @objs.values.find { |o| o.page_number == i }
126
+ @additions.any?
79
127
  end
80
128
 
81
129
  def dup
82
130
 
83
- d0 = self
131
+ o = self
132
+
133
+ self.class.allocate.instance_eval do
84
134
 
85
- d = d0.class.allocate
135
+ @source = o.source
136
+ @xref = o.xref
86
137
 
87
- d.instance_eval do
88
- @header = d0.header.dup
89
- @footer = d0.footer.dup
90
- @objs = d0.objs.values.inject({}) { |h, v| h[v.ref] = v.dup(d); h }
138
+ @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
139
+ @obj_counters = o.obj_counters.dup
140
+
141
+ @root = o.root
142
+
143
+ @additions =
144
+ o.additions.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
145
+
146
+ self
91
147
  end
148
+ end
149
+
150
+ def pages
151
+
152
+ @objs.values.select { |o| o.type == '/Page' }
153
+ end
154
+
155
+ def page(index)
156
+
157
+ return nil if index == 0
158
+
159
+ pas = pages
160
+ return nil if pas.empty?
161
+
162
+ return (
163
+ index > 0 ? pas.at(index - 1) : pas.at(index)
164
+ ) unless pas.first.attributes[:pagenum]
165
+
166
+ if index < 0
167
+ max = pas.inject(0) { |n, pa| [ n, pa.page_number ].max }
168
+ index = max + 1 + index
169
+ end
170
+
171
+ pas.find { |pa| pa.page_number == index }
172
+ end
173
+
174
+ def new_ref
175
+
176
+ "#{
177
+ @objs.keys.inject(-1) { |i, r| [ i, r.split(' ').first.to_i ].max } + 1
178
+ } 0"
179
+ end
180
+
181
+ def add(obj)
182
+
183
+ @objs[obj.ref] = obj
184
+ @additions[obj.ref] = obj
185
+
186
+ obj
187
+ end
188
+
189
+ def add_base_font(name)
190
+
191
+ name = name[1..-1] if name[0] == '/'
192
+
193
+ ref = new_ref
194
+
195
+ add(
196
+ Obj.create(
197
+ self,
198
+ ref,
199
+ [
200
+ "#{ref} obj",
201
+ "<< /Type /Font /Subtype /Type1 /BaseFont /#{name} >>",
202
+ "endobj"
203
+ ].join(' ')))
204
+ end
205
+
206
+ def add_stream(s=nil, &block)
207
+
208
+ ref = new_ref
209
+
210
+ s = s || make_stream(&block)
211
+
212
+ s = [
213
+ "#{ref} obj",
214
+ "<< /Length #{s.length} >>",
215
+ "stream\n#{s}\nendstream",
216
+ "endobj"
217
+ ].join("\n") if s.is_a?(String)
218
+
219
+ o = add(Obj.create(self, ref, s))
220
+
221
+ s.is_a?(Podoff::Stream) ? s : o
222
+ end
223
+
224
+ def re_add(obj_or_ref)
225
+
226
+ obj = obj_or_ref.is_a?(String) ? @objs[obj_or_ref] : obj_or_ref
227
+
228
+ obj = obj.replicate unless obj.replica?
92
229
 
93
- d
230
+ add(obj)
94
231
  end
95
232
 
96
233
  def write(path)
97
234
 
98
- File.open(path, 'wb') do |f|
235
+ f = (path == :string) ? StringIO.new : File.open(path, 'wb')
99
236
 
100
- @header.each { |l| f.print(l); f.print("\n") }
237
+ f.write(@source)
101
238
 
102
- @objs.values.each do |o|
103
- o.lines.each { |l| f.print(l); f.print("\n") }
239
+ if @additions.any?
240
+
241
+ pointers = {}
242
+
243
+ @additions.values.each do |o|
244
+ f.write("\n")
245
+ pointers[o.ref] = f.pos + 1
246
+ if o.source.is_a?(String)
247
+ f.write(o.source)
248
+ else # Stream
249
+ s = o.source.to_s
250
+ f.write("#{o.ref} obj\n<< /Length #{s.length} >>\n")
251
+ f.write("stream\n#{s}\nendstream\nendobj")
252
+ end
253
+ end
254
+ f.write("\n\n")
255
+
256
+ xref = f.pos + 1
257
+
258
+ f.write("xref\n")
259
+ f.write("0 1\n")
260
+ f.write("0000000000 65535 f\n")
261
+
262
+ pointers.each do |k, v|
263
+ f.write("#{k.split(' ').first} 1\n")
264
+ f.write(sprintf("%010d 00000 n\n", v))
104
265
  end
105
266
 
106
- @footer.each { |l| f.print(l); f.print("\n") }
267
+ f.write("trailer\n")
268
+ f.write("<<\n")
269
+ f.write("/Prev #{self.xref}\n")
270
+ f.write("/Size #{objs.size}\n")
271
+ f.write("/Root #{root} R\n")
272
+ f.write(">>\n")
273
+ f.write("startxref #{xref}\n")
274
+ f.write("%%EOF\n")
107
275
  end
276
+
277
+ f.close
278
+
279
+ path == :string ? f.string : nil
280
+ end
281
+
282
+ private
283
+
284
+ def make_stream(&block)
285
+
286
+ s = Stream.new
287
+ s.instance_exec(&block) if block
288
+
289
+ s
108
290
  end
109
291
  end
110
292
 
111
293
  class Obj
112
294
 
295
+ def self.extract(doc, matches)
296
+
297
+ re = matches[:obj][1]
298
+ st = matches[:obj].offset(0).first
299
+ en = matches[:endobj].offset(0).last - 1
300
+
301
+ atts = {}
302
+
303
+ OBJ_ATTRIBUTES.keys.each do |k|
304
+ m = matches[k]
305
+ if m && m.offset(0).last < en
306
+ atts[k] = m[1].strip
307
+ matches.delete(k)
308
+ end
309
+ end
310
+
311
+ matches.delete(:obj)
312
+ matches.delete(:endobj)
313
+
314
+ Podoff::Obj.new(doc, re, st, en, atts)
315
+ end
316
+
113
317
  attr_reader :document
114
318
  attr_reader :ref
115
- attr_reader :lines
319
+ attr_reader :start_index, :end_index
320
+ attr_reader :attributes
116
321
 
117
- def initialize(doc, ref)
322
+ def initialize(doc, ref, st, en, atts, source=nil)
118
323
 
119
324
  @document = doc
120
325
  @ref = ref
121
- @lines = []
326
+ @start_index = st
327
+ @end_index = en
328
+ @attributes = atts
329
+ @source = source
330
+
331
+ recompute_attributes if @source.is_a?(String)
332
+ @source.obj = self if @source.is_a?(Podoff::Stream)
122
333
  end
123
334
 
124
- def <<(l)
335
+ def dup(new_doc)
125
336
 
126
- @lines << l
337
+ self.class.new(new_doc, ref, start_index, end_index, attributes.dup)
127
338
  end
128
339
 
129
- def lookup(k)
340
+ def self.create(doc, ref, source)
130
341
 
131
- @lines.each do |l|
342
+ self.new(doc, ref, nil, nil, nil, source)
343
+ end
132
344
 
133
- m = l.match(/^\/#{k} (.*)$/)
134
- return m[1] if m
135
- end
345
+ def replicate
136
346
 
137
- nil
347
+ self.class.create(document, ref, source.dup)
138
348
  end
139
349
 
140
- def index(o, start=0)
350
+ def to_a
141
351
 
142
- @lines[start..-1].each_with_index do |l, i|
352
+ [ @ref, @start_index, @end_index, @attributes ]
353
+ end
143
354
 
144
- if o.is_a?(String)
145
- return start + i if l == o
146
- else
147
- return start + i if l.match(o)
148
- end
149
- end
355
+ def source
150
356
 
151
- nil
357
+ @source || @document.source[@start_index..@end_index]
358
+ end
359
+
360
+ def replica?
361
+
362
+ @source != nil
152
363
  end
153
364
 
154
365
  def type
155
366
 
156
- t = lookup('Type')
157
- t ? t[1..-1] : nil
367
+ @attributes && @attributes[:type]
158
368
  end
159
369
 
160
370
  def page_number
161
371
 
162
- r = lookup('pdftk_PageNum')
372
+ r = @attributes && @attributes[:pagenum]
163
373
  r ? r.to_i : nil
164
374
  end
165
375
 
166
- def is_page?
376
+ # def parent
377
+ #
378
+ # r = @attributes[:parent]
379
+ # r ? r[0..-2].strip : nil
380
+ # end
381
+ #
382
+ # def kids
383
+ #
384
+ # r = @attributes[:kids]
385
+ # (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
386
+ # end
387
+ #
388
+ # def contents
389
+ #
390
+ # r = @attributes[:contents]
391
+ # (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
392
+ # end
167
393
 
168
- page_number != nil
169
- end
394
+ # def add_annotation(ref)
395
+ #
396
+ # if annots = @attributes[:annots]
397
+ # fail "implement me!"
398
+ # else
399
+ # i = @source.index('/Type ')
400
+ # @source.insert(i, "/Annots [#{ref} R]\n")
401
+ # end
402
+ # recompute_attributes
403
+ # end
404
+
405
+ # def add_free_text(x, y, text, font, size)
406
+ #
407
+ # fail ArgumentError.new('target is not a page') unless type == '/Page'
408
+ #
409
+ # nref = document.new_ref
410
+ #
411
+ # s = [
412
+ # "#{nref} obj <<",
413
+ # "/Type /Annot",
414
+ # "/Subtype /FreeText",
415
+ # "/Da (/F1 70 Tf 0 100 Td)",
416
+ # "/Rect [0 0 500 600]",
417
+ # "/Contents (#{text})",
418
+ # ">>",
419
+ # "endobj"
420
+ # ].join("\n")
421
+ # anno = Obj.create(document, nref, s)
422
+ #
423
+ # page = self.replicate
424
+ # page.add_annotation(nref)
425
+ #
426
+ # document.add(anno)
427
+ # document.add(page)
428
+ #
429
+ # anno
430
+ # end
170
431
 
171
- def is_font?
432
+ def insert_font(nick, obj_or_ref)
172
433
 
173
- type() == 'Font'
434
+ fail ArgumentError.new("target '#{ref}' not a replica") \
435
+ unless @source
436
+
437
+ nick = nick[1..-1] if nick[0] == '/'
438
+
439
+ re = obj_or_ref
440
+ re = re.ref if re.respond_to?(:ref)
441
+
442
+ @source = @source.gsub(/\/Font\s*<</, "/Font\n<<\n/#{nick} #{re} R")
174
443
  end
175
444
 
176
- def parent
445
+ def insert_contents(obj_or_ref)
177
446
 
178
- # /Parent 2 0 R
447
+ fail ArgumentError.new("target '#{ref}' not a replica") \
448
+ unless @source
449
+ fail ArgumentError.new("target '#{ref}' doesn't have /Contents") \
450
+ unless @attributes[:contents]
179
451
 
180
- r = lookup('Parent')
452
+ re = obj_or_ref
453
+ re = re.obj if re.respond_to?(:obj) # Stream
454
+ re = re.ref if re.respond_to?(:ref)
181
455
 
182
- r ? r[0..-2].strip : nil
456
+ add_to_attribute(:contents, re)
183
457
  end
458
+ alias :insert_content :insert_contents
184
459
 
185
- def kids
460
+ protected
186
461
 
187
- # /Kids [1 0 R 16 0 R 33 0 R]
462
+ def recompute_attributes
188
463
 
189
- r = lookup('Kids')
190
- (r || '').split(/[\[\]R]/).collect(&:strip).reject(&:empty?)
464
+ @attributes =
465
+ OBJ_ATTRIBUTES.inject({}) do |h, (k, v)|
466
+ m = @source.match(/\/#{v} (\/?[^\/\n<>]+)/)
467
+ h[k] = m[1] if m
468
+ h
469
+ end
191
470
  end
192
471
 
193
- def contents
472
+ def concat(refs, ref)
194
473
 
195
- r = lookup('Contents')
196
- r ? r[0..-2].strip : nil
474
+ refs = refs.strip
475
+ refs = refs[1..-2] if refs[0] == '['
476
+
477
+ "[#{refs} #{ref} R]"
197
478
  end
198
479
 
199
- def font_names
480
+ def add_to_attribute(key, ref)
200
481
 
201
- @lines.inject(nil) do |names, l|
482
+ fail ArgumentError.new("obj not replicated") unless @source
202
483
 
203
- if names
204
- return names if l == '>>'
205
- if m = l.match(/\/([^ ]+) /); names << m[1]; end
206
- elsif l.match(/\/Font\s*$/)
207
- names = []
208
- end
484
+ pkey = OBJ_ATTRIBUTES[key]
209
485
 
210
- names
486
+ if v = @attributes[key]
487
+ v = concat(v, ref)
488
+ @source = @source.gsub(/#{pkey} ([\[\]0-9 R]+)/, "#{pkey} #{v}")
489
+ else
490
+ i = @source.index('/Type ')
491
+ @source.insert(i, "/#{pkey} [#{ref} R]\n")
211
492
  end
212
-
213
- []
493
+ recompute_attributes
214
494
  end
495
+ end
215
496
 
216
- def dup(new_doc)
497
+ class Stream
498
+
499
+ attr_accessor :obj
217
500
 
218
- o0 = self
219
- o = o0.class.new(new_doc, @ref)
220
- o.instance_eval { @lines = o0.lines.dup }
501
+ def initialize
221
502
 
222
- o
503
+ @font = nil
504
+ @content = StringIO.new
223
505
  end
224
506
 
225
- def find(opts={}, &block)
507
+ #def document; obj.document; end
508
+ #def ref; obj.ref; end
509
+ #def source; self; end
226
510
 
227
- return self if block.call(self)
511
+ def tf(font_name, font_size)
228
512
 
229
- [ *kids, contents ].compact.each do |k|
230
- o = @document.objs[k]
231
- return o if o && block.call(o)
232
- end
513
+ n = font_name[0] == '/' ? font_name[1..-1] : font_name
233
514
 
234
- nil
515
+ @font = "/#{n} #{font_size} Tf "
235
516
  end
517
+ alias :font :tf
236
518
 
237
- def crop_box
238
-
239
- r = lookup('CropBox') || lookup('MediaBox')
519
+ def bt(x, y, text)
240
520
 
241
- r ? r.strip[1..-2].split(' ').collect(&:strip).collect(&:to_f) : nil
521
+ @content.write "\n" if @content.size > 0
522
+ @content.write "BT "
523
+ @content.write @font if @font
524
+ @content.write "#{x} #{y} Td (#{escape(text)}) Tj"
525
+ @content.write " ET"
242
526
  end
527
+ alias :text :bt
243
528
 
244
- def crop_dims
529
+ def write(text)
245
530
 
246
- x, y, w, h = crop_box
247
-
248
- x ? [ w - x, h - y ] : nil
531
+ @content.write(text)
249
532
  end
250
533
 
251
- def prepend_text(x, y, text, opts={})
252
-
253
- o = find { |o| o.index('BT') }
254
- fail ArgumentError.new('found no BT in the tree') unless o
534
+ def to_s
255
535
 
256
- font = opts[:font] || o.font_names.first || 'TT0'
257
- size = opts[:size] || 10
258
- comm = opts[:comment]
536
+ @content.string
537
+ end
259
538
 
260
- i = o.index('BT')
261
- bt = []
262
- bt << 'BT'
263
- bt << "#{x} #{y} Td"
264
- bt << "/#{font} #{size} Tf"
265
- bt << "(#{text})Tj"
266
- bt << 'ET'
267
- bt << " % #{comm}" if comm
268
- bt = bt.join(' ')
539
+ protected
269
540
 
270
- o.lines.insert(i, bt)
541
+ def escape(s)
271
542
 
272
- o
543
+ s.gsub(/\(/, '\(').gsub(/\)/, '\)')
273
544
  end
274
545
  end
275
546
  end