podoff 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.txt CHANGED
@@ -2,6 +2,12 @@
2
2
  = podoff CHANGELOG.txt
3
3
 
4
4
 
5
+ == podoff 1.1.1 released 2015-10-26
6
+
7
+ - reworked xref table output
8
+ - FlateDecode stream if length > 98
9
+
10
+
5
11
  == podoff 1.1.0 released 2015-10-25
6
12
 
7
13
  - more tolerant at parsing (StringScanner)
data/lib/podoff.rb CHANGED
@@ -23,13 +23,14 @@
23
23
  # Made in Japan.
24
24
  #++
25
25
 
26
+ require 'zlib'
26
27
  require 'strscan'
27
28
  require 'stringio'
28
29
 
29
30
 
30
31
  module Podoff
31
32
 
32
- VERSION = '1.1.0'
33
+ VERSION = '1.1.1'
33
34
 
34
35
  def self.load(path, encoding='iso-8859-1')
35
36
 
@@ -53,7 +54,7 @@ module Podoff
53
54
  Podoff::Document.new(s)
54
55
  end
55
56
 
56
- attr_reader :source
57
+ attr_reader :scanner
57
58
  attr_reader :version
58
59
  attr_reader :xref
59
60
  attr_reader :objs
@@ -67,7 +68,7 @@ module Podoff
67
68
  fail ArgumentError.new('not a PDF file') \
68
69
  unless s.match(/\A%PDF-\d+\.\d+\s/)
69
70
 
70
- @source = s
71
+ @scanner = ::StringScanner.new(s)
71
72
  @version = nil
72
73
  @xref = nil
73
74
  @objs = {}
@@ -76,15 +77,14 @@ module Podoff
76
77
 
77
78
  @additions = {}
78
79
 
79
- sca = ::StringScanner.new(s)
80
- @version = sca.scan(/%PDF-\d+\.\d+/)
80
+ @version = @scanner.scan(/%PDF-\d+\.\d+/)
81
81
 
82
82
  loop do
83
83
 
84
- i = sca.skip_until(
84
+ i = @scanner.skip_until(
85
85
  /(startxref\s+\d+|\d+\s+\d+\s+obj|\/Root\s+\d+\s+\d+\s+R)/)
86
86
 
87
- m = sca.matched
87
+ m = @scanner.matched
88
88
  break unless m
89
89
 
90
90
  if m[0] == 's'
@@ -92,22 +92,27 @@ module Podoff
92
92
  elsif m[0] == '/'
93
93
  @root = extract_ref(m)
94
94
  else
95
- obj = Podoff::Obj.extract(self, sca)
95
+ obj = Podoff::Obj.extract(self)
96
96
  @objs[obj.ref] = obj
97
97
  @obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1
98
98
  end
99
99
  end
100
100
 
101
101
  if @root == nil
102
- sca.pos = 0
102
+ @scanner.pos = 0
103
103
  loop do
104
- i = sca.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
105
- break unless sca.matched
106
- @root = extract_ref(sca.matched)
104
+ i = @scanner.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
105
+ break unless @scanner.matched
106
+ @root = extract_ref(@scanner.matched)
107
107
  end
108
108
  end
109
109
  end
110
110
 
111
+ def source
112
+
113
+ @scanner.string
114
+ end
115
+
111
116
  def extract_ref(s)
112
117
 
113
118
  s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip
@@ -124,7 +129,7 @@ module Podoff
124
129
 
125
130
  self.class.allocate.instance_eval do
126
131
 
127
- @source = o.source
132
+ @scanner = ::StringScanner.new(o.source)
128
133
  @xref = o.xref
129
134
 
130
135
  @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
@@ -182,35 +187,32 @@ module Podoff
182
187
 
183
188
  name = name[1..-1] if name[0] == '/'
184
189
 
185
- ref = new_ref
190
+ r = new_ref
191
+ s = "#{r} obj <</Type /Font /Subtype /Type1 /BaseFont /#{name}>> endobj"
186
192
 
187
- add(
188
- Obj.create(
189
- self,
190
- ref,
191
- [
192
- "#{ref} obj",
193
- "<< /Type /Font /Subtype /Type1 /BaseFont /#{name} >>",
194
- "endobj"
195
- ].join(' ')))
193
+ add(Obj.new(self, r, source: s))
196
194
  end
197
195
 
198
- def add_stream(s=nil, &block)
196
+ def add_stream(src=nil, &block)
199
197
 
200
198
  ref = new_ref
201
199
 
202
- s = s || make_stream(&block)
200
+ src =
201
+ src &&
202
+ [
203
+ "#{ref} obj",
204
+ "<< /Length #{src.size} >>\nstream\n#{src}\nendstream",
205
+ "endobj"
206
+ ].join("\n")
203
207
 
204
- s = [
205
- "#{ref} obj",
206
- "<< /Length #{s.length} >>",
207
- "stream\n#{s}\nendstream",
208
- "endobj"
209
- ].join("\n") if s.is_a?(String)
208
+ str =
209
+ src ?
210
+ nil :
211
+ make_stream(&block)
210
212
 
211
- o = add(Obj.create(self, ref, s))
213
+ obj = add(Obj.new(self, ref, source: src, stream: str))
212
214
 
213
- s.is_a?(Podoff::Stream) ? s : o
215
+ str || obj
214
216
  end
215
217
 
216
218
  def re_add(obj_or_ref)
@@ -231,7 +233,7 @@ module Podoff
231
233
  else path
232
234
  end
233
235
 
234
- f.write(@source)
236
+ f.write(source)
235
237
 
236
238
  if @additions.any?
237
239
 
@@ -239,27 +241,14 @@ module Podoff
239
241
 
240
242
  @additions.values.each do |o|
241
243
  f.write("\n")
242
- pointers[o.ref] = f.pos + 1
243
- if o.source.is_a?(String)
244
- f.write(o.source)
245
- else # Stream
246
- s = o.source.to_s
247
- f.write("#{o.ref} obj\n<< /Length #{s.length} >>\n")
248
- f.write("stream\n#{s}\nendstream\nendobj")
249
- end
244
+ pointers[o.ref.split(' ').first.to_i] = f.pos + 1
245
+ f.write(o.to_s)
250
246
  end
251
247
  f.write("\n\n")
252
248
 
253
249
  xref = f.pos + 1
254
250
 
255
- f.write("xref\n")
256
- f.write("0 1\n")
257
- f.write("0000000000 65535 f\n")
258
-
259
- pointers.each do |k, v|
260
- f.write("#{k.split(' ').first} 1\n")
261
- f.write(sprintf("%010d 00000 n\n", v))
262
- end
251
+ write_xref(f, pointers)
263
252
 
264
253
  f.write("trailer\n")
265
254
  f.write("<<\n")
@@ -289,42 +278,17 @@ module Podoff
289
278
  f.write(v)
290
279
  f.write("\n")
291
280
 
292
- ptrs = {}
281
+ pointers = {}
293
282
 
294
283
  objs.keys.sort.each do |k|
295
- ptrs[k] = f.pos + 1
284
+ pointers[k.split(' ').first.to_i] = f.pos + 1
296
285
  f.write(objs[k].source)
297
286
  f.write("\n")
298
287
  end
299
288
 
300
289
  xref = f.pos + 1
301
- max = objs.keys.inject(-1) { |i, k| [ i, k.split(' ')[0].to_i ].max }
302
-
303
- #f.write("xref\n0 #{max}\n0000000000 65535 f\n")
304
- f.write("xref\n0 1\n0000000000 65535 f\n")
305
-
306
- partitions = [ [] ]
307
- #
308
- (1..max).each do |i|
309
- k = "#{i} 0"
310
- last = partitions.last
311
- if ptrs.has_key?(k)
312
- last << i
313
- else
314
- partitions << [] unless last == []
315
- end
316
- end
317
- #
318
- partitions.each do |part|
319
290
 
320
- f.write("#{part.first} #{part.size}\n")
321
-
322
- part.each do |i|
323
- k = "#{i} 0"
324
- #f.write(sprintf("%010d 00000 n %% %s\n", ptrs[k], k))
325
- f.write(sprintf("%010d 00000 n\n", ptrs[k]))
326
- end
327
- end
291
+ write_xref(f, pointers)
328
292
 
329
293
  f.write("trailer\n")
330
294
  f.write("<<\n")
@@ -339,7 +303,27 @@ module Podoff
339
303
  f.is_a?(StringIO) ? f.string : nil
340
304
  end
341
305
 
342
- private
306
+ protected
307
+
308
+ def write_xref(f, pointers)
309
+
310
+ f.write("xref\n")
311
+ f.write("0 1\n")
312
+ f.write("0000000000 65535 f\n")
313
+
314
+ pointers
315
+ .keys
316
+ .sort
317
+ .inject([ [] ]) { |ps, k|
318
+ ps << [] if ps.last != [] && k > ps.last.last + 1
319
+ ps.last << k
320
+ ps
321
+ }
322
+ .each { |part|
323
+ f.write("#{part.first} #{part.size}\n")
324
+ part.each { |k| f.write(sprintf("%010d 00000 n\n", pointers[k])) }
325
+ }
326
+ end
343
327
 
344
328
  def make_stream(&block)
345
329
 
@@ -355,7 +339,9 @@ module Podoff
355
339
  ATTRIBUTES =
356
340
  { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
357
341
 
358
- def self.extract(doc, sca)
342
+ def self.extract(doc)
343
+
344
+ sca = doc.scanner
359
345
 
360
346
  re = sca.matched[0..-4].strip
361
347
  st = sca.pos - sca.matched.length
@@ -363,50 +349,48 @@ module Podoff
363
349
  i = sca.skip_until(/endobj/); return nil unless i
364
350
  en = sca.pos - 1
365
351
 
366
- atts = {}
367
- ATTRIBUTES.each do |k, v|
368
- sca.pos = st
369
- i = sca.skip_until(/\/#{v}\b/); next unless i
370
- next if sca.pos > en
371
- atts[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip
372
- end
373
-
374
- sca.pos = en
375
-
376
- Podoff::Obj.new(doc, re, st, en, atts)
352
+ Podoff::Obj.new(doc, re, start_index: st, end_index: en)
377
353
  end
378
354
 
379
355
  attr_reader :document
380
356
  attr_reader :ref
381
357
  attr_reader :start_index, :end_index
358
+ attr_reader :stream
382
359
  attr_reader :attributes
383
360
 
384
- def initialize(doc, ref, st, en, atts, source=nil)
361
+ def initialize(doc, ref, opts={})
385
362
 
386
363
  @document = doc
387
364
  @ref = ref
388
- @start_index = st
389
- @end_index = en
390
- @attributes = atts
391
- @source = source
392
365
 
393
- recompute_attributes if @source.is_a?(String)
394
- @source.obj = self if @source.is_a?(Podoff::Stream)
395
- end
366
+ @start_index = opts[:start_index]
367
+ @end_index = opts[:end_index]
368
+ @attributes = nil
369
+ @source = opts[:source]
396
370
 
397
- def dup(new_doc)
371
+ @stream = opts[:stream]
372
+ @stream.obj = self if @stream
398
373
 
399
- self.class.new(new_doc, ref, start_index, end_index, attributes.dup)
374
+ recompute_attributes
375
+ #@source.obj = self if @source.is_a?(Podoff::Stream)
376
+
377
+ @document.scanner.pos = @end_index if @document.scanner && @end_index
400
378
  end
401
379
 
402
- def self.create(doc, ref, source)
380
+ def dup(new_doc)
403
381
 
404
- self.new(doc, ref, nil, nil, nil, source)
382
+ self.class.new(
383
+ new_doc, ref,
384
+ start_index: start_index, end_index: end_index)
405
385
  end
406
386
 
387
+ #def self.create(doc, ref, source)
388
+ # self.new(doc, ref, nil, nil, nil, source)
389
+ #end
390
+
407
391
  def replicate
408
392
 
409
- self.class.create(document, ref, source.dup)
393
+ self.class.new(document, ref, source: source.dup)
410
394
  end
411
395
 
412
396
  def to_a
@@ -416,7 +400,7 @@ module Podoff
416
400
 
417
401
  def source
418
402
 
419
- @source || @document.source[@start_index..@end_index]
403
+ @source || (@start_index && @document.source[@start_index..@end_index])
420
404
  end
421
405
 
422
406
  def replica?
@@ -463,14 +447,29 @@ module Podoff
463
447
  end
464
448
  alias :insert_content :insert_contents
465
449
 
450
+ def to_s
451
+
452
+ source || stream.to_s
453
+ end
454
+
466
455
  protected
467
456
 
468
457
  def recompute_attributes
469
458
 
459
+ st, en, sca =
460
+ if @start_index
461
+ [ @start_index, @end_index, @document.scanner ]
462
+ elsif @source
463
+ [ 0, @source.length, ::StringScanner.new(@source) ]
464
+ end
465
+
466
+ return unless sca
467
+
470
468
  @attributes =
471
469
  ATTRIBUTES.inject({}) do |h, (k, v)|
472
- m = @source.match(/\/#{v}\s+(\/?[^\/\n<>]+)/)
473
- h[k] = m[1].strip if m
470
+ sca.pos = st
471
+ i = sca.skip_until(/\/#{v}\b/)
472
+ h[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip if i && sca.pos < en
474
473
  h
475
474
  end
476
475
  end
@@ -504,8 +503,9 @@ module Podoff
504
503
 
505
504
  attr_accessor :obj
506
505
 
507
- def initialize
506
+ def initialize(obj=nil)
508
507
 
508
+ @obj = obj
509
509
  @font = nil
510
510
  @content = StringIO.new
511
511
  end
@@ -535,7 +535,16 @@ module Podoff
535
535
 
536
536
  def to_s
537
537
 
538
- @content.string
538
+ s = @content.string
539
+ f = ''
540
+ if s.length > 98
541
+ f = ' /Filter /FlateDecode'
542
+ s = Zlib::Deflate.deflate(s)
543
+ end
544
+
545
+ "#{obj.ref} obj\n" +
546
+ "<</Length #{s.size}#{f}>>\nstream\n#{s}\nendstream\n" +
547
+ "endobj"
539
548
  end
540
549
 
541
550
  protected
@@ -97,6 +97,15 @@ describe Podoff::Document do
97
97
 
98
98
  expect(d.root).to eq('65 0')
99
99
  end
100
+
101
+ it 'sports objs with properly recomputed attributes' do
102
+
103
+ pa = @d.page(1)
104
+
105
+ d = @d.dup
106
+
107
+ expect(d.objs[pa.ref].attributes).to eq(pa.attributes)
108
+ end
100
109
  end
101
110
 
102
111
  context 'additions' do
@@ -120,13 +129,12 @@ describe Podoff::Document do
120
129
  expect(fo.ref).to eq('7 0')
121
130
 
122
131
  expect(fo.source).to eq(
123
- '7 0 obj ' +
124
- '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj')
132
+ '7 0 obj <</Type /Font /Subtype /Type1 /BaseFont /Helvetica>> endobj')
125
133
 
126
134
  s = @d.write(:string)
127
135
  d = Podoff.parse(s)
128
136
 
129
- expect(d.xref).to eq(682)
137
+ expect(d.xref).to eq(680)
130
138
  end
131
139
 
132
140
  it 'doesn\'t mind a slash in front of the font name' do
@@ -141,8 +149,7 @@ describe Podoff::Document do
141
149
  expect(fo.ref).to eq('7 0')
142
150
 
143
151
  expect(fo.source).to eq(
144
- '7 0 obj ' +
145
- '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj')
152
+ '7 0 obj <</Type /Font /Subtype /Type1 /BaseFont /Helvetica>> endobj')
146
153
  end
147
154
  end
148
155
 
@@ -185,15 +192,20 @@ endobj
185
192
  expect(st.obj.document).to eq(@d)
186
193
  expect(st.obj.ref).to eq('7 0')
187
194
 
188
- expect(st.obj.source.to_s).to eq(%{
195
+ expect(st.to_s).to eq(%{
196
+ 7 0 obj
197
+ <</Length 97>>
198
+ stream
189
199
  BT /Helvetica 35 Tf 10 20 Td (thirty here) Tj ET
190
200
  BT /Helvetica 35 Tf 40 50 Td (sixty there) Tj ET
201
+ endstream
202
+ endobj
191
203
  }.strip)
192
204
 
193
205
  d = Podoff.parse(@d.write(:string))
194
206
 
195
- expect(d.source.index('<< /Length 97 >>')).to eq(618)
196
- expect(d.xref).to eq(759)
207
+ expect(d.source.index('<</Length 97>>')).to eq(618)
208
+ expect(d.xref).to eq(757)
197
209
  end
198
210
 
199
211
  it 'returns the open stream when no arg given' do
@@ -273,9 +285,9 @@ BT /Helvetica 35 Tf 40 50 Td (sixty there) Tj ET
273
285
  s = d.write(:string)
274
286
 
275
287
  expect(
276
- d.write(:string).index(%{
288
+ s.index(%{
277
289
  7 0 obj
278
- << /Length 37 >>
290
+ <</Length 37>>
279
291
  stream
280
292
  BT 10 20 Td (hello open stream) Tj ET
281
293
  endstream
@@ -283,6 +295,36 @@ endobj
283
295
  }.strip)
284
296
  ).to eq(722)
285
297
  end
298
+
299
+ it 'writes a proper xref table' do
300
+
301
+ d = Podoff.load('pdfs/t0.pdf')
302
+
303
+ pa = d.re_add(d.page(1))
304
+ st = d.add_stream
305
+ st.bt(10, 20, 'hello open stream')
306
+ pa.insert_contents(st)
307
+
308
+ s = d.write(:string)
309
+
310
+ expect(s[808..-1].strip).to eq(%{
311
+ xref
312
+ 0 1
313
+ 0000000000 65535 f
314
+ 3 1
315
+ 0000000611 00000 n
316
+ 7 1
317
+ 0000000723 00000 n
318
+ trailer
319
+ <<
320
+ /Prev 414
321
+ /Size 7
322
+ /Root 1 0 R
323
+ >>
324
+ startxref 809
325
+ %%EOF
326
+ }.strip)
327
+ end
286
328
  end
287
329
 
288
330
  describe '#rewrite' do
data/spec/spec_helper.rb CHANGED
@@ -6,6 +6,7 @@
6
6
  #
7
7
 
8
8
  require 'pp'
9
+ require 'ostruct'
9
10
 
10
11
  require 'podoff'
11
12
 
data/spec/stream_spec.rb CHANGED
@@ -14,7 +14,7 @@ describe Podoff::Stream do
14
14
 
15
15
  it 'sets the current font' do
16
16
 
17
- st = Podoff::Stream.new
17
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
18
18
 
19
19
  st.tf('/Helvetica', 35)
20
20
  st.bt(10, 20, 'helvetic')
@@ -22,8 +22,13 @@ describe Podoff::Stream do
22
22
  st.bt(10, 50, 'zapfesque')
23
23
 
24
24
  expect(st.to_s).to eq(%{
25
+ 1 0 obj
26
+ <</Length 95>>
27
+ stream
25
28
  BT /Helvetica 35 Tf 10 20 Td (helvetic) Tj ET
26
29
  BT /ZapfDingbats 21 Tf 10 50 Td (zapfesque) Tj ET
30
+ endstream
31
+ endobj
27
32
  }.strip)
28
33
  end
29
34
  end
@@ -32,18 +37,32 @@ BT /ZapfDingbats 21 Tf 10 50 Td (zapfesque) Tj ET
32
37
 
33
38
  it 'works' do
34
39
 
35
- st = Podoff::Stream.new
40
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
36
41
  st.bt(10, 20, 'hello world')
37
42
 
38
- expect(st.to_s).to eq('BT 10 20 Td (hello world) Tj ET')
43
+ expect(st.to_s).to eq(%{
44
+ 1 0 obj
45
+ <</Length 31>>
46
+ stream
47
+ BT 10 20 Td (hello world) Tj ET
48
+ endstream
49
+ endobj
50
+ }.strip)
39
51
  end
40
52
 
41
53
  it 'escapes the text' do
42
54
 
43
- st = Podoff::Stream.new
55
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
44
56
  st.bt(10, 20, 'hello()world')
45
57
 
46
- expect(st.to_s).to eq('BT 10 20 Td (hello\(\)world) Tj ET')
58
+ expect(st.to_s).to eq(%{
59
+ 1 0 obj
60
+ <</Length 34>>
61
+ stream
62
+ BT 10 20 Td (hello\\(\\)world) Tj ET
63
+ endstream
64
+ endobj
65
+ }.strip)
47
66
  end
48
67
  end
49
68
 
@@ -51,17 +70,34 @@ BT /ZapfDingbats 21 Tf 10 50 Td (zapfesque) Tj ET
51
70
 
52
71
  it 'injects text into the stream' do
53
72
 
54
- st = Podoff::Stream.new
73
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
55
74
  st.bt(10, 20, 'abc')
56
75
  st.write("\nBT 25 35 Td (ABC) Tj ET")
57
76
  st.bt(30, 40, 'def')
58
77
 
59
78
  expect(st.to_s).to eq(%{
79
+ 1 0 obj
80
+ <</Length 71>>
81
+ stream
60
82
  BT 10 20 Td (abc) Tj ET
61
83
  BT 25 35 Td (ABC) Tj ET
62
84
  BT 30 40 Td (def) Tj ET
85
+ endstream
86
+ endobj
63
87
  }.strip)
64
88
  end
65
89
  end
90
+
91
+ describe '#to_s' do
92
+
93
+ it 'applies /Filter /FlateDecode if stream.size > 98' do
94
+
95
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
96
+ st.write("BT /Helvetica 35 Tf 123 456 Td (Hello Nada) Tj ET\n" * 4)
97
+
98
+ expect(st.to_s).to match(
99
+ /^1 0 obj\n<<\/Length 60 \/Filter \/FlateDecode>>/)
100
+ end
101
+ end
66
102
  end
67
103
 
data/todo.txt CHANGED
@@ -11,7 +11,7 @@
11
11
 
12
12
 
13
13
  [ ] stop using the st-mark thing
14
- [ ] recompress idea? uncompress with pdftk, recompress with podoff
14
+ [o] recompress idea? uncompress with pdftk, recompress with podoff
15
15
  smaller docs anyway... /Filter /FlateDecode
16
16
 
17
17
  decompress:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: podoff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-10-24 00:00:00.000000000 Z
12
+ date: 2015-10-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake