podoff 1.1.0 → 1.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/CHANGELOG.txt CHANGED
@@ -2,6 +2,12 @@
2
2
  = podoff CHANGELOG.txt
3
3
 
4
4
 
5
+ == podoff 1.1.1 released 2015-10-26
6
+
7
+ - reworked xref table output
8
+ - FlateDecode stream if length > 98
9
+
10
+
5
11
  == podoff 1.1.0 released 2015-10-25
6
12
 
7
13
  - more tolerant at parsing (StringScanner)
data/lib/podoff.rb CHANGED
@@ -23,13 +23,14 @@
23
23
  # Made in Japan.
24
24
  #++
25
25
 
26
+ require 'zlib'
26
27
  require 'strscan'
27
28
  require 'stringio'
28
29
 
29
30
 
30
31
  module Podoff
31
32
 
32
- VERSION = '1.1.0'
33
+ VERSION = '1.1.1'
33
34
 
34
35
  def self.load(path, encoding='iso-8859-1')
35
36
 
@@ -53,7 +54,7 @@ module Podoff
53
54
  Podoff::Document.new(s)
54
55
  end
55
56
 
56
- attr_reader :source
57
+ attr_reader :scanner
57
58
  attr_reader :version
58
59
  attr_reader :xref
59
60
  attr_reader :objs
@@ -67,7 +68,7 @@ module Podoff
67
68
  fail ArgumentError.new('not a PDF file') \
68
69
  unless s.match(/\A%PDF-\d+\.\d+\s/)
69
70
 
70
- @source = s
71
+ @scanner = ::StringScanner.new(s)
71
72
  @version = nil
72
73
  @xref = nil
73
74
  @objs = {}
@@ -76,15 +77,14 @@ module Podoff
76
77
 
77
78
  @additions = {}
78
79
 
79
- sca = ::StringScanner.new(s)
80
- @version = sca.scan(/%PDF-\d+\.\d+/)
80
+ @version = @scanner.scan(/%PDF-\d+\.\d+/)
81
81
 
82
82
  loop do
83
83
 
84
- i = sca.skip_until(
84
+ i = @scanner.skip_until(
85
85
  /(startxref\s+\d+|\d+\s+\d+\s+obj|\/Root\s+\d+\s+\d+\s+R)/)
86
86
 
87
- m = sca.matched
87
+ m = @scanner.matched
88
88
  break unless m
89
89
 
90
90
  if m[0] == 's'
@@ -92,22 +92,27 @@ module Podoff
92
92
  elsif m[0] == '/'
93
93
  @root = extract_ref(m)
94
94
  else
95
- obj = Podoff::Obj.extract(self, sca)
95
+ obj = Podoff::Obj.extract(self)
96
96
  @objs[obj.ref] = obj
97
97
  @obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1
98
98
  end
99
99
  end
100
100
 
101
101
  if @root == nil
102
- sca.pos = 0
102
+ @scanner.pos = 0
103
103
  loop do
104
- i = sca.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
105
- break unless sca.matched
106
- @root = extract_ref(sca.matched)
104
+ i = @scanner.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
105
+ break unless @scanner.matched
106
+ @root = extract_ref(@scanner.matched)
107
107
  end
108
108
  end
109
109
  end
110
110
 
111
+ def source
112
+
113
+ @scanner.string
114
+ end
115
+
111
116
  def extract_ref(s)
112
117
 
113
118
  s.gsub(/\s+/, ' ').gsub(/[^0-9 ]+/, '').strip
@@ -124,7 +129,7 @@ module Podoff
124
129
 
125
130
  self.class.allocate.instance_eval do
126
131
 
127
- @source = o.source
132
+ @scanner = ::StringScanner.new(o.source)
128
133
  @xref = o.xref
129
134
 
130
135
  @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
@@ -182,35 +187,32 @@ module Podoff
182
187
 
183
188
  name = name[1..-1] if name[0] == '/'
184
189
 
185
- ref = new_ref
190
+ r = new_ref
191
+ s = "#{r} obj <</Type /Font /Subtype /Type1 /BaseFont /#{name}>> endobj"
186
192
 
187
- add(
188
- Obj.create(
189
- self,
190
- ref,
191
- [
192
- "#{ref} obj",
193
- "<< /Type /Font /Subtype /Type1 /BaseFont /#{name} >>",
194
- "endobj"
195
- ].join(' ')))
193
+ add(Obj.new(self, r, source: s))
196
194
  end
197
195
 
198
- def add_stream(s=nil, &block)
196
+ def add_stream(src=nil, &block)
199
197
 
200
198
  ref = new_ref
201
199
 
202
- s = s || make_stream(&block)
200
+ src =
201
+ src &&
202
+ [
203
+ "#{ref} obj",
204
+ "<< /Length #{src.size} >>\nstream\n#{src}\nendstream",
205
+ "endobj"
206
+ ].join("\n")
203
207
 
204
- s = [
205
- "#{ref} obj",
206
- "<< /Length #{s.length} >>",
207
- "stream\n#{s}\nendstream",
208
- "endobj"
209
- ].join("\n") if s.is_a?(String)
208
+ str =
209
+ src ?
210
+ nil :
211
+ make_stream(&block)
210
212
 
211
- o = add(Obj.create(self, ref, s))
213
+ obj = add(Obj.new(self, ref, source: src, stream: str))
212
214
 
213
- s.is_a?(Podoff::Stream) ? s : o
215
+ str || obj
214
216
  end
215
217
 
216
218
  def re_add(obj_or_ref)
@@ -231,7 +233,7 @@ module Podoff
231
233
  else path
232
234
  end
233
235
 
234
- f.write(@source)
236
+ f.write(source)
235
237
 
236
238
  if @additions.any?
237
239
 
@@ -239,27 +241,14 @@ module Podoff
239
241
 
240
242
  @additions.values.each do |o|
241
243
  f.write("\n")
242
- pointers[o.ref] = f.pos + 1
243
- if o.source.is_a?(String)
244
- f.write(o.source)
245
- else # Stream
246
- s = o.source.to_s
247
- f.write("#{o.ref} obj\n<< /Length #{s.length} >>\n")
248
- f.write("stream\n#{s}\nendstream\nendobj")
249
- end
244
+ pointers[o.ref.split(' ').first.to_i] = f.pos + 1
245
+ f.write(o.to_s)
250
246
  end
251
247
  f.write("\n\n")
252
248
 
253
249
  xref = f.pos + 1
254
250
 
255
- f.write("xref\n")
256
- f.write("0 1\n")
257
- f.write("0000000000 65535 f\n")
258
-
259
- pointers.each do |k, v|
260
- f.write("#{k.split(' ').first} 1\n")
261
- f.write(sprintf("%010d 00000 n\n", v))
262
- end
251
+ write_xref(f, pointers)
263
252
 
264
253
  f.write("trailer\n")
265
254
  f.write("<<\n")
@@ -289,42 +278,17 @@ module Podoff
289
278
  f.write(v)
290
279
  f.write("\n")
291
280
 
292
- ptrs = {}
281
+ pointers = {}
293
282
 
294
283
  objs.keys.sort.each do |k|
295
- ptrs[k] = f.pos + 1
284
+ pointers[k.split(' ').first.to_i] = f.pos + 1
296
285
  f.write(objs[k].source)
297
286
  f.write("\n")
298
287
  end
299
288
 
300
289
  xref = f.pos + 1
301
- max = objs.keys.inject(-1) { |i, k| [ i, k.split(' ')[0].to_i ].max }
302
-
303
- #f.write("xref\n0 #{max}\n0000000000 65535 f\n")
304
- f.write("xref\n0 1\n0000000000 65535 f\n")
305
-
306
- partitions = [ [] ]
307
- #
308
- (1..max).each do |i|
309
- k = "#{i} 0"
310
- last = partitions.last
311
- if ptrs.has_key?(k)
312
- last << i
313
- else
314
- partitions << [] unless last == []
315
- end
316
- end
317
- #
318
- partitions.each do |part|
319
290
 
320
- f.write("#{part.first} #{part.size}\n")
321
-
322
- part.each do |i|
323
- k = "#{i} 0"
324
- #f.write(sprintf("%010d 00000 n %% %s\n", ptrs[k], k))
325
- f.write(sprintf("%010d 00000 n\n", ptrs[k]))
326
- end
327
- end
291
+ write_xref(f, pointers)
328
292
 
329
293
  f.write("trailer\n")
330
294
  f.write("<<\n")
@@ -339,7 +303,27 @@ module Podoff
339
303
  f.is_a?(StringIO) ? f.string : nil
340
304
  end
341
305
 
342
- private
306
+ protected
307
+
308
+ def write_xref(f, pointers)
309
+
310
+ f.write("xref\n")
311
+ f.write("0 1\n")
312
+ f.write("0000000000 65535 f\n")
313
+
314
+ pointers
315
+ .keys
316
+ .sort
317
+ .inject([ [] ]) { |ps, k|
318
+ ps << [] if ps.last != [] && k > ps.last.last + 1
319
+ ps.last << k
320
+ ps
321
+ }
322
+ .each { |part|
323
+ f.write("#{part.first} #{part.size}\n")
324
+ part.each { |k| f.write(sprintf("%010d 00000 n\n", pointers[k])) }
325
+ }
326
+ end
343
327
 
344
328
  def make_stream(&block)
345
329
 
@@ -355,7 +339,9 @@ module Podoff
355
339
  ATTRIBUTES =
356
340
  { type: 'Type', contents: 'Contents', pagenum: 'pdftk_PageNum' }
357
341
 
358
- def self.extract(doc, sca)
342
+ def self.extract(doc)
343
+
344
+ sca = doc.scanner
359
345
 
360
346
  re = sca.matched[0..-4].strip
361
347
  st = sca.pos - sca.matched.length
@@ -363,50 +349,48 @@ module Podoff
363
349
  i = sca.skip_until(/endobj/); return nil unless i
364
350
  en = sca.pos - 1
365
351
 
366
- atts = {}
367
- ATTRIBUTES.each do |k, v|
368
- sca.pos = st
369
- i = sca.skip_until(/\/#{v}\b/); next unless i
370
- next if sca.pos > en
371
- atts[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip
372
- end
373
-
374
- sca.pos = en
375
-
376
- Podoff::Obj.new(doc, re, st, en, atts)
352
+ Podoff::Obj.new(doc, re, start_index: st, end_index: en)
377
353
  end
378
354
 
379
355
  attr_reader :document
380
356
  attr_reader :ref
381
357
  attr_reader :start_index, :end_index
358
+ attr_reader :stream
382
359
  attr_reader :attributes
383
360
 
384
- def initialize(doc, ref, st, en, atts, source=nil)
361
+ def initialize(doc, ref, opts={})
385
362
 
386
363
  @document = doc
387
364
  @ref = ref
388
- @start_index = st
389
- @end_index = en
390
- @attributes = atts
391
- @source = source
392
365
 
393
- recompute_attributes if @source.is_a?(String)
394
- @source.obj = self if @source.is_a?(Podoff::Stream)
395
- end
366
+ @start_index = opts[:start_index]
367
+ @end_index = opts[:end_index]
368
+ @attributes = nil
369
+ @source = opts[:source]
396
370
 
397
- def dup(new_doc)
371
+ @stream = opts[:stream]
372
+ @stream.obj = self if @stream
398
373
 
399
- self.class.new(new_doc, ref, start_index, end_index, attributes.dup)
374
+ recompute_attributes
375
+ #@source.obj = self if @source.is_a?(Podoff::Stream)
376
+
377
+ @document.scanner.pos = @end_index if @document.scanner && @end_index
400
378
  end
401
379
 
402
- def self.create(doc, ref, source)
380
+ def dup(new_doc)
403
381
 
404
- self.new(doc, ref, nil, nil, nil, source)
382
+ self.class.new(
383
+ new_doc, ref,
384
+ start_index: start_index, end_index: end_index)
405
385
  end
406
386
 
387
+ #def self.create(doc, ref, source)
388
+ # self.new(doc, ref, nil, nil, nil, source)
389
+ #end
390
+
407
391
  def replicate
408
392
 
409
- self.class.create(document, ref, source.dup)
393
+ self.class.new(document, ref, source: source.dup)
410
394
  end
411
395
 
412
396
  def to_a
@@ -416,7 +400,7 @@ module Podoff
416
400
 
417
401
  def source
418
402
 
419
- @source || @document.source[@start_index..@end_index]
403
+ @source || (@start_index && @document.source[@start_index..@end_index])
420
404
  end
421
405
 
422
406
  def replica?
@@ -463,14 +447,29 @@ module Podoff
463
447
  end
464
448
  alias :insert_content :insert_contents
465
449
 
450
+ def to_s
451
+
452
+ source || stream.to_s
453
+ end
454
+
466
455
  protected
467
456
 
468
457
  def recompute_attributes
469
458
 
459
+ st, en, sca =
460
+ if @start_index
461
+ [ @start_index, @end_index, @document.scanner ]
462
+ elsif @source
463
+ [ 0, @source.length, ::StringScanner.new(@source) ]
464
+ end
465
+
466
+ return unless sca
467
+
470
468
  @attributes =
471
469
  ATTRIBUTES.inject({}) do |h, (k, v)|
472
- m = @source.match(/\/#{v}\s+(\/?[^\/\n<>]+)/)
473
- h[k] = m[1].strip if m
470
+ sca.pos = st
471
+ i = sca.skip_until(/\/#{v}\b/)
472
+ h[k] = sca.scan(/ *\/?[^\n\r\/>]+/).strip if i && sca.pos < en
474
473
  h
475
474
  end
476
475
  end
@@ -504,8 +503,9 @@ module Podoff
504
503
 
505
504
  attr_accessor :obj
506
505
 
507
- def initialize
506
+ def initialize(obj=nil)
508
507
 
508
+ @obj = obj
509
509
  @font = nil
510
510
  @content = StringIO.new
511
511
  end
@@ -535,7 +535,16 @@ module Podoff
535
535
 
536
536
  def to_s
537
537
 
538
- @content.string
538
+ s = @content.string
539
+ f = ''
540
+ if s.length > 98
541
+ f = ' /Filter /FlateDecode'
542
+ s = Zlib::Deflate.deflate(s)
543
+ end
544
+
545
+ "#{obj.ref} obj\n" +
546
+ "<</Length #{s.size}#{f}>>\nstream\n#{s}\nendstream\n" +
547
+ "endobj"
539
548
  end
540
549
 
541
550
  protected
@@ -97,6 +97,15 @@ describe Podoff::Document do
97
97
 
98
98
  expect(d.root).to eq('65 0')
99
99
  end
100
+
101
+ it 'sports objs with properly recomputed attributes' do
102
+
103
+ pa = @d.page(1)
104
+
105
+ d = @d.dup
106
+
107
+ expect(d.objs[pa.ref].attributes).to eq(pa.attributes)
108
+ end
100
109
  end
101
110
 
102
111
  context 'additions' do
@@ -120,13 +129,12 @@ describe Podoff::Document do
120
129
  expect(fo.ref).to eq('7 0')
121
130
 
122
131
  expect(fo.source).to eq(
123
- '7 0 obj ' +
124
- '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj')
132
+ '7 0 obj <</Type /Font /Subtype /Type1 /BaseFont /Helvetica>> endobj')
125
133
 
126
134
  s = @d.write(:string)
127
135
  d = Podoff.parse(s)
128
136
 
129
- expect(d.xref).to eq(682)
137
+ expect(d.xref).to eq(680)
130
138
  end
131
139
 
132
140
  it 'doesn\'t mind a slash in front of the font name' do
@@ -141,8 +149,7 @@ describe Podoff::Document do
141
149
  expect(fo.ref).to eq('7 0')
142
150
 
143
151
  expect(fo.source).to eq(
144
- '7 0 obj ' +
145
- '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj')
152
+ '7 0 obj <</Type /Font /Subtype /Type1 /BaseFont /Helvetica>> endobj')
146
153
  end
147
154
  end
148
155
 
@@ -185,15 +192,20 @@ endobj
185
192
  expect(st.obj.document).to eq(@d)
186
193
  expect(st.obj.ref).to eq('7 0')
187
194
 
188
- expect(st.obj.source.to_s).to eq(%{
195
+ expect(st.to_s).to eq(%{
196
+ 7 0 obj
197
+ <</Length 97>>
198
+ stream
189
199
  BT /Helvetica 35 Tf 10 20 Td (thirty here) Tj ET
190
200
  BT /Helvetica 35 Tf 40 50 Td (sixty there) Tj ET
201
+ endstream
202
+ endobj
191
203
  }.strip)
192
204
 
193
205
  d = Podoff.parse(@d.write(:string))
194
206
 
195
- expect(d.source.index('<< /Length 97 >>')).to eq(618)
196
- expect(d.xref).to eq(759)
207
+ expect(d.source.index('<</Length 97>>')).to eq(618)
208
+ expect(d.xref).to eq(757)
197
209
  end
198
210
 
199
211
  it 'returns the open stream when no arg given' do
@@ -273,9 +285,9 @@ BT /Helvetica 35 Tf 40 50 Td (sixty there) Tj ET
273
285
  s = d.write(:string)
274
286
 
275
287
  expect(
276
- d.write(:string).index(%{
288
+ s.index(%{
277
289
  7 0 obj
278
- << /Length 37 >>
290
+ <</Length 37>>
279
291
  stream
280
292
  BT 10 20 Td (hello open stream) Tj ET
281
293
  endstream
@@ -283,6 +295,36 @@ endobj
283
295
  }.strip)
284
296
  ).to eq(722)
285
297
  end
298
+
299
+ it 'writes a proper xref table' do
300
+
301
+ d = Podoff.load('pdfs/t0.pdf')
302
+
303
+ pa = d.re_add(d.page(1))
304
+ st = d.add_stream
305
+ st.bt(10, 20, 'hello open stream')
306
+ pa.insert_contents(st)
307
+
308
+ s = d.write(:string)
309
+
310
+ expect(s[808..-1].strip).to eq(%{
311
+ xref
312
+ 0 1
313
+ 0000000000 65535 f
314
+ 3 1
315
+ 0000000611 00000 n
316
+ 7 1
317
+ 0000000723 00000 n
318
+ trailer
319
+ <<
320
+ /Prev 414
321
+ /Size 7
322
+ /Root 1 0 R
323
+ >>
324
+ startxref 809
325
+ %%EOF
326
+ }.strip)
327
+ end
286
328
  end
287
329
 
288
330
  describe '#rewrite' do
data/spec/spec_helper.rb CHANGED
@@ -6,6 +6,7 @@
6
6
  #
7
7
 
8
8
  require 'pp'
9
+ require 'ostruct'
9
10
 
10
11
  require 'podoff'
11
12
 
data/spec/stream_spec.rb CHANGED
@@ -14,7 +14,7 @@ describe Podoff::Stream do
14
14
 
15
15
  it 'sets the current font' do
16
16
 
17
- st = Podoff::Stream.new
17
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
18
18
 
19
19
  st.tf('/Helvetica', 35)
20
20
  st.bt(10, 20, 'helvetic')
@@ -22,8 +22,13 @@ describe Podoff::Stream do
22
22
  st.bt(10, 50, 'zapfesque')
23
23
 
24
24
  expect(st.to_s).to eq(%{
25
+ 1 0 obj
26
+ <</Length 95>>
27
+ stream
25
28
  BT /Helvetica 35 Tf 10 20 Td (helvetic) Tj ET
26
29
  BT /ZapfDingbats 21 Tf 10 50 Td (zapfesque) Tj ET
30
+ endstream
31
+ endobj
27
32
  }.strip)
28
33
  end
29
34
  end
@@ -32,18 +37,32 @@ BT /ZapfDingbats 21 Tf 10 50 Td (zapfesque) Tj ET
32
37
 
33
38
  it 'works' do
34
39
 
35
- st = Podoff::Stream.new
40
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
36
41
  st.bt(10, 20, 'hello world')
37
42
 
38
- expect(st.to_s).to eq('BT 10 20 Td (hello world) Tj ET')
43
+ expect(st.to_s).to eq(%{
44
+ 1 0 obj
45
+ <</Length 31>>
46
+ stream
47
+ BT 10 20 Td (hello world) Tj ET
48
+ endstream
49
+ endobj
50
+ }.strip)
39
51
  end
40
52
 
41
53
  it 'escapes the text' do
42
54
 
43
- st = Podoff::Stream.new
55
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
44
56
  st.bt(10, 20, 'hello()world')
45
57
 
46
- expect(st.to_s).to eq('BT 10 20 Td (hello\(\)world) Tj ET')
58
+ expect(st.to_s).to eq(%{
59
+ 1 0 obj
60
+ <</Length 34>>
61
+ stream
62
+ BT 10 20 Td (hello\\(\\)world) Tj ET
63
+ endstream
64
+ endobj
65
+ }.strip)
47
66
  end
48
67
  end
49
68
 
@@ -51,17 +70,34 @@ BT /ZapfDingbats 21 Tf 10 50 Td (zapfesque) Tj ET
51
70
 
52
71
  it 'injects text into the stream' do
53
72
 
54
- st = Podoff::Stream.new
73
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
55
74
  st.bt(10, 20, 'abc')
56
75
  st.write("\nBT 25 35 Td (ABC) Tj ET")
57
76
  st.bt(30, 40, 'def')
58
77
 
59
78
  expect(st.to_s).to eq(%{
79
+ 1 0 obj
80
+ <</Length 71>>
81
+ stream
60
82
  BT 10 20 Td (abc) Tj ET
61
83
  BT 25 35 Td (ABC) Tj ET
62
84
  BT 30 40 Td (def) Tj ET
85
+ endstream
86
+ endobj
63
87
  }.strip)
64
88
  end
65
89
  end
90
+
91
+ describe '#to_s' do
92
+
93
+ it 'applies /Filter /FlateDecode if stream.size > 98' do
94
+
95
+ st = Podoff::Stream.new(OpenStruct.new(ref: '1 0'))
96
+ st.write("BT /Helvetica 35 Tf 123 456 Td (Hello Nada) Tj ET\n" * 4)
97
+
98
+ expect(st.to_s).to match(
99
+ /^1 0 obj\n<<\/Length 60 \/Filter \/FlateDecode>>/)
100
+ end
101
+ end
66
102
  end
67
103
 
data/todo.txt CHANGED
@@ -11,7 +11,7 @@
11
11
 
12
12
 
13
13
  [ ] stop using the st-mark thing
14
- [ ] recompress idea? uncompress with pdftk, recompress with podoff
14
+ [o] recompress idea? uncompress with pdftk, recompress with podoff
15
15
  smaller docs anyway... /Filter /FlatDecode
16
16
 
17
17
  decompress:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: podoff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-10-24 00:00:00.000000000 Z
12
+ date: 2015-10-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake