combine_pdf 0.2.15 → 0.2.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/combine_pdf/parser.rb +15 -11
- data/lib/combine_pdf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 114c27c4f23021df9f78df6468f5fcffe8022f70
|
4
|
+
data.tar.gz: 5b75261b3ae0c2a0d12520f5f366a9b17b6e2049
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff3cbca040ad3dece43ea7ce4ed1b3a006cf86842390761d88c601de0cddf3cc16056d15449ef3fc415886242f24d77c0637818831f7d04efd775d77a1fc23a6
|
7
|
+
data.tar.gz: f4e27c18d9b7925c7018059602f7d7cb16ca970dd06494025056f84db939828b309b399cfd1f52c6a27c36f8b17037e9e810b030b0f6e582f89ca2bba5991cdc
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -99,10 +99,10 @@ module CombinePDF
|
|
99
99
|
object_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm}
|
100
100
|
unless object_streams.empty?
|
101
101
|
warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."
|
102
|
-
|
102
|
+
|
103
103
|
object_streams.each do |o|
|
104
104
|
## un-encode (using the correct filter) the object streams
|
105
|
-
PDFFilter.inflate_object o
|
105
|
+
PDFFilter.inflate_object o
|
106
106
|
## extract objects from stream to top level arry @parsed
|
107
107
|
@scanner = StringScanner.new o[:raw_stream_content]
|
108
108
|
stream_data = _parse_
|
@@ -123,7 +123,6 @@ module CombinePDF
|
|
123
123
|
# Strings were unified, we can let them go..
|
124
124
|
@strings_dictionary.clear
|
125
125
|
|
126
|
-
|
127
126
|
# serialize_objects_and_references.catalog_pages
|
128
127
|
|
129
128
|
# Benchmark.bm do |bm|
|
@@ -316,7 +315,7 @@ module CombinePDF
|
|
316
315
|
#is a comment, skip until new line
|
317
316
|
loop do
|
318
317
|
# break unless @scanner.scan(/[^\d\r\n]+/)
|
319
|
-
break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/) || @scanner.eos? # || @scanner.scan(/[^\d]+[\r\n]+/) ||
|
318
|
+
break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/) || @scanner.eos? # || @scanner.scan(/[^\d]+[\r\n]+/) ||
|
320
319
|
@scanner.scan(/[^\d\r\n]+/) || @scanner.pos += 1
|
321
320
|
end
|
322
321
|
# puts "AFTER COMMENT: #{@scanner.peek 8}"
|
@@ -364,13 +363,18 @@ module CombinePDF
|
|
364
363
|
if @scanner.skip_until(/<</)
|
365
364
|
data = _parse_
|
366
365
|
@root_object ||= {}
|
367
|
-
@root_object[data.shift] = data.shift while data[0]
|
366
|
+
@root_object[data.shift] = data.shift while data[0]
|
368
367
|
end
|
369
368
|
##########
|
370
369
|
## skip untill end of segment, maked by %%EOF
|
371
370
|
@scanner.skip_until(/\%\%EOF/)
|
371
|
+
##########
|
372
|
+
## If this was the last valid segment, ignore any trailing garbage
|
373
|
+
## (issue #49 resolution)
|
374
|
+
break unless @scanner.exist?(/\%\%EOF/)
|
375
|
+
|
372
376
|
end
|
373
|
-
|
377
|
+
|
374
378
|
when @scanner.scan(/[\s]+/)
|
375
379
|
# Generally, do nothing
|
376
380
|
nil
|
@@ -378,8 +382,8 @@ module CombinePDF
|
|
378
382
|
# Fix wkhtmltopdf PDF authoring issue - missing 'endobj' keywords
|
379
383
|
unless fresh || (out[-4].nil? || out[-4].is_a?(Hash))
|
380
384
|
keep = []
|
381
|
-
keep << out.pop # .tap {|i| puts "#{i} is an ID"}
|
382
|
-
keep << out.pop # .tap {|i| puts "#{i} is a REF"}
|
385
|
+
keep << out.pop # .tap {|i| puts "#{i} is an ID"}
|
386
|
+
keep << out.pop # .tap {|i| puts "#{i} is a REF"}
|
383
387
|
|
384
388
|
if out.last.is_a? Hash
|
385
389
|
out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
|
@@ -393,7 +397,7 @@ module CombinePDF
|
|
393
397
|
end
|
394
398
|
fresh = false
|
395
399
|
else
|
396
|
-
# always advance
|
400
|
+
# always advance
|
397
401
|
# warn "Advnacing for unknown reason... #{@scanner.peek(4)}" unless @scanner.peek(1) =~ /[\s\n]/
|
398
402
|
warn "Warning: parser advnacing for unknown reason. Potential data-loss."
|
399
403
|
@scanner.pos = @scanner.pos + 1
|
@@ -420,7 +424,7 @@ module CombinePDF
|
|
420
424
|
|
421
425
|
raise "Unknown error - parsed data doesn't contain a cataloged object!" unless catalogs
|
422
426
|
end
|
423
|
-
case
|
427
|
+
case
|
424
428
|
when catalogs.is_a?(Array)
|
425
429
|
catalogs.each {|c| catalog_pages(c, inheritance_hash ) unless c.nil?}
|
426
430
|
when catalogs.is_a?(Hash)
|
@@ -578,4 +582,4 @@ module CombinePDF
|
|
578
582
|
# end
|
579
583
|
|
580
584
|
end
|
581
|
-
end
|
585
|
+
end
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.15
|
4
|
+
version: 0.2.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|