combine_pdf 0.2.15 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/combine_pdf/parser.rb +15 -11
- data/lib/combine_pdf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 114c27c4f23021df9f78df6468f5fcffe8022f70
|
4
|
+
data.tar.gz: 5b75261b3ae0c2a0d12520f5f366a9b17b6e2049
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff3cbca040ad3dece43ea7ce4ed1b3a006cf86842390761d88c601de0cddf3cc16056d15449ef3fc415886242f24d77c0637818831f7d04efd775d77a1fc23a6
|
7
|
+
data.tar.gz: f4e27c18d9b7925c7018059602f7d7cb16ca970dd06494025056f84db939828b309b399cfd1f52c6a27c36f8b17037e9e810b030b0f6e582f89ca2bba5991cdc
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -99,10 +99,10 @@ module CombinePDF
|
|
99
99
|
object_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm}
|
100
100
|
unless object_streams.empty?
|
101
101
|
warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."
|
102
|
-
|
102
|
+
|
103
103
|
object_streams.each do |o|
|
104
104
|
## un-encode (using the correct filter) the object streams
|
105
|
-
PDFFilter.inflate_object o
|
105
|
+
PDFFilter.inflate_object o
|
106
106
|
## extract objects from stream to top level arry @parsed
|
107
107
|
@scanner = StringScanner.new o[:raw_stream_content]
|
108
108
|
stream_data = _parse_
|
@@ -123,7 +123,6 @@ module CombinePDF
|
|
123
123
|
# Strings were unified, we can let them go..
|
124
124
|
@strings_dictionary.clear
|
125
125
|
|
126
|
-
|
127
126
|
# serialize_objects_and_references.catalog_pages
|
128
127
|
|
129
128
|
# Benchmark.bm do |bm|
|
@@ -316,7 +315,7 @@ module CombinePDF
|
|
316
315
|
#is a comment, skip until new line
|
317
316
|
loop do
|
318
317
|
# break unless @scanner.scan(/[^\d\r\n]+/)
|
319
|
-
break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/) || @scanner.eos? # || @scanner.scan(/[^\d]+[\r\n]+/) ||
|
318
|
+
break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/) || @scanner.eos? # || @scanner.scan(/[^\d]+[\r\n]+/) ||
|
320
319
|
@scanner.scan(/[^\d\r\n]+/) || @scanner.pos += 1
|
321
320
|
end
|
322
321
|
# puts "AFTER COMMENT: #{@scanner.peek 8}"
|
@@ -364,13 +363,18 @@ module CombinePDF
|
|
364
363
|
if @scanner.skip_until(/<</)
|
365
364
|
data = _parse_
|
366
365
|
@root_object ||= {}
|
367
|
-
@root_object[data.shift] = data.shift while data[0]
|
366
|
+
@root_object[data.shift] = data.shift while data[0]
|
368
367
|
end
|
369
368
|
##########
|
370
369
|
## skip untill end of segment, maked by %%EOF
|
371
370
|
@scanner.skip_until(/\%\%EOF/)
|
371
|
+
##########
|
372
|
+
## If this was the last valid segment, ignore any trailing garbage
|
373
|
+
## (issue #49 resolution)
|
374
|
+
break unless @scanner.exist?(/\%\%EOF/)
|
375
|
+
|
372
376
|
end
|
373
|
-
|
377
|
+
|
374
378
|
when @scanner.scan(/[\s]+/)
|
375
379
|
# Generally, do nothing
|
376
380
|
nil
|
@@ -378,8 +382,8 @@ module CombinePDF
|
|
378
382
|
# Fix wkhtmltopdf PDF authoring issue - missing 'endobj' keywords
|
379
383
|
unless fresh || (out[-4].nil? || out[-4].is_a?(Hash))
|
380
384
|
keep = []
|
381
|
-
keep << out.pop # .tap {|i| puts "#{i} is an ID"}
|
382
|
-
keep << out.pop # .tap {|i| puts "#{i} is a REF"}
|
385
|
+
keep << out.pop # .tap {|i| puts "#{i} is an ID"}
|
386
|
+
keep << out.pop # .tap {|i| puts "#{i} is a REF"}
|
383
387
|
|
384
388
|
if out.last.is_a? Hash
|
385
389
|
out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
|
@@ -393,7 +397,7 @@ module CombinePDF
|
|
393
397
|
end
|
394
398
|
fresh = false
|
395
399
|
else
|
396
|
-
# always advance
|
400
|
+
# always advance
|
397
401
|
# warn "Advnacing for unknown reason... #{@scanner.peek(4)}" unless @scanner.peek(1) =~ /[\s\n]/
|
398
402
|
warn "Warning: parser advnacing for unknown reason. Potential data-loss."
|
399
403
|
@scanner.pos = @scanner.pos + 1
|
@@ -420,7 +424,7 @@ module CombinePDF
|
|
420
424
|
|
421
425
|
raise "Unknown error - parsed data doesn't contain a cataloged object!" unless catalogs
|
422
426
|
end
|
423
|
-
case
|
427
|
+
case
|
424
428
|
when catalogs.is_a?(Array)
|
425
429
|
catalogs.each {|c| catalog_pages(c, inheritance_hash ) unless c.nil?}
|
426
430
|
when catalogs.is_a?(Hash)
|
@@ -578,4 +582,4 @@ module CombinePDF
|
|
578
582
|
# end
|
579
583
|
|
580
584
|
end
|
581
|
-
end
|
585
|
+
end
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.15
|
4
|
+
version: 0.2.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|