combine_pdf 0.2.11 → 0.2.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -2
- data/lib/combine_pdf/parser.rb +38 -9
- data/lib/combine_pdf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f66dc648f9871931fd703730cb5f876d41f17f95
|
4
|
+
data.tar.gz: f0e29ec0a4fdc4950c5f85f48ec5ea7eb556c03d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fefebcb9e77a306f4099e91f05fc60efa2596b33afc2ff484562d82cda06494d86e11349c00e6724ad5978b4795cf999236fa273d933df25a93e9975f91e9e65
|
7
|
+
data.tar.gz: 8f2e46ba6d0b3f136d80a38b95d85b6271b433216fa59faea82bf668835a356b65399d87a35fda7e095e3d03d210768fb2b659df3546ddafb4df734fbc8517a7
|
data/CHANGELOG.md
CHANGED
@@ -2,12 +2,16 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
Change log v.0.2.12
|
6
|
+
|
7
|
+
**Compatibility**: fixed issue #36 reported by @vitstradal (thank you for reporting!) regarding PDF files composed by PaperPort. PaperPort (at least version 12) has an issue where PDF data is placed within a PDF comment. PDF comments start with a "%" sign and end with an EOL marker ("\r" or "\n"). PaperPort omitted the EOL marker, placing critical data within the comment. A workaround was found by parsing the comment's data and attempting to salvage the misplaced data. This workaround assumes that comments will not contain parsable PDF data at the very end of the comment's line... which is an unsafe assumption. Hence, **please let me know if you find _any_ PDF files that worked before the workaround was introduced and no longer work**.
|
8
|
+
|
9
|
+
***
|
10
|
+
|
5
11
|
Change log v.0.2.11
|
6
12
|
|
7
13
|
**Fix**: fix for issue #35, which was caused by the broken fix for issue #34. Credit to Davek Rupinski for pointing out the issue.
|
8
14
|
|
9
|
-
**Fix**: fixed an issue with data and (file) sending, introduced when extending the `send_data` method to allow for big file uploads.
|
10
|
-
|
11
15
|
***
|
12
16
|
|
13
17
|
Change log v.0.2.10
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -67,10 +67,15 @@ module CombinePDF
|
|
67
67
|
@scanner.pos = 0
|
68
68
|
if @scanner.scan /\%PDF\-[\d\-\.]+/
|
69
69
|
@version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
|
70
|
-
|
71
|
-
|
70
|
+
loop do
|
71
|
+
break unless @scanner.scan(/[^\d\r\n]+/)
|
72
|
+
break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/)
|
73
|
+
break if @scanner.eos?
|
74
|
+
@scanner.pos += 1
|
75
|
+
end
|
72
76
|
end
|
73
77
|
@parsed = _parse_
|
78
|
+
# puts @parsed
|
74
79
|
|
75
80
|
raise "Unknown PDF parsing error - maleformed PDF file?" unless (@parsed.select {|i| !i.is_a?(Hash)}).empty?
|
76
81
|
|
@@ -151,7 +156,20 @@ module CombinePDF
|
|
151
156
|
def _parse_
|
152
157
|
out = []
|
153
158
|
str = ''
|
159
|
+
fresh = true
|
154
160
|
while @scanner.rest? do
|
161
|
+
# last ||= 0
|
162
|
+
# out.last.tap do |o|
|
163
|
+
# if o.is_a?(Hash)
|
164
|
+
# puts "[#{@scanner.pos}] Parser has a Dictionary (#{o.class.name}) with data:"
|
165
|
+
# o.each do |k, v|
|
166
|
+
# puts " #{k}: is #{v.class.name} with data: #{v.to_s[0..4]}#{"..." if v.to_s.length > 5}"
|
167
|
+
# end
|
168
|
+
# else
|
169
|
+
# puts "[#{@scanner.pos}] Parser has #{o.class.name} with data: #{o.to_s[0..4]}#{"..." if o.to_s.length > 5}"
|
170
|
+
# end
|
171
|
+
# puts "next is #{@scanner.peek 8}"
|
172
|
+
# end unless (last == out.count) || (-1 == (last = out.count))
|
155
173
|
case
|
156
174
|
##########################################
|
157
175
|
## parse an Array
|
@@ -174,7 +192,8 @@ module CombinePDF
|
|
174
192
|
##########################################
|
175
193
|
## parse a Stream
|
176
194
|
##########################################
|
177
|
-
when @scanner.scan(/stream[\r
|
195
|
+
when @scanner.scan(/stream[\r\n]/)
|
196
|
+
@scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
|
178
197
|
# the following was discarded because some PDF files didn't have an EOL marker as required
|
179
198
|
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
|
180
199
|
# instead, a non-strict RegExp is used:
|
@@ -199,6 +218,8 @@ module CombinePDF
|
|
199
218
|
else
|
200
219
|
out << {indirect_without_dictionary: out.pop, indirect_generation_number: out.pop, indirect_reference_id: out.pop}
|
201
220
|
end
|
221
|
+
fresh = true
|
222
|
+
# puts "!!!!!!!!! Error with :indirect_reference_id\n\nObject #{out.last} :indirect_reference_id = #{out.last[:indirect_reference_id]}" unless out.last[:indirect_reference_id].is_a?(Fixnum)
|
202
223
|
##########################################
|
203
224
|
## parse a Hex String
|
204
225
|
##########################################
|
@@ -293,7 +314,13 @@ module CombinePDF
|
|
293
314
|
##########################################
|
294
315
|
when str = @scanner.scan(/\%/)
|
295
316
|
#is a comment, skip until new line
|
296
|
-
|
317
|
+
loop do
|
318
|
+
break unless @scanner.scan(/[^\d\r\n]+/)
|
319
|
+
break if @scanner.check(/([\d]+ [\d]+ obj)?[\n\r]+/)
|
320
|
+
break if @scanner.eos?
|
321
|
+
@scanner.pos += 1
|
322
|
+
end
|
323
|
+
# puts "AFTER COMMENT: #{@scanner.peek 8}"
|
297
324
|
##########################################
|
298
325
|
## Parse a Name
|
299
326
|
##########################################
|
@@ -333,7 +360,7 @@ module CombinePDF
|
|
333
360
|
##########
|
334
361
|
## get root object to check for encryption
|
335
362
|
@scanner.scan_until(/(trailer)|(\%EOF)/)
|
336
|
-
|
363
|
+
fresh = true
|
337
364
|
if @scanner.matched[-1] == 'r'
|
338
365
|
if @scanner.skip_until(/<</)
|
339
366
|
data = _parse_
|
@@ -350,10 +377,10 @@ module CombinePDF
|
|
350
377
|
nil
|
351
378
|
when @scanner.scan(/obj[\s]*/)
|
352
379
|
# Fix wkhtmltopdf PDF authoring issue - missing 'endobj' keywords
|
353
|
-
unless out[-4].nil? || out[-4].is_a?(Hash)
|
380
|
+
unless fresh || (out[-4].nil? || out[-4].is_a?(Hash))
|
354
381
|
keep = []
|
355
|
-
keep << out.pop
|
356
|
-
keep << out.pop
|
382
|
+
keep << out.pop # .tap {|i| puts "#{i} is an ID"}
|
383
|
+
keep << out.pop # .tap {|i| puts "#{i} is a REF"}
|
357
384
|
|
358
385
|
if out.last.is_a? Hash
|
359
386
|
out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
|
@@ -365,9 +392,11 @@ module CombinePDF
|
|
365
392
|
out << keep.pop
|
366
393
|
out << keep.pop
|
367
394
|
end
|
395
|
+
fresh = false
|
368
396
|
else
|
369
397
|
# always advance
|
370
|
-
# warn "Advnacing for unknown reason..."
|
398
|
+
# warn "Advnacing for unknown reason... #{@scanner.peek(4)}" unless @scanner.peek(1) =~ /[\s\n]/
|
399
|
+
warn "Warning: parser advnacing for unknown reason. Potential data-loss."
|
371
400
|
@scanner.pos = @scanner.pos + 1
|
372
401
|
end
|
373
402
|
end
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.11
|
4
|
+
version: 0.2.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|