combine_pdf 0.2.11 → 0.2.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4ae077f6437bdf78fe092fef2b738da12e6fb9bc
4
- data.tar.gz: eca7b7e5c9c4f0f5b5c77d6d6904aed94e0c348a
3
+ metadata.gz: f66dc648f9871931fd703730cb5f876d41f17f95
4
+ data.tar.gz: f0e29ec0a4fdc4950c5f85f48ec5ea7eb556c03d
5
5
  SHA512:
6
- metadata.gz: 2b7bec4b0cdeb0a3764d0ee0d7a8d8576c8a01c29fb2dfcbfb8d06d64bfe633dfe0aa9da9801e630a59754d621a3b00383df9be3d8a4ec9247c3c47b55a4623f
7
- data.tar.gz: 82d77ada70dcf80ca0a4354dd6c9254f56c4ebaa637d0e875a1cb2621829a62c6cdcbe11a8afb421e40da0f6b0b0cc26938c6e7a8294cd8832e2ee2658bc285c
6
+ metadata.gz: fefebcb9e77a306f4099e91f05fc60efa2596b33afc2ff484562d82cda06494d86e11349c00e6724ad5978b4795cf999236fa273d933df25a93e9975f91e9e65
7
+ data.tar.gz: 8f2e46ba6d0b3f136d80a38b95d85b6271b433216fa59faea82bf668835a356b65399d87a35fda7e095e3d03d210768fb2b659df3546ddafb4df734fbc8517a7
data/CHANGELOG.md CHANGED
@@ -2,12 +2,16 @@
2
2
 
3
3
  ***
4
4
 
5
+ Change log v.0.2.12
6
+
7
+ **Compatibility**: fixed issue #36 reported by @vitstradal (thank you for reporting!) regarding PDF files composed by PaperPort. PaperPort (at least version 12) has an issue where PDF data is placed within a PDF comment. PDF comments start with a "%" sign and end with an EOL marker ("\r" or "\n"). PaperPort omitted the EOL marker, placing critical data within the comment. A workaround was found by parsing the comment's data and attempting to salvage the misplaced data. This workaround assumes that comments would not contain PDF parsable data at the very end of the comment's line... which is an unsafe assumption. Hence, **please let me know if you find _any_ PDF files that worked before the workaround was introduced but no longer parse correctly**.
8
+
9
+ ***
10
+
5
11
  Change log v.0.2.11
6
12
 
7
13
  **Fix**: fix for issue #35 , which was caused by the broken fix for issue #34. Credit to Davek Rupinski for pointing out the issue.
8
14
 
9
- **Fix**: fixed an issue with data and (file) sending, introduced when extending the `send_data` method to allow for big file uploads.
10
-
11
15
  ***
12
16
 
13
17
  Change log v.0.2.10
@@ -67,10 +67,15 @@ module CombinePDF
67
67
  @scanner.pos = 0
68
68
  if @scanner.scan /\%PDF\-[\d\-\.]+/
69
69
  @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
70
- @scanner.skip_until /[\n\r]+/
71
- # @scanner.skip /[^\d]*/
70
+ loop do
71
+ break unless @scanner.scan(/[^\d\r\n]+/)
72
+ break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/)
73
+ break if @scanner.eos?
74
+ @scanner.pos += 1
75
+ end
72
76
  end
73
77
  @parsed = _parse_
78
+ # puts @parsed
74
79
 
75
80
  raise "Unknown PDF parsing error - maleformed PDF file?" unless (@parsed.select {|i| !i.is_a?(Hash)}).empty?
76
81
 
@@ -151,7 +156,20 @@ module CombinePDF
151
156
  def _parse_
152
157
  out = []
153
158
  str = ''
159
+ fresh = true
154
160
  while @scanner.rest? do
161
+ # last ||= 0
162
+ # out.last.tap do |o|
163
+ # if o.is_a?(Hash)
164
+ # puts "[#{@scanner.pos}] Parser has a Dictionary (#{o.class.name}) with data:"
165
+ # o.each do |k, v|
166
+ # puts " #{k}: is #{v.class.name} with data: #{v.to_s[0..4]}#{"..." if v.to_s.length > 5}"
167
+ # end
168
+ # else
169
+ # puts "[#{@scanner.pos}] Parser has #{o.class.name} with data: #{o.to_s[0..4]}#{"..." if o.to_s.length > 5}"
170
+ # end
171
+ # puts "next is #{@scanner.peek 8}"
172
+ # end unless (last == out.count) || (-1 == (last = out.count))
155
173
  case
156
174
  ##########################################
157
175
  ## parse an Array
@@ -174,7 +192,8 @@ module CombinePDF
174
192
  ##########################################
175
193
  ## parse a Stream
176
194
  ##########################################
177
- when @scanner.scan(/stream[\r]?[\n]/)
195
+ when @scanner.scan(/stream[\r\n]/)
196
+ @scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
178
197
  # the following was dicarded because some PDF files didn't have an EOL marker as required
179
198
  # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
180
199
  # instead, a non-strict RegExp is used:
@@ -199,6 +218,8 @@ module CombinePDF
199
218
  else
200
219
  out << {indirect_without_dictionary: out.pop, indirect_generation_number: out.pop, indirect_reference_id: out.pop}
201
220
  end
221
+ fresh = true
222
+ # puts "!!!!!!!!! Error with :indirect_reference_id\n\nObject #{out.last} :indirect_reference_id = #{out.last[:indirect_reference_id]}" unless out.last[:indirect_reference_id].is_a?(Fixnum)
202
223
  ##########################################
203
224
  ## parse a Hex String
204
225
  ##########################################
@@ -293,7 +314,13 @@ module CombinePDF
293
314
  ##########################################
294
315
  when str = @scanner.scan(/\%/)
295
316
  #is a comment, skip until new line
296
- @scanner.skip_until /[\n\r]+/
317
+ loop do
318
+ break unless @scanner.scan(/[^\d\r\n]+/)
319
+ break if @scanner.check(/([\d]+ [\d]+ obj)?[\n\r]+/)
320
+ break if @scanner.eos?
321
+ @scanner.pos += 1
322
+ end
323
+ # puts "AFTER COMMENT: #{@scanner.peek 8}"
297
324
  ##########################################
298
325
  ## Parse a Name
299
326
  ##########################################
@@ -333,7 +360,7 @@ module CombinePDF
333
360
  ##########
334
361
  ## get root object to check for encryption
335
362
  @scanner.scan_until(/(trailer)|(\%EOF)/)
336
-
363
+ fresh = true
337
364
  if @scanner.matched[-1] == 'r'
338
365
  if @scanner.skip_until(/<</)
339
366
  data = _parse_
@@ -350,10 +377,10 @@ module CombinePDF
350
377
  nil
351
378
  when @scanner.scan(/obj[\s]*/)
352
379
  # Fix wkhtmltopdf PDF authoring issue - missing 'endobj' keywords
353
- unless out[-4].nil? || out[-4].is_a?(Hash)
380
+ unless fresh || (out[-4].nil? || out[-4].is_a?(Hash))
354
381
  keep = []
355
- keep << out.pop
356
- keep << out.pop
382
+ keep << out.pop # .tap {|i| puts "#{i} is an ID"}
383
+ keep << out.pop # .tap {|i| puts "#{i} is a REF"}
357
384
 
358
385
  if out.last.is_a? Hash
359
386
  out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
@@ -365,9 +392,11 @@ module CombinePDF
365
392
  out << keep.pop
366
393
  out << keep.pop
367
394
  end
395
+ fresh = false
368
396
  else
369
397
  # always advance
370
- # warn "Advnacing for unknown reason..."
398
+ # warn "Advnacing for unknown reason... #{@scanner.peek(4)}" unless @scanner.peek(1) =~ /[\s\n]/
399
+ warn "Warning: parser advnacing for unknown reason. Potential data-loss."
371
400
  @scanner.pos = @scanner.pos + 1
372
401
  end
373
402
  end
@@ -1,4 +1,4 @@
1
1
  module CombinePDF
2
- VERSION = "0.2.11"
2
+ VERSION = "0.2.12"
3
3
  end
4
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.11
4
+ version: 0.2.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-06 00:00:00.000000000 Z
11
+ date: 2015-11-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4