combine_pdf 1.0.20 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b482b2bf36c858f6bb603a66536ee17682b21bc78381fa86a115a4e437f9160c
4
- data.tar.gz: 29ab4a06d5ec597b806feeb346a2e972e98566442606f6e3e765348e5fee8ea4
3
+ metadata.gz: 6ee7f4bb8ca8b2a51a7f42251d9a3faa6c1d91d19ed3d91db8ce470a1b1995a9
4
+ data.tar.gz: 5be4868dcb9f1b834fc12e5316d5a46b6fee97104c8509cbcdb13908b499d79f
5
5
  SHA512:
6
- metadata.gz: 615c0917cab8ad406d1eb61a81528656e596bc91f88ec96d8662446496bf9e465df917f3fcf1a9aa9ba8b3057e7ec8b64dc7a0799ca53597d3d071e3afc1fd36
7
- data.tar.gz: 045ef9bc9ab9a29a1665df621f1f9f41e2744f6b285b68f6c76ec64e05493f29b423cfb1433c903267e5893d53de71558bf677912551e13fe86ccb66e827f89e
6
+ metadata.gz: 751d04258815d32076eccc5bb2905cfed641b2942721900dfa5826b0cf902a35bbf60178501a81617ce0979b1be87554666320423c4066b375dced68927e8aea
7
+ data.tar.gz: 7171c0c987646b6336984fff497fa40f02ae7b6079cd8a29b58cb7659c1b527179b44f0c7210edeb63dcc931c634320ae93d0513579852ee28beca23896a085e
@@ -2,6 +2,9 @@
2
2
 
3
3
  ***
4
4
 
5
+ #### Change log v.1.0.21
6
+
7
+ **Fix**: possible fix for issue #184, where nested PDF files within an object stream could break the parser. Credit to Greg Sparrow (@hazelsparrow) for exposing the issue.
5
8
 
6
9
  #### Change log v.1.0.20
7
10
 
@@ -358,25 +358,35 @@ module CombinePDF
358
358
  ##########################################
359
359
  elsif @scanner.scan(/stream[ \t]*[\r\n]/)
360
360
  @scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
361
+ # advance by the published stream length (if any)
362
+ old_pos = @scanner.pos
363
+ if(out.last.is_a?(Hash) && out.last[:Length].is_a?(Integer) && out.last[:Length].to_i > 2)
364
+ @scanner.pos += out.last[:Length].to_i - 2
365
+ end
366
+
361
367
  # the following was discarded because some PDF files didn't have an EOL marker as required
362
368
  # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
363
369
  # instead, a non-strict RegExp is used:
364
- str = @scanner.scan_until(/endstream/)
370
+
365
371
 
366
372
  # raise error if the stream doesn't end.
367
- unless str
373
+ unless @scanner.skip_until(/endstream/)
368
374
  raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
369
375
  end
376
+ length = @scanner.pos - (old_pos + 9)
377
+ length = 0 if(length < 0)
378
+ length -= 1 if(@scanner.string[old_pos + length - 1] == "\n")
379
+ length -= 1 if(@scanner.string[old_pos + length - 1] == "\r")
380
+ str = (length > 0) ? @scanner.string.slice(old_pos, length) : ''
370
381
 
371
382
  # warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}"
372
383
 
373
384
  # need to remove end of stream
374
385
  if out.last.is_a? Hash
375
- # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
376
- out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
386
+ out.last[:raw_stream_content] = unify_string str.force_encoding(Encoding::ASCII_8BIT)
377
387
  else
378
388
  warn 'Stream not attached to dictionary!'
379
- out << str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
389
+ out << str.force_encoding(Encoding::ASCII_8BIT)
380
390
  end
381
391
  ##########################################
382
392
  ## parse an Object after finished
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '1.0.20'.freeze
2
+ VERSION = '1.0.21'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.20
4
+ version: 1.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-07 00:00:00.000000000 Z
11
+ date: 2020-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4