combine_pdf 1.0.7 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 26b12c91a180581e3aab431bac1cb46b0d9cecb1
4
- data.tar.gz: f9fe760a66b6a8314106faab6c5b2a45a889e440
2
+ SHA256:
3
+ metadata.gz: 9e5d5d814a254c4939fa88b782ebe49057b917d81134c3fc94385eb19d814092
4
+ data.tar.gz: b43a6e98d54ca72f7f13600898bdf1d6bbe0614276d411f8575ed9c1fe54c3d0
5
5
  SHA512:
6
- metadata.gz: 72f53db4e8cb69b52fa8044bffe42acb21f683fd39afdd7bbf96a924916775fb2ac338c2fb4f5ddd23d0685d0442ec3acc9ee0416581c9f82b576e86beea6c45
7
- data.tar.gz: a555bd19c697042ee8e75742b7a6f0e14282abf650443571bef157c39d4d34d4ab70fa3b7ed7acee9aa49dcd045bacced91ea01fd58f7bb31763614debc8083c
6
+ metadata.gz: 061b5aa579f0dd243eb2602150933a00258d54cf22504ba3d15355ff6593f25f3b429e7b752d3f81172e5d3a3186f03b345e228d447236732bf20edf38336686
7
+ data.tar.gz: db6820d4af86523dd174f25885e7ab5378bdb176bff40fbb20d58203af264df2c2811a4767b41a2ccf84b3391fa9c3046bfb6efa5ed614c24f0ddf3d52439aa1
@@ -2,6 +2,18 @@
2
2
 
3
3
  ***
4
4
 
5
+ #### Change log v.1.0.8
6
+
7
+ **Fix**: Fixed an issue with octal representation in escaped string data. The issue would (usually) go unnoticed (altering internal labels in a non-disruptive manner); however, it did affect `ColorSpace` data in the rare use of `ICCBased` color maps, causing color distortion and transparency loss.
8
+
9
+ **Fix**: Fixed an issue with non-English alphabets in PDF literal strings. This issue went undetected because PDF literal strings aren't used by CombinePDF except for date stamping.
10
+
11
+ **Fix**: Improbable, but possibly a fix for issue #127, where the JRuby interpreter would fail to pass the correct arguments to the Hash update Proc. Since I'm trying to author a workaround, I have my doubts... but an attempt is better than nothing.
12
+
13
+ **Update**: Improved parsing error handling, courtesy of Evgeny Garlukovich (@evgenygarl).
14
+
15
+ **Update**: Added reader methods for the `names` and `outlines` PDF objects in response to issue #133. Use with care.
16
+
5
17
  #### Change log v.1.0.7
6
18
 
7
19
  **Fix**: Fix an issue where page property inheritance might break PDF structure if there's a conflict between property types (inheritance using properties by reference vs. nested properties), fixing issue #124. Credit to @erikaxel for exposing the issue.
@@ -6,6 +6,8 @@
6
6
  ########################################################
7
7
 
8
8
  module CombinePDF
9
+ ParsingError = Class.new(StandardError)
10
+
9
11
  # @!visibility private
10
12
  # @private
11
13
  #:nodoc: all
@@ -77,7 +79,9 @@ module CombinePDF
77
79
  @parsed = _parse_
78
80
  # puts @parsed
79
81
 
80
- raise 'Unknown PDF parsing error - malformed PDF file?' unless (@parsed.select { |i| !i.is_a?(Hash) }).empty?
82
+ unless (@parsed.select { |i| !i.is_a?(Hash) }).empty?
83
+ raise ParsingError, 'Unknown PDF parsing error - malformed PDF file?'
84
+ end
81
85
 
82
86
  if @root_object == {}.freeze
83
87
  xref_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :XRef }
@@ -86,7 +90,9 @@ module CombinePDF
86
90
  end
87
91
  end
88
92
 
89
- raise 'root is unknown - cannot determine if file is Encrypted' if @root_object == {}.freeze
93
+ if @root_object == {}.freeze
94
+ raise ParsingError, 'root is unknown - cannot determine if file is Encrypted'
95
+ end
90
96
 
91
97
  if @root_object[:Encrypt]
92
98
  # change_references_to_actual_values @root_object
@@ -310,10 +316,10 @@ module CombinePDF
310
316
  when 102 # f, form-feed
311
317
  str << 12
312
318
  when 48..57 # octal notation for byte?
313
- rep = rep.chr
314
- rep += str_bytes.shift.chr if str_bytes[0].between?(48, 57)
315
- rep += str_bytes.shift.chr if str_bytes[0].between?(48, 57) && ((rep + str_bytes[0].chr).to_i <= 255)
316
- str << rep.to_i
319
+ rep -= 48
320
+ rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57)
321
+ rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57) && (((rep << 3) + (str_bytes[0] - 48)) <= 255)
322
+ str << rep
317
323
  when 10 # new line, ignore
318
324
  str_bytes.shift if str_bytes[0] == 13
319
325
  true
@@ -350,8 +356,12 @@ module CombinePDF
350
356
  # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
351
357
  # instead, a non-strict RegExp is used:
352
358
  str = @scanner.scan_until(/endstream/)
359
+
353
360
  # raise error if the stream doesn't end.
354
- raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
361
+ unless str
362
+ raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
363
+ end
364
+
355
365
  # need to remove end of stream
356
366
  if out.last.is_a? Hash
357
367
  # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
@@ -475,7 +485,9 @@ module CombinePDF
475
485
  @parsed.delete_if { |obj| obj.nil? || obj[:Type] == :Catalog }
476
486
  @parsed << catalogs
477
487
 
478
- raise "Unknown error - parsed data doesn't contain a cataloged object!" unless catalogs
488
+ unless catalogs
489
+ raise ParsingError, "Unknown error - parsed data doesn't contain a cataloged object!"
490
+ end
479
491
  end
480
492
  if catalogs.is_a?(Array)
481
493
  catalogs.each { |c| catalog_pages(c, inheritance_hash) unless c.nil? }
@@ -488,20 +500,23 @@ module CombinePDF
488
500
  end
489
501
  else
490
502
  unless catalogs[:Type] == :Page
491
- raise "Optional Content PDF files aren't supported and their pages cannot be safely extracted." if (catalogs[:AS] || catalogs[:OCProperties]) && !@allow_optional_content
503
+ if (catalogs[:AS] || catalogs[:OCProperties]) && !@allow_optional_content
504
+ raise ParsingError, "Optional Content PDF files aren't supported and their pages cannot be safely extracted."
505
+ end
506
+
492
507
  inheritance_hash[:MediaBox] = catalogs[:MediaBox] if catalogs[:MediaBox]
493
508
  inheritance_hash[:CropBox] = catalogs[:CropBox] if catalogs[:CropBox]
494
509
  inheritance_hash[:Rotate] = catalogs[:Rotate] if catalogs[:Rotate]
495
510
  if catalogs[:Resources]
496
511
  inheritance_hash[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
497
- (inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &self.class.method(:hash_update_proc_for_old))
512
+ (inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
498
513
  end
499
514
  if catalogs[:ColorSpace]
500
515
  inheritance_hash[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
501
- (inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &self.class.method(:hash_update_proc_for_old))
516
+ (inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
502
517
  end
503
- # (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:Resources]
504
- # (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:ColorSpace]
518
+ # (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Resources]
519
+ # (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:ColorSpace]
505
520
 
506
521
  # inheritance_hash[:Order] = catalogs[:Order] if catalogs[:Order]
507
522
  # inheritance_hash[:OCProperties] = catalogs[:OCProperties] if catalogs[:OCProperties]
@@ -517,14 +532,14 @@ module CombinePDF
517
532
  if inheritance_hash[:Resources]
518
533
  catalogs[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
519
534
  catalogs[:Resources] = { referenced_object: catalogs[:Resources], is_reference_only: true } unless catalogs[:Resources][:referenced_object]
520
- catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &self.class.method(:hash_update_proc_for_old))
535
+ catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
521
536
  end
522
537
  if inheritance_hash[:ColorSpace]
523
538
  catalogs[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
524
539
  catalogs[:ColorSpace] = { referenced_object: catalogs[:ColorSpace], is_reference_only: true } unless catalogs[:ColorSpace][:referenced_object]
525
- catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &self.class.method(:hash_update_proc_for_old))
540
+ catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
526
541
  end
527
- # (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &self.class.method(:hash_update_proc_for_old)) if inheritance_hash[:ColorSpace]
542
+ # (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &HASH_UPDATE_PROC_FOR_OLD) if inheritance_hash[:ColorSpace]
528
543
  # catalogs[:Order] ||= inheritance_hash[:Order] if inheritance_hash[:Order]
529
544
  # catalogs[:AS] ||= inheritance_hash[:AS] if inheritance_hash[:AS]
530
545
  # catalogs[:OCProperties] ||= inheritance_hash[:OCProperties] if inheritance_hash[:OCProperties]
@@ -538,9 +553,9 @@ module CombinePDF
538
553
  when :Pages
539
554
  catalog_pages(catalogs[:Kids], inheritance_hash.dup) unless catalogs[:Kids].nil?
540
555
  when :Catalog
541
- @forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:AcroForm]
542
- @names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:Names]
543
- @outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:Outlines]
556
+ @forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:AcroForm]
557
+ @names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Names]
558
+ @outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Outlines]
544
559
  if catalogs[:Dests] # convert PDF 1.1 Dests to PDF 1.2+ Dests
545
560
  dests_arry = (@names_object[:Dests] ||= {})
546
561
  dests_arry = ((dests_arry[:referenced_object] || dests_arry)[:Names] ||= [])
@@ -654,30 +669,45 @@ module CombinePDF
654
669
 
655
670
  # All Strings are one String
656
671
  def unify_string(str)
672
+ str.force_encoding(Encoding::ASCII_8BIT)
657
673
  @strings_dictionary[str] ||= str
658
674
  end
659
675
 
660
676
  # @private
661
677
  # this method reviews a Hash and updates it by merging Hash data,
662
678
  # preffering the old over the new.
663
- def self.hash_update_proc_for_old(_key, old_data, new_data)
679
+ HASH_UPDATE_PROC_FOR_OLD = Proc.new do |_key, old_data, new_data|
664
680
  if old_data.is_a? Hash
665
- old_data.merge(new_data, &method(:hash_update_proc_for_old))
681
+ old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_OLD)
666
682
  else
667
683
  old_data
668
684
  end
669
685
  end
686
+ # def self.hash_update_proc_for_old(_key, old_data, new_data)
687
+ # if old_data.is_a? Hash
688
+ # old_data.merge(new_data, &method(:hash_update_proc_for_old))
689
+ # else
690
+ # old_data
691
+ # end
692
+ # end
670
693
 
671
694
  # @private
672
695
  # this method reviews a Hash an updates it by merging Hash data,
673
696
  # preffering the new over the old.
674
- def self.hash_update_proc_for_new(_key, old_data, new_data)
697
+ HASH_UPDATE_PROC_FOR_NEW = Proc.new do |_key, old_data, new_data|
675
698
  if old_data.is_a? Hash
676
- old_data.merge(new_data, &method(:hash_update_proc_for_new))
699
+ old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_NEW)
677
700
  else
678
701
  new_data
679
702
  end
680
703
  end
704
+ # def self.hash_update_proc_for_new(_key, old_data, new_data)
705
+ # if old_data.is_a? Hash
706
+ # old_data.merge(new_data, &method(:hash_update_proc_for_new))
707
+ # else
708
+ # new_data
709
+ # end
710
+ # end
681
711
 
682
712
  # # run block of code on evey PDF object (PDF objects are class Hash)
683
713
  # def each_object(object, limit_references = true, already_visited = {}, &block)
@@ -137,11 +137,14 @@ module CombinePDF
137
137
  catalog_object
138
138
  end
139
139
 
140
+ # Deprecation Notice
140
141
  def names_object
142
+ puts "CombinePDF Deprecation Notice: the protected method `names_object` will be deprecated in the upcoming version. Use `names` instead."
141
143
  @names
142
144
  end
143
145
 
144
146
  def outlines_object
147
+ puts "CombinePDF Deprecation Notice: the protected method `outlines_object` will be deprecated in the upcoming version. Use `oulines` instead."
145
148
  @outlines
146
149
  end
147
150
  # def forms_data
@@ -82,6 +82,10 @@ module CombinePDF
82
82
  # use, for example:
83
83
  # pdf.viewer_preferences[:HideMenubar] = true
84
84
  attr_reader :viewer_preferences
85
+ # Access the Outlines PDF object Hash (or reference). Use with care.
86
+ attr_reader :outlines
87
+ # Access the Names PDF object Hash (or reference). Use with care.
88
+ attr_reader :names
85
89
 
86
90
  def initialize(parser = nil)
87
91
  # default before setting
@@ -207,7 +211,7 @@ module CombinePDF
207
211
  # when finished, remove the numbering system and keep only pointers
208
212
  remove_old_ids
209
213
  # output the pdf stream
210
- out.join("\n").force_encoding(Encoding::ASCII_8BIT)
214
+ out.join("\n".force_encoding(Encoding::ASCII_8BIT)).force_encoding(Encoding::ASCII_8BIT)
211
215
  end
212
216
 
213
217
  # this method returns all the pages cataloged in the catalog.
@@ -302,8 +306,8 @@ module CombinePDF
302
306
  if data.is_a? PDF
303
307
  @version = [@version, data.version].max
304
308
  pages_to_add = data.pages
305
- actual_value(@names ||= {}.dup).update actual_value(data.names_object), &self.class.method(:hash_merge_new_no_page)
306
- merge_outlines((@outlines ||= {}.dup), data.outlines_object, location) unless actual_value(data.outlines_object).empty?
309
+ actual_value(@names ||= {}.dup).update data.names, &self.class.method(:hash_merge_new_no_page)
310
+ merge_outlines((@outlines ||= {}.dup), actual_value(data.outlines), location) unless actual_value(data.outlines).empty?
307
311
  if actual_value(@forms_data)
308
312
  actual_value(@forms_data).update actual_value(data.forms_data), &self.class.method(:hash_merge_new_no_page) if data.forms_data
309
313
  else
@@ -29,25 +29,30 @@ module CombinePDF
29
29
  end
30
30
  end
31
31
 
32
- STRING_REPLACEMENT_HASH = { "\x0A" => '\\n',
33
- "\x0D" => '\\r',
34
- "\x09" => '\\t',
35
- "\x08" => '\\b',
36
- "\x0C" => '\\f', # form-feed (\f) == 0x0C
37
- "\x28" => '\\(',
38
- "\x29" => '\\)',
39
- "\x5C" => '\\\\' }.dup
40
- 32.times { |i| STRING_REPLACEMENT_HASH[i.chr] ||= "\\#{i}" }
41
- (256 - 127).times { |i| STRING_REPLACEMENT_HASH[(i + 127).chr] ||= "\\#{i + 127}" }
32
+ STRING_REPLACEMENT_ARRAY = []
33
+ 256.times {|i| STRING_REPLACEMENT_ARRAY[i] = [i]}
34
+ 8.times { |i| STRING_REPLACEMENT_ARRAY[i] = "\\00#{i.to_s(8)}".bytes.to_a }
35
+ 24.times { |i| STRING_REPLACEMENT_ARRAY[i + 7] = "\\0#{i.to_s(8)}".bytes.to_a }
36
+ (256 - 127).times { |i| STRING_REPLACEMENT_ARRAY[(i + 127)] ||= "\\#{(i + 127).to_s(8)}".bytes.to_a }
37
+ STRING_REPLACEMENT_ARRAY[0x0A] = '\\n'.bytes.to_a
38
+ STRING_REPLACEMENT_ARRAY[0x0D] = '\\r'.bytes.to_a
39
+ STRING_REPLACEMENT_ARRAY[0x09] = '\\t'.bytes.to_a
40
+ STRING_REPLACEMENT_ARRAY[0x08] = '\\b'.bytes.to_a
41
+ STRING_REPLACEMENT_ARRAY[0x0C] = '\\f'.bytes.to_a # form-feed (\f) == 0x0C
42
+ STRING_REPLACEMENT_ARRAY[0x28] = '\\('.bytes.to_a
43
+ STRING_REPLACEMENT_ARRAY[0x29] = '\\)'.bytes.to_a
44
+ STRING_REPLACEMENT_ARRAY[0x5C] = '\\\\'.bytes.to_a
42
45
 
43
46
  def format_string_to_pdf(object)
47
+ obj_bytes = object.bytes.to_a
44
48
  # object.force_encoding(Encoding::ASCII_8BIT)
45
- if !object.match(/[^D\:\d\+\-Z\']/) # if format is set to Literal and string isn't a date
46
- ('(' + ([].tap { |out| object.bytes.to_a.each { |byte| STRING_REPLACEMENT_HASH[byte.chr] ? (STRING_REPLACEMENT_HASH[byte.chr].bytes.each { |b| out << b }) : out << byte } }).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
47
- else
49
+ if object.length == 0 || obj_bytes.min <= 31 || obj_bytes.max >= 127 # || (obj_bytes[0] != 68 object.match(/[^D\:\d\+\-Z\']/))
48
50
  # A hexadecimal string shall be written as a sequence of hexadecimal digits (0–9 and either A–F or a–f)
49
51
  # encoded as ASCII characters and enclosed within angle brackets (using LESS-THAN SIGN (3Ch) and GREATER- THAN SIGN (3Eh)).
50
52
  "<#{object.unpack('H*')[0]}>".force_encoding(Encoding::ASCII_8BIT)
53
+ else
54
+ # a good fit for a Literal String or the string is a date (MUST be literal)
55
+ ('(' + ([].tap { |out| obj_bytes.each { |byte| out.concat(STRING_REPLACEMENT_ARRAY[byte]) } } ).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
51
56
  end
52
57
  end
53
58
 
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '1.0.7'.freeze
2
+ VERSION = '1.0.8'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-02 00:00:00.000000000 Z
11
+ date: 2018-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4
@@ -104,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
104
  version: '0'
105
105
  requirements: []
106
106
  rubyforge_project:
107
- rubygems_version: 2.6.11
107
+ rubygems_version: 2.7.3
108
108
  signing_key:
109
109
  specification_version: 4
110
110
  summary: Combine, stamp and watermark PDF files in pure Ruby.