combine_pdf 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 26b12c91a180581e3aab431bac1cb46b0d9cecb1
4
- data.tar.gz: f9fe760a66b6a8314106faab6c5b2a45a889e440
2
+ SHA256:
3
+ metadata.gz: 9e5d5d814a254c4939fa88b782ebe49057b917d81134c3fc94385eb19d814092
4
+ data.tar.gz: b43a6e98d54ca72f7f13600898bdf1d6bbe0614276d411f8575ed9c1fe54c3d0
5
5
  SHA512:
6
- metadata.gz: 72f53db4e8cb69b52fa8044bffe42acb21f683fd39afdd7bbf96a924916775fb2ac338c2fb4f5ddd23d0685d0442ec3acc9ee0416581c9f82b576e86beea6c45
7
- data.tar.gz: a555bd19c697042ee8e75742b7a6f0e14282abf650443571bef157c39d4d34d4ab70fa3b7ed7acee9aa49dcd045bacced91ea01fd58f7bb31763614debc8083c
6
+ metadata.gz: 061b5aa579f0dd243eb2602150933a00258d54cf22504ba3d15355ff6593f25f3b429e7b752d3f81172e5d3a3186f03b345e228d447236732bf20edf38336686
7
+ data.tar.gz: db6820d4af86523dd174f25885e7ab5378bdb176bff40fbb20d58203af264df2c2811a4767b41a2ccf84b3391fa9c3046bfb6efa5ed614c24f0ddf3d52439aa1
@@ -2,6 +2,18 @@
2
2
 
3
3
  ***
4
4
 
5
+ #### Change log v.1.0.8
6
+
7
+ **Fix**: Fixed an issue with octal representation in escaped string data. The issue would (usually) go unnoticed (altering internal labels in a non-disruptive manner); however, the issue did affect `ColorSpace` data in the rare use of `ICCBased` color maps, causing color distortion and transparency loss.
8
+
9
+ **Fix**: Fixed an issue with non-English alphabets in PDF literal strings. This issue went undetected since PDF literal strings aren't used by CombinePDF except for date stamping...
10
+
11
+ **Fix**: Improbable, but possibly a fix for issue #127, where the JRuby interpreter would fail to pass the correct arguments to the Hash update Proc. Since I'm trying to author a workaround, I have my doubts... but an attempt is better than nothing.
12
+
13
+ **Update**: Improved parsing error handling, courtesy of Evgeny Garlukovich (@evgenygarl).
14
+
15
+ **Update**: Added reader methods for the `names` and `outlines` PDF objects in response to issue #133. Use with care.
16
+
5
17
  #### Change log v.1.0.7
6
18
 
7
19
  **Fix**: Fix an issue where page property inheritance might break PDF structure if there's a conflict between property types (inheritance using properties by reference vs. nested properties), fixing issue #124. Credit to @erikaxel for exposing the issue.
@@ -6,6 +6,8 @@
6
6
  ########################################################
7
7
 
8
8
  module CombinePDF
9
+ ParsingError = Class.new(StandardError)
10
+
9
11
  # @!visibility private
10
12
  # @private
11
13
  #:nodoc: all
@@ -77,7 +79,9 @@ module CombinePDF
77
79
  @parsed = _parse_
78
80
  # puts @parsed
79
81
 
80
- raise 'Unknown PDF parsing error - malformed PDF file?' unless (@parsed.select { |i| !i.is_a?(Hash) }).empty?
82
+ unless (@parsed.select { |i| !i.is_a?(Hash) }).empty?
83
+ raise ParsingError, 'Unknown PDF parsing error - malformed PDF file?'
84
+ end
81
85
 
82
86
  if @root_object == {}.freeze
83
87
  xref_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :XRef }
@@ -86,7 +90,9 @@ module CombinePDF
86
90
  end
87
91
  end
88
92
 
89
- raise 'root is unknown - cannot determine if file is Encrypted' if @root_object == {}.freeze
93
+ if @root_object == {}.freeze
94
+ raise ParsingError, 'root is unknown - cannot determine if file is Encrypted'
95
+ end
90
96
 
91
97
  if @root_object[:Encrypt]
92
98
  # change_references_to_actual_values @root_object
@@ -310,10 +316,10 @@ module CombinePDF
310
316
  when 102 # f, form-feed
311
317
  str << 12
312
318
  when 48..57 # octal notation for byte?
313
- rep = rep.chr
314
- rep += str_bytes.shift.chr if str_bytes[0].between?(48, 57)
315
- rep += str_bytes.shift.chr if str_bytes[0].between?(48, 57) && ((rep + str_bytes[0].chr).to_i <= 255)
316
- str << rep.to_i
319
+ rep -= 48
320
+ rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57)
321
+ rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57) && (((rep << 3) + (str_bytes[0] - 48)) <= 255)
322
+ str << rep
317
323
  when 10 # new line, ignore
318
324
  str_bytes.shift if str_bytes[0] == 13
319
325
  true
@@ -350,8 +356,12 @@ module CombinePDF
350
356
  # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
351
357
  # instead, a non-strict RegExp is used:
352
358
  str = @scanner.scan_until(/endstream/)
359
+
353
360
  # raise error if the stream doesn't end.
354
- raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
361
+ unless str
362
+ raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
363
+ end
364
+
355
365
  # need to remove end of stream
356
366
  if out.last.is_a? Hash
357
367
  # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
@@ -475,7 +485,9 @@ module CombinePDF
475
485
  @parsed.delete_if { |obj| obj.nil? || obj[:Type] == :Catalog }
476
486
  @parsed << catalogs
477
487
 
478
- raise "Unknown error - parsed data doesn't contain a cataloged object!" unless catalogs
488
+ unless catalogs
489
+ raise ParsingError, "Unknown error - parsed data doesn't contain a cataloged object!"
490
+ end
479
491
  end
480
492
  if catalogs.is_a?(Array)
481
493
  catalogs.each { |c| catalog_pages(c, inheritance_hash) unless c.nil? }
@@ -488,20 +500,23 @@ module CombinePDF
488
500
  end
489
501
  else
490
502
  unless catalogs[:Type] == :Page
491
- raise "Optional Content PDF files aren't supported and their pages cannot be safely extracted." if (catalogs[:AS] || catalogs[:OCProperties]) && !@allow_optional_content
503
+ if (catalogs[:AS] || catalogs[:OCProperties]) && !@allow_optional_content
504
+ raise ParsingError, "Optional Content PDF files aren't supported and their pages cannot be safely extracted."
505
+ end
506
+
492
507
  inheritance_hash[:MediaBox] = catalogs[:MediaBox] if catalogs[:MediaBox]
493
508
  inheritance_hash[:CropBox] = catalogs[:CropBox] if catalogs[:CropBox]
494
509
  inheritance_hash[:Rotate] = catalogs[:Rotate] if catalogs[:Rotate]
495
510
  if catalogs[:Resources]
496
511
  inheritance_hash[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
497
- (inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &self.class.method(:hash_update_proc_for_old))
512
+ (inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
498
513
  end
499
514
  if catalogs[:ColorSpace]
500
515
  inheritance_hash[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
501
- (inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &self.class.method(:hash_update_proc_for_old))
516
+ (inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
502
517
  end
503
- # (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:Resources]
504
- # (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:ColorSpace]
518
+ # (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Resources]
519
+ # (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:ColorSpace]
505
520
 
506
521
  # inheritance_hash[:Order] = catalogs[:Order] if catalogs[:Order]
507
522
  # inheritance_hash[:OCProperties] = catalogs[:OCProperties] if catalogs[:OCProperties]
@@ -517,14 +532,14 @@ module CombinePDF
517
532
  if inheritance_hash[:Resources]
518
533
  catalogs[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
519
534
  catalogs[:Resources] = { referenced_object: catalogs[:Resources], is_reference_only: true } unless catalogs[:Resources][:referenced_object]
520
- catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &self.class.method(:hash_update_proc_for_old))
535
+ catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
521
536
  end
522
537
  if inheritance_hash[:ColorSpace]
523
538
  catalogs[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
524
539
  catalogs[:ColorSpace] = { referenced_object: catalogs[:ColorSpace], is_reference_only: true } unless catalogs[:ColorSpace][:referenced_object]
525
- catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &self.class.method(:hash_update_proc_for_old))
540
+ catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
526
541
  end
527
- # (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &self.class.method(:hash_update_proc_for_old)) if inheritance_hash[:ColorSpace]
542
+ # (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &HASH_UPDATE_PROC_FOR_OLD) if inheritance_hash[:ColorSpace]
528
543
  # catalogs[:Order] ||= inheritance_hash[:Order] if inheritance_hash[:Order]
529
544
  # catalogs[:AS] ||= inheritance_hash[:AS] if inheritance_hash[:AS]
530
545
  # catalogs[:OCProperties] ||= inheritance_hash[:OCProperties] if inheritance_hash[:OCProperties]
@@ -538,9 +553,9 @@ module CombinePDF
538
553
  when :Pages
539
554
  catalog_pages(catalogs[:Kids], inheritance_hash.dup) unless catalogs[:Kids].nil?
540
555
  when :Catalog
541
- @forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:AcroForm]
542
- @names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:Names]
543
- @outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &self.class.method(:hash_update_proc_for_new)) if catalogs[:Outlines]
556
+ @forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:AcroForm]
557
+ @names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Names]
558
+ @outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Outlines]
544
559
  if catalogs[:Dests] # convert PDF 1.1 Dests to PDF 1.2+ Dests
545
560
  dests_arry = (@names_object[:Dests] ||= {})
546
561
  dests_arry = ((dests_arry[:referenced_object] || dests_arry)[:Names] ||= [])
@@ -654,30 +669,45 @@ module CombinePDF
654
669
 
655
670
  # All Strings are one String
656
671
  def unify_string(str)
672
+ str.force_encoding(Encoding::ASCII_8BIT)
657
673
  @strings_dictionary[str] ||= str
658
674
  end
659
675
 
660
676
  # @private
661
677
  # this method reviews a Hash and updates it by merging Hash data,
662
678
  # preferring the old over the new.
663
- def self.hash_update_proc_for_old(_key, old_data, new_data)
679
+ HASH_UPDATE_PROC_FOR_OLD = Proc.new do |_key, old_data, new_data|
664
680
  if old_data.is_a? Hash
665
- old_data.merge(new_data, &method(:hash_update_proc_for_old))
681
+ old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_OLD)
666
682
  else
667
683
  old_data
668
684
  end
669
685
  end
686
+ # def self.hash_update_proc_for_old(_key, old_data, new_data)
687
+ # if old_data.is_a? Hash
688
+ # old_data.merge(new_data, &method(:hash_update_proc_for_old))
689
+ # else
690
+ # old_data
691
+ # end
692
+ # end
670
693
 
671
694
  # @private
672
695
  # this method reviews a Hash and updates it by merging Hash data,
673
696
  # preferring the new over the old.
674
- def self.hash_update_proc_for_new(_key, old_data, new_data)
697
+ HASH_UPDATE_PROC_FOR_NEW = Proc.new do |_key, old_data, new_data|
675
698
  if old_data.is_a? Hash
676
- old_data.merge(new_data, &method(:hash_update_proc_for_new))
699
+ old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_NEW)
677
700
  else
678
701
  new_data
679
702
  end
680
703
  end
704
+ # def self.hash_update_proc_for_new(_key, old_data, new_data)
705
+ # if old_data.is_a? Hash
706
+ # old_data.merge(new_data, &method(:hash_update_proc_for_new))
707
+ # else
708
+ # new_data
709
+ # end
710
+ # end
681
711
 
682
712
  # # run block of code on every PDF object (PDF objects are class Hash)
683
713
  # def each_object(object, limit_references = true, already_visited = {}, &block)
@@ -137,11 +137,14 @@ module CombinePDF
137
137
  catalog_object
138
138
  end
139
139
 
140
+ # Deprecation Notice
140
141
  def names_object
142
+ puts "CombinePDF Deprecation Notice: the protected method `names_object` will be deprecated in the upcoming version. Use `names` instead."
141
143
  @names
142
144
  end
143
145
 
144
146
  def outlines_object
147
+ puts "CombinePDF Deprecation Notice: the protected method `outlines_object` will be deprecated in the upcoming version. Use `oulines` instead."
145
148
  @outlines
146
149
  end
147
150
  # def forms_data
@@ -82,6 +82,10 @@ module CombinePDF
82
82
  # use, for example:
83
83
  # pdf.viewer_preferences[:HideMenubar] = true
84
84
  attr_reader :viewer_preferences
85
+ # Access the Outlines PDF object Hash (or reference). Use with care.
86
+ attr_reader :outlines
87
+ # Access the Names PDF object Hash (or reference). Use with care.
88
+ attr_reader :names
85
89
 
86
90
  def initialize(parser = nil)
87
91
  # default before setting
@@ -207,7 +211,7 @@ module CombinePDF
207
211
  # when finished, remove the numbering system and keep only pointers
208
212
  remove_old_ids
209
213
  # output the pdf stream
210
- out.join("\n").force_encoding(Encoding::ASCII_8BIT)
214
+ out.join("\n".force_encoding(Encoding::ASCII_8BIT)).force_encoding(Encoding::ASCII_8BIT)
211
215
  end
212
216
 
213
217
  # this method returns all the pages cataloged in the catalog.
@@ -302,8 +306,8 @@ module CombinePDF
302
306
  if data.is_a? PDF
303
307
  @version = [@version, data.version].max
304
308
  pages_to_add = data.pages
305
- actual_value(@names ||= {}.dup).update actual_value(data.names_object), &self.class.method(:hash_merge_new_no_page)
306
- merge_outlines((@outlines ||= {}.dup), data.outlines_object, location) unless actual_value(data.outlines_object).empty?
309
+ actual_value(@names ||= {}.dup).update data.names, &self.class.method(:hash_merge_new_no_page)
310
+ merge_outlines((@outlines ||= {}.dup), actual_value(data.outlines), location) unless actual_value(data.outlines).empty?
307
311
  if actual_value(@forms_data)
308
312
  actual_value(@forms_data).update actual_value(data.forms_data), &self.class.method(:hash_merge_new_no_page) if data.forms_data
309
313
  else
@@ -29,25 +29,30 @@ module CombinePDF
29
29
  end
30
30
  end
31
31
 
32
- STRING_REPLACEMENT_HASH = { "\x0A" => '\\n',
33
- "\x0D" => '\\r',
34
- "\x09" => '\\t',
35
- "\x08" => '\\b',
36
- "\x0C" => '\\f', # form-feed (\f) == 0x0C
37
- "\x28" => '\\(',
38
- "\x29" => '\\)',
39
- "\x5C" => '\\\\' }.dup
40
- 32.times { |i| STRING_REPLACEMENT_HASH[i.chr] ||= "\\#{i}" }
41
- (256 - 127).times { |i| STRING_REPLACEMENT_HASH[(i + 127).chr] ||= "\\#{i + 127}" }
32
+ STRING_REPLACEMENT_ARRAY = []
33
+ 256.times {|i| STRING_REPLACEMENT_ARRAY[i] = [i]}
34
+ 8.times { |i| STRING_REPLACEMENT_ARRAY[i] = "\\00#{i.to_s(8)}".bytes.to_a }
35
+ 24.times { |i| STRING_REPLACEMENT_ARRAY[i + 7] = "\\0#{i.to_s(8)}".bytes.to_a }
36
+ (256 - 127).times { |i| STRING_REPLACEMENT_ARRAY[(i + 127)] ||= "\\#{(i + 127).to_s(8)}".bytes.to_a }
37
+ STRING_REPLACEMENT_ARRAY[0x0A] = '\\n'.bytes.to_a
38
+ STRING_REPLACEMENT_ARRAY[0x0D] = '\\r'.bytes.to_a
39
+ STRING_REPLACEMENT_ARRAY[0x09] = '\\t'.bytes.to_a
40
+ STRING_REPLACEMENT_ARRAY[0x08] = '\\b'.bytes.to_a
41
+ STRING_REPLACEMENT_ARRAY[0x0C] = '\\f'.bytes.to_a # form-feed (\f) == 0x0C
42
+ STRING_REPLACEMENT_ARRAY[0x28] = '\\('.bytes.to_a
43
+ STRING_REPLACEMENT_ARRAY[0x29] = '\\)'.bytes.to_a
44
+ STRING_REPLACEMENT_ARRAY[0x5C] = '\\\\'.bytes.to_a
42
45
 
43
46
  def format_string_to_pdf(object)
47
+ obj_bytes = object.bytes.to_a
44
48
  # object.force_encoding(Encoding::ASCII_8BIT)
45
- if !object.match(/[^D\:\d\+\-Z\']/) # if format is set to Literal and string isn't a date
46
- ('(' + ([].tap { |out| object.bytes.to_a.each { |byte| STRING_REPLACEMENT_HASH[byte.chr] ? (STRING_REPLACEMENT_HASH[byte.chr].bytes.each { |b| out << b }) : out << byte } }).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
47
- else
49
+ if object.length == 0 || obj_bytes.min <= 31 || obj_bytes.max >= 127 # || (obj_bytes[0] != 68 object.match(/[^D\:\d\+\-Z\']/))
48
50
  # A hexadecimal string shall be written as a sequence of hexadecimal digits (0–9 and either A–F or a–f)
49
51
  # encoded as ASCII characters and enclosed within angle brackets (using LESS-THAN SIGN (3Ch) and GREATER-THAN SIGN (3Eh)).
50
52
  "<#{object.unpack('H*')[0]}>".force_encoding(Encoding::ASCII_8BIT)
53
+ else
54
+ # a good fit for a Literal String or the string is a date (MUST be literal)
55
+ ('(' + ([].tap { |out| obj_bytes.each { |byte| out.concat(STRING_REPLACEMENT_ARRAY[byte]) } } ).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
51
56
  end
52
57
  end
53
58
 
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '1.0.7'.freeze
2
+ VERSION = '1.0.8'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-02 00:00:00.000000000 Z
11
+ date: 2018-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4
@@ -104,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
104
  version: '0'
105
105
  requirements: []
106
106
  rubyforge_project:
107
- rubygems_version: 2.6.11
107
+ rubygems_version: 2.7.3
108
108
  signing_key:
109
109
  specification_version: 4
110
110
  summary: Combine, stamp and watermark PDF files in pure Ruby.