combine_pdf 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +12 -0
- data/lib/combine_pdf/parser.rb +53 -23
- data/lib/combine_pdf/pdf_protected.rb +3 -0
- data/lib/combine_pdf/pdf_public.rb +7 -3
- data/lib/combine_pdf/renderer.rb +18 -13
- data/lib/combine_pdf/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9e5d5d814a254c4939fa88b782ebe49057b917d81134c3fc94385eb19d814092
|
4
|
+
data.tar.gz: b43a6e98d54ca72f7f13600898bdf1d6bbe0614276d411f8575ed9c1fe54c3d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 061b5aa579f0dd243eb2602150933a00258d54cf22504ba3d15355ff6593f25f3b429e7b752d3f81172e5d3a3186f03b345e228d447236732bf20edf38336686
|
7
|
+
data.tar.gz: db6820d4af86523dd174f25885e7ab5378bdb176bff40fbb20d58203af264df2c2811a4767b41a2ccf84b3391fa9c3046bfb6efa5ed614c24f0ddf3d52439aa1
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,18 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
#### Change log v.1.0.8
|
6
|
+
|
7
|
+
**Fix**: Fixed an issue with octal representation in escaped string data. The issue would (usually) go unnoticed (altering internal labels in a non-disruptive manner); however, the issue did affect `ColorSpace` data in the rare use of `ICCBased` color maps, causing color distortion and transparency loss.
|
8
|
+
|
9
|
+
**Fix**: Fixed an issue with non-English characters in PDF literal strings. This issue went undetected since PDF literal strings aren't used by CombinePDF except for the date stamping...
|
10
|
+
|
11
|
+
**Fix**: Improbable, but possibly a fix for issue #127, where the JRuby interpreter would fail to pass the correct arguments to the Hash update Proc. Since I'm trying to author a workaround, I have my doubts... but an attempt is better than nothing.
|
12
|
+
|
13
|
+
**Update**: Improved parsing error handling, courtesy of Evgeny Garlukovich (@evgenygarl).
|
14
|
+
|
15
|
+
**Update**: Added reader methods for the `names` and `outlines` PDF objects in response to issue #133. Use with care.
|
16
|
+
|
5
17
|
#### Change log v.1.0.7
|
6
18
|
|
7
19
|
**Fix**: Fix an issue where page property inheritance might break PDF structure if there's a conflict between property types (inheritance using properties by reference vs. nested properties), fixing issue #124. Credit to @erikaxel for exposing the issue.
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -6,6 +6,8 @@
|
|
6
6
|
########################################################
|
7
7
|
|
8
8
|
module CombinePDF
|
9
|
+
ParsingError = Class.new(StandardError)
|
10
|
+
|
9
11
|
# @!visibility private
|
10
12
|
# @private
|
11
13
|
#:nodoc: all
|
@@ -77,7 +79,9 @@ module CombinePDF
|
|
77
79
|
@parsed = _parse_
|
78
80
|
# puts @parsed
|
79
81
|
|
80
|
-
|
82
|
+
unless (@parsed.select { |i| !i.is_a?(Hash) }).empty?
|
83
|
+
raise ParsingError, 'Unknown PDF parsing error - malformed PDF file?'
|
84
|
+
end
|
81
85
|
|
82
86
|
if @root_object == {}.freeze
|
83
87
|
xref_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :XRef }
|
@@ -86,7 +90,9 @@ module CombinePDF
|
|
86
90
|
end
|
87
91
|
end
|
88
92
|
|
89
|
-
|
93
|
+
if @root_object == {}.freeze
|
94
|
+
raise ParsingError, 'root is unknown - cannot determine if file is Encrypted'
|
95
|
+
end
|
90
96
|
|
91
97
|
if @root_object[:Encrypt]
|
92
98
|
# change_references_to_actual_values @root_object
|
@@ -310,10 +316,10 @@ module CombinePDF
|
|
310
316
|
when 102 # f, form-feed
|
311
317
|
str << 12
|
312
318
|
when 48..57 # octal notation for byte?
|
313
|
-
rep
|
314
|
-
rep
|
315
|
-
rep
|
316
|
-
str << rep
|
319
|
+
rep -= 48
|
320
|
+
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57)
|
321
|
+
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57) && (((rep << 3) + (str_bytes[0] - 48)) <= 255)
|
322
|
+
str << rep
|
317
323
|
when 10 # new line, ignore
|
318
324
|
str_bytes.shift if str_bytes[0] == 13
|
319
325
|
true
|
@@ -350,8 +356,12 @@ module CombinePDF
|
|
350
356
|
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
|
351
357
|
# instead, a non-strict RegExp is used:
|
352
358
|
str = @scanner.scan_until(/endstream/)
|
359
|
+
|
353
360
|
# raise error if the stream doesn't end.
|
354
|
-
|
361
|
+
unless str
|
362
|
+
raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
|
363
|
+
end
|
364
|
+
|
355
365
|
# need to remove end of stream
|
356
366
|
if out.last.is_a? Hash
|
357
367
|
# out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
|
@@ -475,7 +485,9 @@ module CombinePDF
|
|
475
485
|
@parsed.delete_if { |obj| obj.nil? || obj[:Type] == :Catalog }
|
476
486
|
@parsed << catalogs
|
477
487
|
|
478
|
-
|
488
|
+
unless catalogs
|
489
|
+
raise ParsingError, "Unknown error - parsed data doesn't contain a cataloged object!"
|
490
|
+
end
|
479
491
|
end
|
480
492
|
if catalogs.is_a?(Array)
|
481
493
|
catalogs.each { |c| catalog_pages(c, inheritance_hash) unless c.nil? }
|
@@ -488,20 +500,23 @@ module CombinePDF
|
|
488
500
|
end
|
489
501
|
else
|
490
502
|
unless catalogs[:Type] == :Page
|
491
|
-
|
503
|
+
if (catalogs[:AS] || catalogs[:OCProperties]) && !@allow_optional_content
|
504
|
+
raise ParsingError, "Optional Content PDF files aren't supported and their pages cannot be safely extracted."
|
505
|
+
end
|
506
|
+
|
492
507
|
inheritance_hash[:MediaBox] = catalogs[:MediaBox] if catalogs[:MediaBox]
|
493
508
|
inheritance_hash[:CropBox] = catalogs[:CropBox] if catalogs[:CropBox]
|
494
509
|
inheritance_hash[:Rotate] = catalogs[:Rotate] if catalogs[:Rotate]
|
495
510
|
if catalogs[:Resources]
|
496
511
|
inheritance_hash[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
|
497
|
-
(inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &
|
512
|
+
(inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
|
498
513
|
end
|
499
514
|
if catalogs[:ColorSpace]
|
500
515
|
inheritance_hash[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
|
501
|
-
(inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &
|
516
|
+
(inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
|
502
517
|
end
|
503
|
-
# (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &
|
504
|
-
# (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &
|
518
|
+
# (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Resources]
|
519
|
+
# (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:ColorSpace]
|
505
520
|
|
506
521
|
# inheritance_hash[:Order] = catalogs[:Order] if catalogs[:Order]
|
507
522
|
# inheritance_hash[:OCProperties] = catalogs[:OCProperties] if catalogs[:OCProperties]
|
@@ -517,14 +532,14 @@ module CombinePDF
|
|
517
532
|
if inheritance_hash[:Resources]
|
518
533
|
catalogs[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
|
519
534
|
catalogs[:Resources] = { referenced_object: catalogs[:Resources], is_reference_only: true } unless catalogs[:Resources][:referenced_object]
|
520
|
-
catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &
|
535
|
+
catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
|
521
536
|
end
|
522
537
|
if inheritance_hash[:ColorSpace]
|
523
538
|
catalogs[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
|
524
539
|
catalogs[:ColorSpace] = { referenced_object: catalogs[:ColorSpace], is_reference_only: true } unless catalogs[:ColorSpace][:referenced_object]
|
525
|
-
catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &
|
540
|
+
catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
|
526
541
|
end
|
527
|
-
# (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &
|
542
|
+
# (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &HASH_UPDATE_PROC_FOR_OLD) if inheritance_hash[:ColorSpace]
|
528
543
|
# catalogs[:Order] ||= inheritance_hash[:Order] if inheritance_hash[:Order]
|
529
544
|
# catalogs[:AS] ||= inheritance_hash[:AS] if inheritance_hash[:AS]
|
530
545
|
# catalogs[:OCProperties] ||= inheritance_hash[:OCProperties] if inheritance_hash[:OCProperties]
|
@@ -538,9 +553,9 @@ module CombinePDF
|
|
538
553
|
when :Pages
|
539
554
|
catalog_pages(catalogs[:Kids], inheritance_hash.dup) unless catalogs[:Kids].nil?
|
540
555
|
when :Catalog
|
541
|
-
@forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &
|
542
|
-
@names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &
|
543
|
-
@outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &
|
556
|
+
@forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:AcroForm]
|
557
|
+
@names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Names]
|
558
|
+
@outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Outlines]
|
544
559
|
if catalogs[:Dests] # convert PDF 1.1 Dests to PDF 1.2+ Dests
|
545
560
|
dests_arry = (@names_object[:Dests] ||= {})
|
546
561
|
dests_arry = ((dests_arry[:referenced_object] || dests_arry)[:Names] ||= [])
|
@@ -654,30 +669,45 @@ module CombinePDF
|
|
654
669
|
|
655
670
|
# All Strings are one String
|
656
671
|
def unify_string(str)
|
672
|
+
str.force_encoding(Encoding::ASCII_8BIT)
|
657
673
|
@strings_dictionary[str] ||= str
|
658
674
|
end
|
659
675
|
|
660
676
|
# @private
|
661
677
|
# this method reviews a Hash and updates it by merging Hash data,
|
662
678
|
# preferring the old over the new.
|
663
|
-
|
679
|
+
HASH_UPDATE_PROC_FOR_OLD = Proc.new do |_key, old_data, new_data|
|
664
680
|
if old_data.is_a? Hash
|
665
|
-
old_data.merge(new_data, &
|
681
|
+
old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_OLD)
|
666
682
|
else
|
667
683
|
old_data
|
668
684
|
end
|
669
685
|
end
|
686
|
+
# def self.hash_update_proc_for_old(_key, old_data, new_data)
|
687
|
+
# if old_data.is_a? Hash
|
688
|
+
# old_data.merge(new_data, &method(:hash_update_proc_for_old))
|
689
|
+
# else
|
690
|
+
# old_data
|
691
|
+
# end
|
692
|
+
# end
|
670
693
|
|
671
694
|
# @private
|
672
695
|
# this method reviews a Hash and updates it by merging Hash data,
|
673
696
|
# preferring the new over the old.
|
674
|
-
|
697
|
+
HASH_UPDATE_PROC_FOR_NEW = Proc.new do |_key, old_data, new_data|
|
675
698
|
if old_data.is_a? Hash
|
676
|
-
old_data.merge(new_data, &
|
699
|
+
old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_NEW)
|
677
700
|
else
|
678
701
|
new_data
|
679
702
|
end
|
680
703
|
end
|
704
|
+
# def self.hash_update_proc_for_new(_key, old_data, new_data)
|
705
|
+
# if old_data.is_a? Hash
|
706
|
+
# old_data.merge(new_data, &method(:hash_update_proc_for_new))
|
707
|
+
# else
|
708
|
+
# new_data
|
709
|
+
# end
|
710
|
+
# end
|
681
711
|
|
682
712
|
# # run block of code on evey PDF object (PDF objects are class Hash)
|
683
713
|
# def each_object(object, limit_references = true, already_visited = {}, &block)
|
@@ -137,11 +137,14 @@ module CombinePDF
|
|
137
137
|
catalog_object
|
138
138
|
end
|
139
139
|
|
140
|
+
# Deprecation Notice
|
140
141
|
def names_object
|
142
|
+
puts "CombinePDF Deprecation Notice: the protected method `names_object` will be deprecated in the upcoming version. Use `names` instead."
|
141
143
|
@names
|
142
144
|
end
|
143
145
|
|
144
146
|
def outlines_object
|
147
|
+
puts "CombinePDF Deprecation Notice: the protected method `outlines_object` will be deprecated in the upcoming version. Use `oulines` instead."
|
145
148
|
@outlines
|
146
149
|
end
|
147
150
|
# def forms_data
|
@@ -82,6 +82,10 @@ module CombinePDF
|
|
82
82
|
# use, for example:
|
83
83
|
# pdf.viewer_preferences[:HideMenubar] = true
|
84
84
|
attr_reader :viewer_preferences
|
85
|
+
# Access the Outlines PDF object Hash (or reference). Use with care.
|
86
|
+
attr_reader :outlines
|
87
|
+
# Access the Names PDF object Hash (or reference). Use with care.
|
88
|
+
attr_reader :names
|
85
89
|
|
86
90
|
def initialize(parser = nil)
|
87
91
|
# default before setting
|
@@ -207,7 +211,7 @@ module CombinePDF
|
|
207
211
|
# when finished, remove the numbering system and keep only pointers
|
208
212
|
remove_old_ids
|
209
213
|
# output the pdf stream
|
210
|
-
out.join("\n").force_encoding(Encoding::ASCII_8BIT)
|
214
|
+
out.join("\n".force_encoding(Encoding::ASCII_8BIT)).force_encoding(Encoding::ASCII_8BIT)
|
211
215
|
end
|
212
216
|
|
213
217
|
# this method returns all the pages cataloged in the catalog.
|
@@ -302,8 +306,8 @@ module CombinePDF
|
|
302
306
|
if data.is_a? PDF
|
303
307
|
@version = [@version, data.version].max
|
304
308
|
pages_to_add = data.pages
|
305
|
-
actual_value(@names ||= {}.dup).update
|
306
|
-
merge_outlines((@outlines ||= {}.dup), data.
|
309
|
+
actual_value(@names ||= {}.dup).update data.names, &self.class.method(:hash_merge_new_no_page)
|
310
|
+
merge_outlines((@outlines ||= {}.dup), actual_value(data.outlines), location) unless actual_value(data.outlines).empty?
|
307
311
|
if actual_value(@forms_data)
|
308
312
|
actual_value(@forms_data).update actual_value(data.forms_data), &self.class.method(:hash_merge_new_no_page) if data.forms_data
|
309
313
|
else
|
data/lib/combine_pdf/renderer.rb
CHANGED
@@ -29,25 +29,30 @@ module CombinePDF
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
STRING_REPLACEMENT_ARRAY = []
|
33
|
+
256.times {|i| STRING_REPLACEMENT_ARRAY[i] = [i]}
|
34
|
+
8.times { |i| STRING_REPLACEMENT_ARRAY[i] = "\\00#{i.to_s(8)}".bytes.to_a }
|
35
|
+
24.times { |i| STRING_REPLACEMENT_ARRAY[i + 7] = "\\0#{i.to_s(8)}".bytes.to_a }
|
36
|
+
(256 - 127).times { |i| STRING_REPLACEMENT_ARRAY[(i + 127)] ||= "\\#{(i + 127).to_s(8)}".bytes.to_a }
|
37
|
+
STRING_REPLACEMENT_ARRAY[0x0A] = '\\n'.bytes.to_a
|
38
|
+
STRING_REPLACEMENT_ARRAY[0x0D] = '\\r'.bytes.to_a
|
39
|
+
STRING_REPLACEMENT_ARRAY[0x09] = '\\t'.bytes.to_a
|
40
|
+
STRING_REPLACEMENT_ARRAY[0x08] = '\\b'.bytes.to_a
|
41
|
+
STRING_REPLACEMENT_ARRAY[0x0C] = '\\f'.bytes.to_a # form-feed (\f) == 0x0C
|
42
|
+
STRING_REPLACEMENT_ARRAY[0x28] = '\\('.bytes.to_a
|
43
|
+
STRING_REPLACEMENT_ARRAY[0x29] = '\\)'.bytes.to_a
|
44
|
+
STRING_REPLACEMENT_ARRAY[0x5C] = '\\\\'.bytes.to_a
|
42
45
|
|
43
46
|
def format_string_to_pdf(object)
|
47
|
+
obj_bytes = object.bytes.to_a
|
44
48
|
# object.force_encoding(Encoding::ASCII_8BIT)
|
45
|
-
if
|
46
|
-
('(' + ([].tap { |out| object.bytes.to_a.each { |byte| STRING_REPLACEMENT_HASH[byte.chr] ? (STRING_REPLACEMENT_HASH[byte.chr].bytes.each { |b| out << b }) : out << byte } }).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
|
47
|
-
else
|
49
|
+
if object.length == 0 || obj_bytes.min <= 31 || obj_bytes.max >= 127 # || (obj_bytes[0] != 68 object.match(/[^D\:\d\+\-Z\']/))
|
48
50
|
# A hexadecimal string shall be written as a sequence of hexadecimal digits (0–9 and either A–F or a–f)
|
49
51
|
# encoded as ASCII characters and enclosed within angle brackets (using LESS-THAN SIGN (3Ch) and GREATER- THAN SIGN (3Eh)).
|
50
52
|
"<#{object.unpack('H*')[0]}>".force_encoding(Encoding::ASCII_8BIT)
|
53
|
+
else
|
54
|
+
# a good fit for a Literal String or the string is a date (MUST be literal)
|
55
|
+
('(' + ([].tap { |out| obj_bytes.each { |byte| out.concat(STRING_REPLACEMENT_ARRAY[byte]) } } ).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
|
51
56
|
end
|
52
57
|
end
|
53
58
|
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|
@@ -104,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
104
|
version: '0'
|
105
105
|
requirements: []
|
106
106
|
rubyforge_project:
|
107
|
-
rubygems_version: 2.
|
107
|
+
rubygems_version: 2.7.3
|
108
108
|
signing_key:
|
109
109
|
specification_version: 4
|
110
110
|
summary: Combine, stamp and watermark PDF files in pure Ruby.
|