combine_pdf 1.0.7 → 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +12 -0
- data/lib/combine_pdf/parser.rb +53 -23
- data/lib/combine_pdf/pdf_protected.rb +3 -0
- data/lib/combine_pdf/pdf_public.rb +7 -3
- data/lib/combine_pdf/renderer.rb +18 -13
- data/lib/combine_pdf/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9e5d5d814a254c4939fa88b782ebe49057b917d81134c3fc94385eb19d814092
|
4
|
+
data.tar.gz: b43a6e98d54ca72f7f13600898bdf1d6bbe0614276d411f8575ed9c1fe54c3d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 061b5aa579f0dd243eb2602150933a00258d54cf22504ba3d15355ff6593f25f3b429e7b752d3f81172e5d3a3186f03b345e228d447236732bf20edf38336686
|
7
|
+
data.tar.gz: db6820d4af86523dd174f25885e7ab5378bdb176bff40fbb20d58203af264df2c2811a4767b41a2ccf84b3391fa9c3046bfb6efa5ed614c24f0ddf3d52439aa1
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,18 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
#### Change log v.1.0.8
|
6
|
+
|
7
|
+
**Fix**: Fixed an issue with octal representation in escaped string data. The issue would (usually) go unnoticed (altering internal labels in a non-disruptive manner); however, the issue did affect `ColorSpace` data in the rare use of `ICCBased` color maps, causing color distortion and transparency loss.
|
8
|
+
|
9
|
+
**Fix**: Fixed an issue with non-English alphabets in PDF literal strings. This issue went undetected since PDF literal strings aren't used by CombinePDF except for the date stamping...
|
10
|
+
|
11
|
+
**Fix**: Improbable, but possibly a fix for issue #127, where the JRuby interpreter would fail to pass the correct arguments to the Hash update Proc. Since I'm trying to author a workaround, I have my doubts... but an attempt is better than nothing.
|
12
|
+
|
13
|
+
**Update**: Improved parsing error handling, courtesy of Evgeny Garlukovich (@evgenygarl).
|
14
|
+
|
15
|
+
**Update**: Added reader methods for the `names` and `outlines` PDF objects in response to issue #133. Use with care.
|
16
|
+
|
5
17
|
#### Change log v.1.0.7
|
6
18
|
|
7
19
|
**Fix**: Fix an issue where page property inheritance might break PDF structure if there's a conflict between property types (inheritance using properties by reference vs. nested properties), fixing issue #124. Credit to @erikaxel for exposing the issue.
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -6,6 +6,8 @@
|
|
6
6
|
########################################################
|
7
7
|
|
8
8
|
module CombinePDF
|
9
|
+
ParsingError = Class.new(StandardError)
|
10
|
+
|
9
11
|
# @!visibility private
|
10
12
|
# @private
|
11
13
|
#:nodoc: all
|
@@ -77,7 +79,9 @@ module CombinePDF
|
|
77
79
|
@parsed = _parse_
|
78
80
|
# puts @parsed
|
79
81
|
|
80
|
-
|
82
|
+
unless (@parsed.select { |i| !i.is_a?(Hash) }).empty?
|
83
|
+
raise ParsingError, 'Unknown PDF parsing error - malformed PDF file?'
|
84
|
+
end
|
81
85
|
|
82
86
|
if @root_object == {}.freeze
|
83
87
|
xref_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :XRef }
|
@@ -86,7 +90,9 @@ module CombinePDF
|
|
86
90
|
end
|
87
91
|
end
|
88
92
|
|
89
|
-
|
93
|
+
if @root_object == {}.freeze
|
94
|
+
raise ParsingError, 'root is unknown - cannot determine if file is Encrypted'
|
95
|
+
end
|
90
96
|
|
91
97
|
if @root_object[:Encrypt]
|
92
98
|
# change_references_to_actual_values @root_object
|
@@ -310,10 +316,10 @@ module CombinePDF
|
|
310
316
|
when 102 # f, form-feed
|
311
317
|
str << 12
|
312
318
|
when 48..57 # octal notation for byte?
|
313
|
-
rep
|
314
|
-
rep
|
315
|
-
rep
|
316
|
-
str << rep
|
319
|
+
rep -= 48
|
320
|
+
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57)
|
321
|
+
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0].between?(48, 57) && (((rep << 3) + (str_bytes[0] - 48)) <= 255)
|
322
|
+
str << rep
|
317
323
|
when 10 # new line, ignore
|
318
324
|
str_bytes.shift if str_bytes[0] == 13
|
319
325
|
true
|
@@ -350,8 +356,12 @@ module CombinePDF
|
|
350
356
|
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
|
351
357
|
# instead, a non-strict RegExp is used:
|
352
358
|
str = @scanner.scan_until(/endstream/)
|
359
|
+
|
353
360
|
# raise error if the stream doesn't end.
|
354
|
-
|
361
|
+
unless str
|
362
|
+
raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
|
363
|
+
end
|
364
|
+
|
355
365
|
# need to remove end of stream
|
356
366
|
if out.last.is_a? Hash
|
357
367
|
# out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
|
@@ -475,7 +485,9 @@ module CombinePDF
|
|
475
485
|
@parsed.delete_if { |obj| obj.nil? || obj[:Type] == :Catalog }
|
476
486
|
@parsed << catalogs
|
477
487
|
|
478
|
-
|
488
|
+
unless catalogs
|
489
|
+
raise ParsingError, "Unknown error - parsed data doesn't contain a cataloged object!"
|
490
|
+
end
|
479
491
|
end
|
480
492
|
if catalogs.is_a?(Array)
|
481
493
|
catalogs.each { |c| catalog_pages(c, inheritance_hash) unless c.nil? }
|
@@ -488,20 +500,23 @@ module CombinePDF
|
|
488
500
|
end
|
489
501
|
else
|
490
502
|
unless catalogs[:Type] == :Page
|
491
|
-
|
503
|
+
if (catalogs[:AS] || catalogs[:OCProperties]) && !@allow_optional_content
|
504
|
+
raise ParsingError, "Optional Content PDF files aren't supported and their pages cannot be safely extracted."
|
505
|
+
end
|
506
|
+
|
492
507
|
inheritance_hash[:MediaBox] = catalogs[:MediaBox] if catalogs[:MediaBox]
|
493
508
|
inheritance_hash[:CropBox] = catalogs[:CropBox] if catalogs[:CropBox]
|
494
509
|
inheritance_hash[:Rotate] = catalogs[:Rotate] if catalogs[:Rotate]
|
495
510
|
if catalogs[:Resources]
|
496
511
|
inheritance_hash[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
|
497
|
-
(inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &
|
512
|
+
(inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
|
498
513
|
end
|
499
514
|
if catalogs[:ColorSpace]
|
500
515
|
inheritance_hash[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
|
501
|
-
(inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &
|
516
|
+
(inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
|
502
517
|
end
|
503
|
-
# (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &
|
504
|
-
# (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &
|
518
|
+
# (inheritance_hash[:Resources] ||= {}).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Resources]
|
519
|
+
# (inheritance_hash[:ColorSpace] ||= {}).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:ColorSpace]
|
505
520
|
|
506
521
|
# inheritance_hash[:Order] = catalogs[:Order] if catalogs[:Order]
|
507
522
|
# inheritance_hash[:OCProperties] = catalogs[:OCProperties] if catalogs[:OCProperties]
|
@@ -517,14 +532,14 @@ module CombinePDF
|
|
517
532
|
if inheritance_hash[:Resources]
|
518
533
|
catalogs[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
|
519
534
|
catalogs[:Resources] = { referenced_object: catalogs[:Resources], is_reference_only: true } unless catalogs[:Resources][:referenced_object]
|
520
|
-
catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &
|
535
|
+
catalogs[:Resources][:referenced_object].update((inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
|
521
536
|
end
|
522
537
|
if inheritance_hash[:ColorSpace]
|
523
538
|
catalogs[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
|
524
539
|
catalogs[:ColorSpace] = { referenced_object: catalogs[:ColorSpace], is_reference_only: true } unless catalogs[:ColorSpace][:referenced_object]
|
525
|
-
catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &
|
540
|
+
catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
|
526
541
|
end
|
527
|
-
# (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &
|
542
|
+
# (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &HASH_UPDATE_PROC_FOR_OLD) if inheritance_hash[:ColorSpace]
|
528
543
|
# catalogs[:Order] ||= inheritance_hash[:Order] if inheritance_hash[:Order]
|
529
544
|
# catalogs[:AS] ||= inheritance_hash[:AS] if inheritance_hash[:AS]
|
530
545
|
# catalogs[:OCProperties] ||= inheritance_hash[:OCProperties] if inheritance_hash[:OCProperties]
|
@@ -538,9 +553,9 @@ module CombinePDF
|
|
538
553
|
when :Pages
|
539
554
|
catalog_pages(catalogs[:Kids], inheritance_hash.dup) unless catalogs[:Kids].nil?
|
540
555
|
when :Catalog
|
541
|
-
@forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &
|
542
|
-
@names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &
|
543
|
-
@outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &
|
556
|
+
@forms_object.update((catalogs[:AcroForm][:referenced_object] || catalogs[:AcroForm]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:AcroForm]
|
557
|
+
@names_object.update((catalogs[:Names][:referenced_object] || catalogs[:Names]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Names]
|
558
|
+
@outlines_object.update((catalogs[:Outlines][:referenced_object] || catalogs[:Outlines]), &HASH_UPDATE_PROC_FOR_NEW) if catalogs[:Outlines]
|
544
559
|
if catalogs[:Dests] # convert PDF 1.1 Dests to PDF 1.2+ Dests
|
545
560
|
dests_arry = (@names_object[:Dests] ||= {})
|
546
561
|
dests_arry = ((dests_arry[:referenced_object] || dests_arry)[:Names] ||= [])
|
@@ -654,30 +669,45 @@ module CombinePDF
|
|
654
669
|
|
655
670
|
# All Strings are one String
|
656
671
|
def unify_string(str)
|
672
|
+
str.force_encoding(Encoding::ASCII_8BIT)
|
657
673
|
@strings_dictionary[str] ||= str
|
658
674
|
end
|
659
675
|
|
660
676
|
# @private
|
661
677
|
# this method reviews a Hash and updates it by merging Hash data,
|
662
678
|
# preferring the old over the new.
|
663
|
-
|
679
|
+
HASH_UPDATE_PROC_FOR_OLD = Proc.new do |_key, old_data, new_data|
|
664
680
|
if old_data.is_a? Hash
|
665
|
-
old_data.merge(new_data, &
|
681
|
+
old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_OLD)
|
666
682
|
else
|
667
683
|
old_data
|
668
684
|
end
|
669
685
|
end
|
686
|
+
# def self.hash_update_proc_for_old(_key, old_data, new_data)
|
687
|
+
# if old_data.is_a? Hash
|
688
|
+
# old_data.merge(new_data, &method(:hash_update_proc_for_old))
|
689
|
+
# else
|
690
|
+
# old_data
|
691
|
+
# end
|
692
|
+
# end
|
670
693
|
|
671
694
|
# @private
|
672
695
|
# this method reviews a Hash and updates it by merging Hash data,
|
673
696
|
# preferring the new over the old.
|
674
|
-
|
697
|
+
HASH_UPDATE_PROC_FOR_NEW = Proc.new do |_key, old_data, new_data|
|
675
698
|
if old_data.is_a? Hash
|
676
|
-
old_data.merge(new_data, &
|
699
|
+
old_data.merge(new_data, &HASH_UPDATE_PROC_FOR_NEW)
|
677
700
|
else
|
678
701
|
new_data
|
679
702
|
end
|
680
703
|
end
|
704
|
+
# def self.hash_update_proc_for_new(_key, old_data, new_data)
|
705
|
+
# if old_data.is_a? Hash
|
706
|
+
# old_data.merge(new_data, &method(:hash_update_proc_for_new))
|
707
|
+
# else
|
708
|
+
# new_data
|
709
|
+
# end
|
710
|
+
# end
|
681
711
|
|
682
712
|
# # run block of code on every PDF object (PDF objects are class Hash)
|
683
713
|
# def each_object(object, limit_references = true, already_visited = {}, &block)
|
@@ -137,11 +137,14 @@ module CombinePDF
|
|
137
137
|
catalog_object
|
138
138
|
end
|
139
139
|
|
140
|
+
# Deprecation Notice
|
140
141
|
def names_object
|
142
|
+
puts "CombinePDF Deprecation Notice: the protected method `names_object` will be deprecated in the upcoming version. Use `names` instead."
|
141
143
|
@names
|
142
144
|
end
|
143
145
|
|
144
146
|
def outlines_object
|
147
|
+
puts "CombinePDF Deprecation Notice: the protected method `outlines_object` will be deprecated in the upcoming version. Use `oulines` instead."
|
145
148
|
@outlines
|
146
149
|
end
|
147
150
|
# def forms_data
|
@@ -82,6 +82,10 @@ module CombinePDF
|
|
82
82
|
# use, for example:
|
83
83
|
# pdf.viewer_preferences[:HideMenubar] = true
|
84
84
|
attr_reader :viewer_preferences
|
85
|
+
# Access the Outlines PDF object Hash (or reference). Use with care.
|
86
|
+
attr_reader :outlines
|
87
|
+
# Access the Names PDF object Hash (or reference). Use with care.
|
88
|
+
attr_reader :names
|
85
89
|
|
86
90
|
def initialize(parser = nil)
|
87
91
|
# default before setting
|
@@ -207,7 +211,7 @@ module CombinePDF
|
|
207
211
|
# when finished, remove the numbering system and keep only pointers
|
208
212
|
remove_old_ids
|
209
213
|
# output the pdf stream
|
210
|
-
out.join("\n").force_encoding(Encoding::ASCII_8BIT)
|
214
|
+
out.join("\n".force_encoding(Encoding::ASCII_8BIT)).force_encoding(Encoding::ASCII_8BIT)
|
211
215
|
end
|
212
216
|
|
213
217
|
# this method returns all the pages cataloged in the catalog.
|
@@ -302,8 +306,8 @@ module CombinePDF
|
|
302
306
|
if data.is_a? PDF
|
303
307
|
@version = [@version, data.version].max
|
304
308
|
pages_to_add = data.pages
|
305
|
-
actual_value(@names ||= {}.dup).update
|
306
|
-
merge_outlines((@outlines ||= {}.dup), data.
|
309
|
+
actual_value(@names ||= {}.dup).update data.names, &self.class.method(:hash_merge_new_no_page)
|
310
|
+
merge_outlines((@outlines ||= {}.dup), actual_value(data.outlines), location) unless actual_value(data.outlines).empty?
|
307
311
|
if actual_value(@forms_data)
|
308
312
|
actual_value(@forms_data).update actual_value(data.forms_data), &self.class.method(:hash_merge_new_no_page) if data.forms_data
|
309
313
|
else
|
data/lib/combine_pdf/renderer.rb
CHANGED
@@ -29,25 +29,30 @@ module CombinePDF
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
STRING_REPLACEMENT_ARRAY = []
|
33
|
+
256.times {|i| STRING_REPLACEMENT_ARRAY[i] = [i]}
|
34
|
+
8.times { |i| STRING_REPLACEMENT_ARRAY[i] = "\\00#{i.to_s(8)}".bytes.to_a }
|
35
|
+
24.times { |i| STRING_REPLACEMENT_ARRAY[i + 7] = "\\0#{i.to_s(8)}".bytes.to_a }
|
36
|
+
(256 - 127).times { |i| STRING_REPLACEMENT_ARRAY[(i + 127)] ||= "\\#{(i + 127).to_s(8)}".bytes.to_a }
|
37
|
+
STRING_REPLACEMENT_ARRAY[0x0A] = '\\n'.bytes.to_a
|
38
|
+
STRING_REPLACEMENT_ARRAY[0x0D] = '\\r'.bytes.to_a
|
39
|
+
STRING_REPLACEMENT_ARRAY[0x09] = '\\t'.bytes.to_a
|
40
|
+
STRING_REPLACEMENT_ARRAY[0x08] = '\\b'.bytes.to_a
|
41
|
+
STRING_REPLACEMENT_ARRAY[0x0C] = '\\f'.bytes.to_a # form-feed (\f) == 0x0C
|
42
|
+
STRING_REPLACEMENT_ARRAY[0x28] = '\\('.bytes.to_a
|
43
|
+
STRING_REPLACEMENT_ARRAY[0x29] = '\\)'.bytes.to_a
|
44
|
+
STRING_REPLACEMENT_ARRAY[0x5C] = '\\\\'.bytes.to_a
|
42
45
|
|
43
46
|
def format_string_to_pdf(object)
|
47
|
+
obj_bytes = object.bytes.to_a
|
44
48
|
# object.force_encoding(Encoding::ASCII_8BIT)
|
45
|
-
if
|
46
|
-
('(' + ([].tap { |out| object.bytes.to_a.each { |byte| STRING_REPLACEMENT_HASH[byte.chr] ? (STRING_REPLACEMENT_HASH[byte.chr].bytes.each { |b| out << b }) : out << byte } }).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
|
47
|
-
else
|
49
|
+
if object.length == 0 || obj_bytes.min <= 31 || obj_bytes.max >= 127 # || (obj_bytes[0] != 68 object.match(/[^D\:\d\+\-Z\']/))
|
48
50
|
# A hexadecimal string shall be written as a sequence of hexadecimal digits (0–9 and either A–F or a–f)
|
49
51
|
# encoded as ASCII characters and enclosed within angle brackets (using LESS-THAN SIGN (3Ch) and GREATER- THAN SIGN (3Eh)).
|
50
52
|
"<#{object.unpack('H*')[0]}>".force_encoding(Encoding::ASCII_8BIT)
|
53
|
+
else
|
54
|
+
# a good fit for a Literal String or the string is a date (MUST be literal)
|
55
|
+
('(' + ([].tap { |out| obj_bytes.each { |byte| out.concat(STRING_REPLACEMENT_ARRAY[byte]) } } ).pack('C*') + ')').force_encoding(Encoding::ASCII_8BIT)
|
51
56
|
end
|
52
57
|
end
|
53
58
|
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|
@@ -104,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
104
|
version: '0'
|
105
105
|
requirements: []
|
106
106
|
rubyforge_project:
|
107
|
-
rubygems_version: 2.
|
107
|
+
rubygems_version: 2.7.3
|
108
108
|
signing_key:
|
109
109
|
specification_version: 4
|
110
110
|
summary: Combine, stamp and watermark PDF files in pure Ruby.
|