combine_pdf 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/combine_pdf.rb +1 -0
- data/lib/combine_pdf/page_methods.rb +4 -0
- data/lib/combine_pdf/parser.rb +30 -19
- data/lib/combine_pdf/pdf_protected.rb +81 -8
- data/lib/combine_pdf/pdf_public.rb +2 -0
- data/lib/combine_pdf/version.rb +1 -1
- metadata +3 -3
- data/lib/combine_pdf/operations.rb +0 -416
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1379a60fa97dde419575868f841a676254f61cd
|
4
|
+
data.tar.gz: e55e8f95eaefa828499d393147a13ace92fbd70b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cd478b21fe7634e7abb4552fa1b0770465c7e097b00c7ba842d19e243a529667e8b48fdeb7bae64117e30419e9d7fd7a9aeca39d35f342910df4656f887f6c6
|
7
|
+
data.tar.gz: 72d49c89926cb106d133fb17746ec2374225c2d5e5e72c8a4f1c38779d9f6abc188a82c1aa610ebb15ce8adf6da9419ae6ef175dd3b3fb84e6bc8cb938cd9680
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,16 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
Change log v.0.2.7
|
6
|
+
|
7
|
+
**Fix**: Fixed an issue where a malformed PDF String could cause the parser to hang.
|
8
|
+
|
9
|
+
**Update**: Inner PDF links (links to pages within the PDF file) will now be preserved when importing a whole PDF (although Outlines are, for now, discarded, and their related links will be discarded as well). If the same destination page is inserted more than once, the first version will be preferred.
|
10
|
+
|
11
|
+
**Deprecation Warning**: the `Page_Methods#secure_injection`, `Page_Methods#make_unsecure` and `Page_Methods#make_secure` methods are deprecated. Use `Page_Methods#copy(true)` for safeguarding against font/resource conflicts when "stamping" one PDF page over another.
|
12
|
+
|
13
|
+
***
|
14
|
+
|
5
15
|
Change log v.0.2.6
|
6
16
|
|
7
17
|
**fixed**: Hasan Iskandar fixed issue #30 - Output file cannot be saved from Adobe Reader with "Save As optimizes for Fast Web View" preference enabled. Thank you Hasan.
|
data/lib/combine_pdf.rb
CHANGED
@@ -23,19 +23,23 @@ module CombinePDF
|
|
23
23
|
|
24
24
|
# accessor (getter) for the secure_injection setting
|
25
25
|
def secure_injection
|
26
|
+
warn "**Deprecation Warning**: the `Page_Methods#secure_injection`, `Page_Methods#make_unsecure` and `Page_Methods#make_secure` methods are deprecated. Use `Page_Methods#copy(true)` for safeguarding against font/resource conflicts when 'stamping' one PDF page over another."
|
26
27
|
@secure_injection
|
27
28
|
end
|
28
29
|
# accessor (setter) for the secure_injection setting
|
29
30
|
def secure_injection= safe
|
31
|
+
warn "**Deprecation Warning**: the `Page_Methods#secure_injection`, `Page_Methods#make_unsecure` and `Page_Methods#make_secure` methods are deprecated. Use `Page_Methods#copy(true)` for safeguarding against font/resource conflicts when 'stamping' one PDF page over another."
|
30
32
|
@secure_injection = safe
|
31
33
|
end
|
32
34
|
# sets secure_injection to `true` and returns self, allowing for chaining methods
|
33
35
|
def make_secure
|
36
|
+
warn "**Deprecation Warning**: the `Page_Methods#secure_injection`, `Page_Methods#make_unsecure` and `Page_Methods#make_secure` methods are deprecated. Use `Page_Methods#copy(true)` for safeguarding against font/resource conflicts when 'stamping' one PDF page over another."
|
34
37
|
@secure_injection = true
|
35
38
|
self
|
36
39
|
end
|
37
40
|
# sets secure_injection to `false` and returns self, allowing for chaining methods
|
38
41
|
def make_unsecure
|
42
|
+
warn "**Deprecation Warning**: the `Page_Methods#secure_injection`, `Page_Methods#make_unsecure` and `Page_Methods#make_secure` methods are deprecated. Use `Page_Methods#copy(true)` for safeguarding against font/resource conflicts when 'stamping' one PDF page over another."
|
39
43
|
@secure_injection = false
|
40
44
|
self
|
41
45
|
end
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -36,7 +36,7 @@ module CombinePDF
|
|
36
36
|
# the info and root objects, as found (if found) in the PDF file.
|
37
37
|
#
|
38
38
|
# they are mainly used to know if the file is (was) encrypted and to get more details.
|
39
|
-
attr_reader :info_object, :root_object
|
39
|
+
attr_reader :info_object, :root_object, :names_object
|
40
40
|
|
41
41
|
# when creating a parser, it is important to set the data (String) we wish to parse.
|
42
42
|
#
|
@@ -53,6 +53,8 @@ module CombinePDF
|
|
53
53
|
@references = []
|
54
54
|
@root_object = {}
|
55
55
|
@info_object = {}
|
56
|
+
@names_object = {}
|
57
|
+
@strings_dictionary = {} # all strings are one string
|
56
58
|
@version = nil
|
57
59
|
@scanner = nil
|
58
60
|
end
|
@@ -113,6 +115,9 @@ module CombinePDF
|
|
113
115
|
end
|
114
116
|
end
|
115
117
|
|
118
|
+
# Strings were unified, we can let them go..
|
119
|
+
@strings_dictionary.clear
|
120
|
+
|
116
121
|
|
117
122
|
# serialize_objects_and_references.catalog_pages
|
118
123
|
|
@@ -179,10 +184,10 @@ module CombinePDF
|
|
179
184
|
# need to remove end of stream
|
180
185
|
if out.last.is_a? Hash
|
181
186
|
# out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
|
182
|
-
out.last[:raw_stream_content] = str.
|
187
|
+
out.last[:raw_stream_content] = unify_string str.sub(/[\n\r]?[\n\r]endstream\z/, "").force_encoding(Encoding::ASCII_8BIT)
|
183
188
|
else
|
184
189
|
warn "Stream not attached to dictionary!"
|
185
|
-
out << str[
|
190
|
+
out << str.sub(/[\n\r]?[\n\r]endstream\z/, "").force_encoding(Encoding::ASCII_8BIT)
|
186
191
|
end
|
187
192
|
##########################################
|
188
193
|
## parse an Object after finished
|
@@ -199,7 +204,7 @@ module CombinePDF
|
|
199
204
|
##########################################
|
200
205
|
when str = @scanner.scan(/<[0-9a-fA-F]+>/)
|
201
206
|
# warn "Found a hex string"
|
202
|
-
out << [str[1..-2]].pack('H*')
|
207
|
+
out << unify_string([str[1..-2]].pack('H*').force_encoding(Encoding::ASCII_8BIT))
|
203
208
|
##########################################
|
204
209
|
## parse a Literal String
|
205
210
|
##########################################
|
@@ -208,14 +213,20 @@ module CombinePDF
|
|
208
213
|
str = ''.force_encoding(Encoding::ASCII_8BIT)
|
209
214
|
count = 1
|
210
215
|
while count > 0 && @scanner.rest? do
|
211
|
-
|
216
|
+
scn = @scanner.scan_until(/[\(\)]/)
|
217
|
+
unless scn
|
218
|
+
warn "Unknown error parsing string at #{@scanner.pos} for string: #{str}!"
|
219
|
+
count = 0 # error
|
220
|
+
next
|
221
|
+
end
|
222
|
+
|
223
|
+
str += scn.to_s
|
212
224
|
seperator_count = 0
|
213
225
|
seperator_count += 1 while str[-2-seperator_count] == "\\"
|
214
226
|
|
215
227
|
case str[-1]
|
216
228
|
when '('
|
217
|
-
## The following solution
|
218
|
-
|
229
|
+
## The following solution might fail when (string ends with this sign: \\)
|
219
230
|
count += 1 unless seperator_count.odd?
|
220
231
|
when ')'
|
221
232
|
count -= 1 unless seperator_count.odd?
|
@@ -276,7 +287,7 @@ module CombinePDF
|
|
276
287
|
str << str_bytes.shift
|
277
288
|
end
|
278
289
|
end
|
279
|
-
out << str.pack('C*').force_encoding(Encoding::ASCII_8BIT)
|
290
|
+
out << unify_string(str.pack('C*').force_encoding(Encoding::ASCII_8BIT))
|
280
291
|
##########################################
|
281
292
|
## Parse a comment
|
282
293
|
##########################################
|
@@ -368,7 +379,7 @@ module CombinePDF
|
|
368
379
|
|
369
380
|
|
370
381
|
# resets cataloging and pages
|
371
|
-
def catalog_pages(catalogs = nil,
|
382
|
+
def catalog_pages(catalogs = nil, inheritance_hash = {})
|
372
383
|
unless catalogs
|
373
384
|
|
374
385
|
if root_object[:Root]
|
@@ -383,11 +394,11 @@ module CombinePDF
|
|
383
394
|
end
|
384
395
|
case
|
385
396
|
when catalogs.is_a?(Array)
|
386
|
-
catalogs.each {|c| catalog_pages(c,
|
397
|
+
catalogs.each {|c| catalog_pages(c, inheritance_hash ) unless c.nil?}
|
387
398
|
when catalogs.is_a?(Hash)
|
388
399
|
if catalogs[:is_reference_only]
|
389
400
|
if catalogs[:referenced_object]
|
390
|
-
catalog_pages(catalogs[:referenced_object],
|
401
|
+
catalog_pages(catalogs[:referenced_object], inheritance_hash)
|
391
402
|
else
|
392
403
|
warn "couldn't follow reference!!! #{catalogs} not found!"
|
393
404
|
end
|
@@ -424,11 +435,11 @@ module CombinePDF
|
|
424
435
|
catalogs[:Rotate] = catalogs[:Rotate][:referenced_object][:indirect_without_dictionary] if catalogs[:Rotate].is_a?(Hash) && catalogs[:Rotate][:referenced_object].is_a?(Hash) && catalogs[:Rotate][:referenced_object][:indirect_without_dictionary]
|
425
436
|
|
426
437
|
catalogs.instance_eval {extend Page_Methods}
|
427
|
-
catalogs.secure_injection = secure_injection
|
428
438
|
when :Pages
|
429
|
-
catalog_pages(catalogs[:Kids],
|
439
|
+
catalog_pages(catalogs[:Kids], inheritance_hash.dup ) unless catalogs[:Kids].nil?
|
430
440
|
when :Catalog
|
431
|
-
|
441
|
+
@names_object.update( (catalogs[:Names][:referenced_object] || catalogs[:Names]), &self.class.method(:hash_update_proc_for_new) ) if catalogs[:Names]
|
442
|
+
catalog_pages(catalogs[:Pages], inheritance_hash.dup ) unless catalogs[:Pages].nil?
|
432
443
|
end
|
433
444
|
end
|
434
445
|
end
|
@@ -473,11 +484,11 @@ module CombinePDF
|
|
473
484
|
obj.delete(:indirect_reference_id); obj.delete(:indirect_generation_number)
|
474
485
|
end
|
475
486
|
self
|
476
|
-
|
477
|
-
|
478
|
-
#
|
479
|
-
|
480
|
-
|
487
|
+
end
|
488
|
+
|
489
|
+
# All Strings are one String
|
490
|
+
def unify_string str
|
491
|
+
@strings_dictionary[str] ||= str
|
481
492
|
end
|
482
493
|
|
483
494
|
# @private
|
@@ -25,15 +25,15 @@ module CombinePDF
|
|
25
25
|
# this function adds the references contained in "object", but DOESN'T add the object itself.
|
26
26
|
#
|
27
27
|
# this is used for internal operations, such as injecting data using the << operator.
|
28
|
-
def add_referenced(object)
|
28
|
+
def add_referenced(object, dup_pages = true)
|
29
29
|
# add references but not root
|
30
30
|
case
|
31
31
|
when object.is_a?(Array)
|
32
|
-
object.each {|it| add_referenced(it)}
|
32
|
+
object.each {|it| add_referenced(it, dup_pages)}
|
33
33
|
return true
|
34
34
|
when object.is_a?(Hash)
|
35
35
|
# first if statement is actually a workaround for a bug in Acrobat Reader, regarding duplicate pages.
|
36
|
-
if object[:is_reference_only] && object[:referenced_object] && object[:referenced_object].is_a?(Hash) && object[:referenced_object][:Type] == :Page
|
36
|
+
if dup_pages && object[:is_reference_only] && object[:referenced_object] && object[:referenced_object].is_a?(Hash) && object[:referenced_object][:Type] == :Page
|
37
37
|
if @objects.find_index object[:referenced_object]
|
38
38
|
@objects << (object[:referenced_object] = object[:referenced_object].dup)
|
39
39
|
else
|
@@ -49,6 +49,8 @@ module CombinePDF
|
|
49
49
|
# stop this path, there is no need to run over the Hash's keys and values
|
50
50
|
return true
|
51
51
|
else
|
52
|
+
# stop if page propagation is false
|
53
|
+
return true if !dup_pages && object[:referenced_object][:Type] == :Page
|
52
54
|
# @objects.include? object[:referenced_object] is bound to be false
|
53
55
|
# the object wasn't found - add it to the @objects array
|
54
56
|
@objects << object[:referenced_object]
|
@@ -56,8 +58,8 @@ module CombinePDF
|
|
56
58
|
|
57
59
|
end
|
58
60
|
object.each do |k, v|
|
59
|
-
add_referenced(v) unless k == :Parent
|
60
|
-
end
|
61
|
+
add_referenced(v, dup_pages) unless k == :Parent
|
62
|
+
end
|
61
63
|
else
|
62
64
|
return false
|
63
65
|
end
|
@@ -83,8 +85,10 @@ module CombinePDF
|
|
83
85
|
# build new Pages object
|
84
86
|
pages_object = {Type: :Pages, Count: page_list.length, Kids: page_list.map {|p| {referenced_object: p, is_reference_only: true} } }
|
85
87
|
|
88
|
+
# rebuild/rename the names dictionary
|
89
|
+
rebuild_names
|
86
90
|
# build new Catalog object
|
87
|
-
catalog_object = {Type: :Catalog, Pages: {referenced_object: pages_object, is_reference_only: true} }
|
91
|
+
catalog_object = {Type: :Catalog, Pages: {referenced_object: pages_object, is_reference_only: true}, Names: {referenced_object: @names, is_reference_only: true} }
|
88
92
|
catalog_object[:ViewerPreferences] = @viewer_preferences unless @viewer_preferences.empty?
|
89
93
|
|
90
94
|
# point old Pages pointers to new Pages object
|
@@ -103,17 +107,22 @@ module CombinePDF
|
|
103
107
|
catalog_object
|
104
108
|
end
|
105
109
|
|
110
|
+
def names_object
|
111
|
+
@names
|
112
|
+
end
|
113
|
+
|
106
114
|
# @private
|
107
115
|
# this is an alternative to the rebuild_catalog catalog method
|
108
116
|
# this method is used by the to_pdf method, for streamlining the PDF output.
|
109
117
|
# there is no point is calling the method before preparing the output.
|
110
118
|
def rebuild_catalog_and_objects
|
111
119
|
catalog = rebuild_catalog
|
112
|
-
@objects
|
120
|
+
@objects.clear
|
113
121
|
@objects << @info
|
114
122
|
add_referenced @info
|
115
123
|
@objects << catalog
|
116
|
-
add_referenced catalog
|
124
|
+
add_referenced catalog[:Pages]
|
125
|
+
add_referenced catalog[:Names], false
|
117
126
|
catalog
|
118
127
|
end
|
119
128
|
|
@@ -138,6 +147,70 @@ module CombinePDF
|
|
138
147
|
@objects.each {|obj| obj.delete(:indirect_reference_id); obj.delete(:indirect_generation_number)}
|
139
148
|
end
|
140
149
|
|
150
|
+
def rebuild_names name_tree = nil, base = "CombinePDF_0000000"
|
151
|
+
if name_tree
|
152
|
+
dic = []
|
153
|
+
case name_tree
|
154
|
+
when Array
|
155
|
+
if name_tree[0].is_a? String
|
156
|
+
(name_tree.length/2).times do |i|
|
157
|
+
dic << (name_tree[i*2].clear << base.next!)
|
158
|
+
dic << name_tree[(i*2) + 1]
|
159
|
+
end
|
160
|
+
else
|
161
|
+
name_tree.each {|kid| dic.concat rebuild_names(kid, base) }
|
162
|
+
end
|
163
|
+
when Hash
|
164
|
+
if name_tree[:Kids]
|
165
|
+
dic.concat rebuild_names(name_tree[:Kids], base)
|
166
|
+
elsif name_tree[:Names]
|
167
|
+
dic.concat rebuild_names(name_tree[:Names], base)
|
168
|
+
elsif name_tree[:referenced_object]
|
169
|
+
dic.concat rebuild_names(name_tree[:referenced_object], base)
|
170
|
+
end
|
171
|
+
end
|
172
|
+
return dic
|
173
|
+
end
|
174
|
+
@names.keys.each do |k|
|
175
|
+
@names[k] = {referenced_object: { Names: rebuild_names(@names[k], base) } , is_reference_only: true} unless k == :Type
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
# @private
|
180
|
+
# this method reviews a Hash and updates it by merging Hash data,
|
181
|
+
# preferring the new over the old.
|
182
|
+
def self.hash_merge_new_no_page key, old_data, new_data
|
183
|
+
if old_data.is_a? Hash
|
184
|
+
return old_data if old_data[:Type] == :Page
|
185
|
+
old_data.merge( new_data, &( @hash_merge_new_no_page_proc ||= self.method(:hash_merge_new_no_page) ) )
|
186
|
+
elsif old_data.is_a? Array
|
187
|
+
old_data + new_data
|
188
|
+
else
|
189
|
+
new_data
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
|
194
|
+
private
|
195
|
+
|
196
|
+
def renaming_dictionary object = nil, dictionary = {}
|
197
|
+
object ||= @names
|
198
|
+
case object
|
199
|
+
when Array
|
200
|
+
object.length.times {|i| object[i].is_a?(String) ? (dictionary[object[i]] = (dictionary.last || "Random_0001").next) : renaming_dictionary(object[i], dictionary) }
|
201
|
+
when Hash
|
202
|
+
object.values.each {|v| renaming_dictionary v, dictionary }
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
def rename_object object, dictionary
|
207
|
+
case object
|
208
|
+
when Array
|
209
|
+
object.length.times {|i| }
|
210
|
+
when Hash
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
141
214
|
end
|
142
215
|
end
|
143
216
|
|
@@ -103,6 +103,7 @@ module CombinePDF
|
|
103
103
|
# set data from parser
|
104
104
|
@version = parser.version if parser.version.is_a? Float
|
105
105
|
@info = parser.info_object || {}
|
106
|
+
@names = parser.names_object || {}
|
106
107
|
|
107
108
|
# general globals
|
108
109
|
@set_start_id = 1
|
@@ -293,6 +294,7 @@ module CombinePDF
|
|
293
294
|
if data.is_a? PDF
|
294
295
|
@version = [@version, data.version].max
|
295
296
|
pages_to_add = data.pages
|
297
|
+
@names.update data.names_object, &self.class.method(:hash_merge_new_no_page)
|
296
298
|
elsif data.is_a?(Array) && (data.select {|o| !(o.is_a?(Hash) && o[:Type] == :Page) } ).empty?
|
297
299
|
pages_to_add = data
|
298
300
|
elsif data.is_a?(Hash) && data[:Type] == :Page
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|
@@ -74,7 +74,6 @@ files:
|
|
74
74
|
- lib/combine_pdf/decrypt.rb
|
75
75
|
- lib/combine_pdf/filter.rb
|
76
76
|
- lib/combine_pdf/fonts.rb
|
77
|
-
- lib/combine_pdf/operations.rb
|
78
77
|
- lib/combine_pdf/page_methods.rb
|
79
78
|
- lib/combine_pdf/parser.rb
|
80
79
|
- lib/combine_pdf/pdf_protected.rb
|
@@ -108,3 +107,4 @@ specification_version: 4
|
|
108
107
|
summary: Combine, stamp and watermark PDF files in pure Ruby.
|
109
108
|
test_files:
|
110
109
|
- test/console
|
110
|
+
has_rdoc:
|
@@ -1,416 +0,0 @@
|
|
1
|
-
module CombinePDF
|
2
|
-
|
3
|
-
################################################################
|
4
|
-
## These are common functions, used within the different classes
|
5
|
-
## These functions aren't open to the public.
|
6
|
-
################################################################
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
# holds a simple content stream that starts a PDF graphic state container - used for wrapping malformed PDF content streams.
|
11
|
-
CONTENT_CONTAINER_START = { is_reference_only: true , referenced_object: {indirect_reference_id: 0, raw_stream_content: 'q'} }
|
12
|
-
# holds a simple content stream that ends a PDF graphic state container - used for wrapping malformed PDF content streams.
|
13
|
-
CONTENT_CONTAINER_MIDDLE = { is_reference_only: true , referenced_object: {indirect_reference_id: 0, raw_stream_content: "Q\nq"} }
|
14
|
-
# holds a simple content stream that ends a PDF graphic state container - used for wrapping malformed PDF content streams.
|
15
|
-
CONTENT_CONTAINER_END = { is_reference_only: true , referenced_object: {indirect_reference_id: 0, raw_stream_content: 'Q'} }
|
16
|
-
|
17
|
-
# @private
|
18
|
-
# @!visibility private
|
19
|
-
#:nodoc: all
|
20
|
-
|
21
|
-
protected
|
22
|
-
|
23
|
-
# @!visibility private
|
24
|
-
|
25
|
-
# This is an internal class. you don't need it.
|
26
|
-
module PDFOperations
|
27
|
-
|
28
|
-
module_function
|
29
|
-
|
30
|
-
# @!visibility private
|
31
|
-
|
32
|
-
def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
|
33
|
-
# make sure both the page reciving the new data and the injected page are of the correct data type.
|
34
|
-
return false unless page.is_a?(Hash) && stream.is_a?(Hash)
|
35
|
-
|
36
|
-
# following the reference chain and assigning a pointer to the correct Resouces object.
|
37
|
-
# (assignments of Strings, Arrays and Hashes are pointers in Ruby, unless the .dup method is called)
|
38
|
-
page[:Resources] ||= {}
|
39
|
-
original_resources = page[:Resources]
|
40
|
-
if original_resources[:is_reference_only]
|
41
|
-
original_resources = original_resources[:referenced_object]
|
42
|
-
raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless original_resources
|
43
|
-
end
|
44
|
-
original_contents = page[:Contents]
|
45
|
-
original_contents = [original_contents] unless original_contents.is_a? Array
|
46
|
-
|
47
|
-
stream[:Resources] ||= {}
|
48
|
-
stream_resources = stream[:Resources]
|
49
|
-
if stream_resources[:is_reference_only]
|
50
|
-
stream_resources = stream_resources[:referenced_object]
|
51
|
-
raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless stream_resources
|
52
|
-
end
|
53
|
-
stream_contents = stream[:Contents]
|
54
|
-
stream_contents = [stream_contents] unless stream_contents.is_a? Array
|
55
|
-
|
56
|
-
# collect keys as objects - this is to make sure that
|
57
|
-
# we are working on the actual resource data, rather then references
|
58
|
-
flatten_resources_dictionaries stream_resources
|
59
|
-
flatten_resources_dictionaries original_resources
|
60
|
-
|
61
|
-
# injecting each of the values in the injected Page
|
62
|
-
stream_resources.each do |key, new_val|
|
63
|
-
unless PRIVATE_HASH_KEYS.include? key # keep CombinePDF structual data intact.
|
64
|
-
if original_resources[key].nil?
|
65
|
-
original_resources[key] = new_val
|
66
|
-
elsif original_resources[key].is_a?(Hash) && new_val.is_a?(Hash)
|
67
|
-
new_val.update original_resources[key] # make sure the old values are respected
|
68
|
-
original_resources[key].update new_val # transfer old and new values to the injected page
|
69
|
-
end #Do nothing if array - ot is the PROC array, which is an issue
|
70
|
-
end
|
71
|
-
end
|
72
|
-
original_resources[:ProcSet] = [:PDF, :Text, :ImageB, :ImageC, :ImageI] # this was recommended by the ISO. 32000-1:2008
|
73
|
-
|
74
|
-
if top # if this is a stamp (overlay)
|
75
|
-
page[:Contents] = original_contents
|
76
|
-
page[:Contents].unshift create_deep_copy(CONTENT_CONTAINER_START)
|
77
|
-
page[:Contents].push create_deep_copy(CONTENT_CONTAINER_MIDDLE)
|
78
|
-
page[:Contents].push *stream_contents
|
79
|
-
page[:Contents].push create_deep_copy(CONTENT_CONTAINER_END)
|
80
|
-
else #if this was a watermark (underlay? would be lost if the page was scanned, as white might not be transparent)
|
81
|
-
page[:Contents] = stream_contents
|
82
|
-
page[:Contents].unshift create_deep_copy(CONTENT_CONTAINER_START)
|
83
|
-
page[:Contents].push create_deep_copy(CONTENT_CONTAINER_MIDDLE)
|
84
|
-
page[:Contents].push *original_contents
|
85
|
-
page[:Contents].push create_deep_copy(CONTENT_CONTAINER_END)
|
86
|
-
end
|
87
|
-
|
88
|
-
page
|
89
|
-
end
|
90
|
-
# copy_and_secure_for_injection(page)
|
91
|
-
# - page is a page in the pages array, i.e.
|
92
|
-
# pdf.pages[0]
|
93
|
-
# takes a page object and:
|
94
|
-
#
|
95
|
-
# makes a deep copy of the page (Ruby defaults to pointers, so this will copy the memory).
|
96
|
-
#
|
97
|
-
# then it will rewrite the content stream with renamed resources, so as to avoid name conflicts.
|
98
|
-
def copy_and_secure_for_injection(page)
|
99
|
-
# copy page
|
100
|
-
new_page = create_deep_copy page
|
101
|
-
|
102
|
-
# initiate dictionary from old names to new names
|
103
|
-
names_dictionary = {}
|
104
|
-
|
105
|
-
# itirate through all keys that are name objects and give them new names (add to dic)
|
106
|
-
# this should be done for every dictionary in :Resources
|
107
|
-
# this is a few steps stage:
|
108
|
-
|
109
|
-
# 1. get resources object
|
110
|
-
resources = new_page[:Resources]
|
111
|
-
if resources[:is_reference_only]
|
112
|
-
resources = resources[:referenced_object]
|
113
|
-
raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless resources
|
114
|
-
end
|
115
|
-
|
116
|
-
# 2. establich direct access to dictionaries and remove reference values
|
117
|
-
flatten_resources_dictionaries resources
|
118
|
-
|
119
|
-
# 3. travel every dictionary to pick up names (keys), change them and add them to the dictionary
|
120
|
-
resources.each do |k,v|
|
121
|
-
if v.is_a?(Hash)
|
122
|
-
new_dictionary = {}
|
123
|
-
new_name = "Combine" + SecureRandom.hex(7) + "PDF"
|
124
|
-
i = 1
|
125
|
-
v.each do |old_key, value|
|
126
|
-
new_key = (new_name + i.to_s).to_sym
|
127
|
-
names_dictionary[old_key] = new_key
|
128
|
-
new_dictionary[new_key] = value
|
129
|
-
i += 1
|
130
|
-
end
|
131
|
-
resources[k] = new_dictionary
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
# now that we have replaced the names in the resources dictionaries,
|
136
|
-
# it is time to replace the names inside the stream
|
137
|
-
# we will need to make sure we have access to the stream injected
|
138
|
-
# we will user PDFFilter.inflate_object
|
139
|
-
(new_page[:Contents].is_a?(Array) ? new_page[:Contents] : [new_page[:Contents] ]).each do |c|
|
140
|
-
stream = c[:referenced_object]
|
141
|
-
PDFFilter.inflate_object stream
|
142
|
-
names_dictionary.each do |old_key, new_key|
|
143
|
-
stream[:raw_stream_content].gsub! _object_to_pdf(old_key), _object_to_pdf(new_key) ##### PRAY(!) that the parsed datawill be correctly reproduced!
|
144
|
-
end
|
145
|
-
# patch back to PDF defaults, for OCRed PDF files.
|
146
|
-
# stream[:raw_stream_content] = "q\nq\nq\nDeviceRGB CS\nDeviceRGB cs\n0 0 0 rg\n0 0 0 RG\n0 Tr\n%s\nQ\nQ\nQ\n" % stream[:raw_stream_content]
|
147
|
-
# the following was removed for Acrobat Reader compatability: DeviceRGB CS\nDeviceRGB cs\n
|
148
|
-
stream[:raw_stream_content] = "q\nq\nq\n0 0 0 rg\n0 0 0 RG\n0 Tr\n1 0 0 1 0 0 cm\n%s\nQ\nQ\nQ\n" % stream[:raw_stream_content]
|
149
|
-
end
|
150
|
-
|
151
|
-
new_page
|
152
|
-
end
|
153
|
-
def flatten_resources_dictionaries(resources)
|
154
|
-
resources.each do |k,v|
|
155
|
-
if v.is_a?(Hash) && v[:is_reference_only]
|
156
|
-
if v[:referenced_object]
|
157
|
-
resources[k] = resources[k][:referenced_object].dup
|
158
|
-
resources[k].delete(:indirect_reference_id)
|
159
|
-
resources[k].delete(:indirect_generation_number)
|
160
|
-
elsif v[:indirect_without_dictionary]
|
161
|
-
resources[k] = resources[k][:indirect_without_dictionary]
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
# returns the PDF Object Hash holding the acutal data (if exists) or the original hash (if it wasn't a reference)
|
168
|
-
#
|
169
|
-
# works only AFTER references have been connected.
|
170
|
-
def get_referenced object
|
171
|
-
object[:referenced_object] || object
|
172
|
-
end
|
173
|
-
|
174
|
-
|
175
|
-
# Ruby normally assigns pointes.
|
176
|
-
# noramlly:
|
177
|
-
# a = [1,2,3] # => [1,2,3]
|
178
|
-
# b = a # => [1,2,3]
|
179
|
-
# a << 4 # => [1,2,3,4]
|
180
|
-
# b # => [1,2,3,4]
|
181
|
-
# This method makes sure that the memory is copied instead of a pointer assigned.
|
182
|
-
# this works using recursion, so that arrays and hashes within arrays and hashes are also copied and not pointed to.
|
183
|
-
# One needs to be careful of infinit loops using this function.
|
184
|
-
def create_deep_copy object
|
185
|
-
if object.is_a?(Array)
|
186
|
-
return object.map { |e| create_deep_copy e }
|
187
|
-
elsif object.is_a?(Hash)
|
188
|
-
return {}.tap {|out| object.each {|k,v| out[create_deep_copy(k)] = create_deep_copy(v) unless k == :Parent} }
|
189
|
-
elsif object.is_a?(String)
|
190
|
-
return object.dup
|
191
|
-
else
|
192
|
-
return object # objects that aren't Strings, Arrays or Hashes (such as Symbols and Fixnums) won't be edited inplace.
|
193
|
-
end
|
194
|
-
end
|
195
|
-
# removes id and generation number values, for better comparrison
|
196
|
-
# and avoiding object duplication
|
197
|
-
# objects:: one or more objects in a PDF file/page.
|
198
|
-
def remove_old_ids objects
|
199
|
-
_each_object(objects) {|obj| obj.delete(:indirect_reference_id); obj.delete(:indirect_generation_number)}
|
200
|
-
end
|
201
|
-
def get_refernced_object(objects_array = [], reference_hash = {})
|
202
|
-
objects_array.each do |stored_object|
|
203
|
-
return stored_object if ( stored_object.is_a?(Hash) &&
|
204
|
-
reference_hash[:indirect_reference_id] == stored_object[:indirect_reference_id] &&
|
205
|
-
reference_hash[:indirect_generation_number] == stored_object[:indirect_generation_number] )
|
206
|
-
end
|
207
|
-
warn "didn't find reference #{reference_hash}"
|
208
|
-
nil
|
209
|
-
end
|
210
|
-
def change_references_to_actual_values(objects_array = [], hash_with_references = {})
|
211
|
-
hash_with_references.each do |k,v|
|
212
|
-
if v.is_a?(Hash) && v[:is_reference_only]
|
213
|
-
hash_with_references[k] = PDFOperations.get_refernced_object( objects_array, v)
|
214
|
-
hash_with_references[k] = hash_with_references[k][:indirect_without_dictionary] if hash_with_references[k].is_a?(Hash) && hash_with_references[k][:indirect_without_dictionary]
|
215
|
-
warn "Couldn't connect all values from references - didn't find reference #{hash_with_references}!!!" if hash_with_references[k] == nil
|
216
|
-
hash_with_references[k] = v unless hash_with_references[k]
|
217
|
-
end
|
218
|
-
end
|
219
|
-
hash_with_references
|
220
|
-
end
|
221
|
-
def change_connected_references_to_actual_values(hash_with_references = {})
|
222
|
-
if hash_with_references.is_a?(Hash)
|
223
|
-
hash_with_references.each do |k,v|
|
224
|
-
if v.is_a?(Hash) && v[:is_reference_only]
|
225
|
-
if v[:indirect_without_dictionary]
|
226
|
-
hash_with_references[k] = v[:indirect_without_dictionary]
|
227
|
-
elsif v[:referenced_object]
|
228
|
-
hash_with_references[k] = v[:referenced_object]
|
229
|
-
else
|
230
|
-
raise "Cannot change references to values, as they are disconnected!"
|
231
|
-
end
|
232
|
-
end
|
233
|
-
end
|
234
|
-
hash_with_references.each {|k, v| change_connected_references_to_actual_values(v) if v.is_a?(Hash) || v.is_a?(Array)}
|
235
|
-
elsif hash_with_references.is_a?(Array)
|
236
|
-
hash_with_references.each {|item| change_connected_references_to_actual_values(item) if item.is_a?(Hash) || item.is_a?(Array)}
|
237
|
-
end
|
238
|
-
hash_with_references
|
239
|
-
end
|
240
|
-
def connect_references_and_actual_values(objects_array = [], hash_with_references = {})
|
241
|
-
ret = true
|
242
|
-
hash_with_references.each do |k,v|
|
243
|
-
if v.is_a?(Hash) && v[:is_reference_only]
|
244
|
-
ref_obj = PDFOperations.get_refernced_object( objects_array, v)
|
245
|
-
hash_with_references[k] = ref_obj[:indirect_without_dictionary] if ref_obj.is_a?(Hash) && ref_obj[:indirect_without_dictionary]
|
246
|
-
ret = false
|
247
|
-
end
|
248
|
-
end
|
249
|
-
ret
|
250
|
-
end
|
251
|
-
|
252
|
-
|
253
|
-
def _each_object(object, limit_references = true, first_call = true, &block)
|
254
|
-
# #####################
|
255
|
-
# ## v.1.2 needs optimazation
|
256
|
-
# case
|
257
|
-
# when object.is_a?(Array)
|
258
|
-
# object.each {|obj| _each_object(obj, limit_references, &block)}
|
259
|
-
# when object.is_a?(Hash)
|
260
|
-
# yield(object)
|
261
|
-
# object.each do |k,v|
|
262
|
-
# unless (limit_references && k == :referenced_object)
|
263
|
-
# unless k == :Parent
|
264
|
-
# _each_object(v, limit_references, &block)
|
265
|
-
# end
|
266
|
-
# end
|
267
|
-
# end
|
268
|
-
# end
|
269
|
-
#####################
|
270
|
-
## v.2.1 needs optimazation
|
271
|
-
## version 2.1 is slightly faster then v.1.2
|
272
|
-
@already_visited = [] if first_call
|
273
|
-
unless limit_references
|
274
|
-
@already_visited << object.object_id
|
275
|
-
end
|
276
|
-
case
|
277
|
-
when object.is_a?(Array)
|
278
|
-
object.each {|obj| _each_object(obj, limit_references, false, &block)}
|
279
|
-
when object.is_a?(Hash)
|
280
|
-
yield(object)
|
281
|
-
unless limit_references && object[:is_reference_only]
|
282
|
-
object.each do |k,v|
|
283
|
-
_each_object(v, limit_references, false, &block) unless @already_visited.include? v.object_id
|
284
|
-
end
|
285
|
-
end
|
286
|
-
end
|
287
|
-
end
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
# Formats an object into PDF format. This is used my the PDF object to format the PDF file and it is used in the secure injection which is still being developed.
|
292
|
-
# Serializes a Ruby value to its PDF representation by dispatching on type:
#
# * nil                  -> "null"
# * String               -> _format_string_to_pdf
# * Symbol               -> _format_name_to_pdf
# * Array                -> _format_array_to_pdf
# * Integer/Float/bool   -> the value's to_s followed by one space
# * Hash                 -> _format_hash_to_pdf
# * anything else        -> an empty string
#
# Integers are matched with Integer rather than Fixnum: the Fixnum constant
# was removed in Ruby 3.2 (so referencing it raises NameError), and on older
# Rubies large (Bignum) values did not match Fixnum and were emitted as ''.
def _object_to_pdf(object)
  case object
  when nil
    'null'
  when String
    _format_string_to_pdf(object)
  when Symbol
    _format_name_to_pdf(object)
  when Array
    _format_array_to_pdf(object)
  when Integer, Float, true, false
    "#{object} "
  when Hash
    _format_hash_to_pdf(object)
  else
    ''
  end
end
|
310
|
-
|
311
|
-
# Formats a Ruby String as a PDF string object (returned as ASCII-8BIT).
#
# When @string_output is :literal the result is a literal string "(...)":
# * LF, CR, TAB, BS, FF, "(", ")" and "\" use their named escapes;
# * every other byte below 32 or above 126 is written as a three-digit octal
#   escape (\ddd), as the PDF literal-string syntax requires;
# * all remaining bytes are copied unchanged.
# For any other @string_output value the result is a hexadecimal string
# "<...>".
#
# Fixes over the previous implementation:
# * form feed is byte 0x0C (it was keyed on 0xFF, so real form feeds were
#   mis-escaped and the \f escape was never produced correctly);
# * numeric escapes were written in decimal (e.g. "\128"), which PDF readers
#   interpret as octal and therefore decode to different bytes;
# * byte 255 is now escaped like every other high byte;
# * the replacement table is no longer rebuilt on every call.
def _format_string_to_pdf(object)
  if @string_output == :literal
    escaped = object.each_byte.map do |byte|
      case byte
      when 0x0A then '\\n'
      when 0x0D then '\\r'
      when 0x09 then '\\t'
      when 0x08 then '\\b'
      when 0x0C then '\\f'
      when 0x28 then '\\('
      when 0x29 then '\\)'
      when 0x5C then '\\\\'
      when 0..31, 127..255
        # Always three digits so a following literal digit is not absorbed.
        format('\\%03o', byte)
      else
        byte.chr
      end
    end
    "(#{escaped.join})".force_encoding(Encoding::ASCII_8BIT)
  else
    # A hexadecimal string is written as hexadecimal digits encoded as ASCII
    # characters and enclosed within angle brackets (3Ch and 3Eh).
    "<#{object.unpack('H*')[0]}>".force_encoding(Encoding::ASCII_8BIT)
  end
end
|
333
|
-
# Formats a name (usually a Symbol) as a PDF name object.
#
# The name is introduced by a SOLIDUS (/), which is not part of the name.
# Each byte of object.to_s is then written as follows:
# * bytes 0..32, bytes 127 and above, the NUMBER SIGN (#) and the characters
#   % ( ) / < > [ ] { } are written as "#" followed by their two-digit
#   (lowercase) hexadecimal code;
# * every other byte (the remaining printable ASCII range) is written as
#   itself.
def _format_name_to_pdf(object)
  # '#', '%', '(', ')', '/', '<', '>', '[', ']', '{', '}'
  must_escape = [35, 37, 40, 41, 47, 60, 62, 91, 93, 123, 125]
  pieces = object.to_s.each_byte.map do |byte|
    if byte <= 32 || byte >= 127 || must_escape.include?(byte)
      format('#%02x', byte)
    else
      byte.chr
    end
  end
  '/' + pieces.join
end
|
355
|
-
# Formats an Array as a PDF array object (returned as ASCII-8BIT).
#
# An array is written as a sequence of objects enclosed in square brackets
# (5Bh and 5Dh), e.g. [549 3.14 false (Ralph) /SomeName]. Every element is
# serialized with _object_to_pdf and the results are joined by single spaces.
def _format_array_to_pdf(object)
  serialized_items = object.map { |item| _object_to_pdf(item) }
  "[#{serialized_items.join(' ')}]".force_encoding(Encoding::ASCII_8BIT)
end
|
361
|
-
|
362
|
-
# Formats a Hash as PDF syntax (returned as ASCII-8BIT). Depending on its
# keys, the Hash is written as a reference, an indirect object wrapping a
# plain value, or a dictionary (optionally indirect and/or a stream).
#
# Side effects on +object+:
# * a reference-only Hash gets :indirect_reference_id and
#   :indirect_generation_number copied from its :referenced_object (when that
#   is a Hash), each defaulting to 0 when missing;
# * an indirect object gets :indirect_generation_number defaulted to 0;
# * a Hash with :raw_stream_content gets :Length set to the content's
#   bytesize.
#
# Keys listed in PRIVATE_HASH_KEYS are not written to the dictionary.
def _format_hash_to_pdf(object)
  binary = Encoding::ASCII_8BIT

  # A reference only carries the target's id and generation: "<id> <gen> R".
  if object[:is_reference_only]
    target = object[:referenced_object]
    if target && target.is_a?(Hash)
      object[:indirect_reference_id] = target[:indirect_reference_id]
      object[:indirect_generation_number] = target[:indirect_generation_number]
    end
    object[:indirect_reference_id] ||= 0
    object[:indirect_generation_number] ||= 0
    return "#{object[:indirect_reference_id]} #{object[:indirect_generation_number]} R".force_encoding(binary)
  end

  parts = []

  # Indirect objects are wrapped in "<id> <gen> obj" ... "endobj".
  if object[:indirect_reference_id]
    object[:indirect_generation_number] ||= 0
    parts << "#{object[:indirect_reference_id]} #{object[:indirect_generation_number]} obj\n".force_encoding(binary)
    if object[:indirect_without_dictionary]
      # The indirect object wraps a plain value instead of a dictionary.
      parts << _object_to_pdf(object[:indirect_without_dictionary])
      parts << "\nendobj\n"
      return parts.join.force_encoding(binary)
    end
  end

  # Correct the stream length, if the object is a stream.
  object[:Length] = object[:raw_stream_content].bytesize if object[:raw_stream_content]

  # A dictionary is written as key-value pairs enclosed in double angle
  # brackets (<<...>>), using LESS-THAN (3Ch) and GREATER-THAN (3Eh) signs.
  parts << "<<\n".force_encoding(binary)
  object.each do |key, value|
    next if PRIVATE_HASH_KEYS.include?(key)

    parts << "#{_object_to_pdf key} #{_object_to_pdf value}\n".force_encoding(binary)
  end
  parts << ">>".force_encoding(binary)

  stream_content = object[:raw_stream_content]
  parts << "\nstream\n#{stream_content}\nendstream".force_encoding(binary) if stream_content
  parts << "\nendobj\n" if object[:indirect_reference_id]
  parts.join.force_encoding(binary)
end
|
403
|
-
end
|
404
|
-
end
|
405
|
-
|
406
|
-
#########################################################
|
407
|
-
# this file is part of the CombinePDF library and the code
|
408
|
-
# is subject to the same license (MIT).
|
409
|
-
#########################################################
|
410
|
-
# PDF object types cross reference:
|
411
|
-
# Indirect objects, references, dictionaries and streams are Hash
|
412
|
-
# arrays are Array
|
413
|
-
# strings are String
|
414
|
-
# names are Symbols (String.to_sym)
|
415
|
-
# numbers are Fixnum or Float
|
416
|
-
# boolean are TrueClass or FalseClass
|