combine_pdf 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/combine_pdf.rb +467 -0
- data/lib/combine_pdf/combine_pdf_basic_writer.rb +110 -0
- data/lib/combine_pdf/combine_pdf_decrypt.rb +198 -0
- data/lib/combine_pdf/combine_pdf_filter.rb +72 -0
- data/lib/combine_pdf/combine_pdf_parser.rb +315 -0
- data/lib/combine_pdf/combine_pdf_pdf.rb +396 -0
- metadata +66 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
########################################################
|
3
|
+
## Thoughts from reading the ISO 32000-1:2008
|
4
|
+
## this file is part of the CombinePDF library and the code
|
5
|
+
## is subject to the same license.
|
6
|
+
########################################################
|
7
|
+
|
8
|
+
module CombinePDF
|
9
|
+
|
10
|
+
class PDFWriter
|
11
|
+
|
12
|
+
## Sets up an empty writer.
## - media_box: stored as-is in @media_box. The default is
##   [0.0, 0.0, 612.0, 792.0]; this file states that each unit is 1/72 inch.
def initialize(media_box = [0.0, 0.0, 612.0, 792.0])
  @media_box = media_box
  # starts out as an empty Hash; nothing in this method fills it
  @content_stream = Hash.new
end
|
16
|
+
|
17
|
+
########################################################
|
18
|
+
## textbox
|
19
|
+
## - font_name: :font_name
|
20
|
+
## The PostScript names of 14 Type 1 fonts, known as the standard 14 fonts, are as follows:
|
21
|
+
## Times-Roman, Helvetica, Courier, Symbol, Times-Bold, Helvetica-Bold, Courier-Bold, ZapfDingbats, Times-Italic, Helvetica- Oblique, Courier-Oblique, Times-BoldItalic, Helvetica-BoldOblique, Courier-BoldOblique
|
22
|
+
## - text_color: [R, G, B]
|
23
|
+
## an array with three floats, each in a value between 0 to 1.
|
24
|
+
## First value is Red, second Green and last is Blue (RGB color system)
|
25
|
+
## Builds the raw PDF text-object operators ("BT ... ET") for +text+.
## - text: the string to draw; it is formatted with PDFOperations._format_string_to_pdf.
## - args: a Hash overriding any of the defaults listed below.
## Returns the operators as a String.
##
## Only :font_name, :font_type, :font_object, :font_size, :text_color,
## :opacity, :x and :y are read by this method. The other options
## (alignment, border, background, length, height) are accepted but unused.
def add_text_box(text, args = {})
  options = {
    text_alignment: :center,
    text_color: [1,1,1],
    # text_stroke: nil,
    font_name: :Helvetica,
    font_type: :Type1,
    font_object: nil,
    font_size: 12,
    border_color: nil,
    border_width: nil,
    border_radius: nil,
    background_color: nil,
    opacity: 1,
    x: 0,
    y: 0,
    length: -1,
    height: -1,
  }.merge(args) # caller supplied values win over the defaults

  # create font object
  supplied_font = options[:font_object]
  font_object = { Type: :Font, Subtype: options[:font_type], BaseFont: options[:font_name] }
  if supplied_font.is_a?(Hash) && supplied_font[:indirect_reference_id] && supplied_font[:indirect_generation_number] && (supplied_font[:is_reference_only] != true)
    # NOTE(review): the previous code wrapped the freshly built default font
    # here, which ignored the supplied object; referencing the supplied
    # object looks like the intent - confirm.
    font_object = { is_reference_only: true, referenced_object: supplied_font }
  end

  # create resources object
  font_name = "MyFont#{rand(99)}".to_sym
  # NOTE(review): the resources object is built but not attached to anything yet.
  resources_object = { Resources: { Font: { font_name => font_object } } }

  # x,y are calculated from the bottom left; each unit (1) is 1/72 Inch
  x = options[:x]
  y = options[:y]
  red, green, blue = options[:text_color]

  # create text stream
  text_stream = "BT\n".dup # the Begin Text marker
  text_stream << PDFOperations._format_name_to_pdf(font_name) # set font name
  text_stream << " #{options[:font_size].to_f} Tf\n" # set font size and add font operator
  text_stream << "#{red} #{green} #{blue} rg\n" # sets the color state
  text_stream << " #{options[:opacity].to_f} ca\n" # set opacity (alpha) for graphic state.
  text_stream << "#{x} #{y} Td\n" # set location for text object
  text_stream << PDFOperations._format_string_to_pdf(text) # insert the string in PDF format
  text_stream << " Tj\n ET\n" # the Text object operator and the End Text marker
end
|
70
|
+
|
71
|
+
########################################################
|
72
|
+
## add_content_to_pages(pages = [], location = :above)
|
73
|
+
## pages - a page hash or an array of pages
|
74
|
+
## location - :above to place content over existing content or :below to place content under existing content
|
75
|
+
## Walks +pages+ and (eventually) adds the content stream to each page.
## - pages: a page Hash or an Array of pages (Arrays may be nested).
## - location: passed along unchanged on every recursive call.
## Returns the Array it was given, or nil for any other input.
def add_content_to_pages(pages = [], location = :above)
  case pages
  when Array
    pages.each { |page| add_content_to_pages(page, location) }
  when Hash
    #####
    ## add content stream to page (not implemented yet)
    nil
  end
end
|
83
|
+
########################################################
|
84
|
+
## make_into_page()
|
85
|
+
## takes no arguments and returns the content stream within a page (to be added as an independent page to the PDF object)
|
86
|
+
## Returns a new Hash describing a page. At this point it only carries
## the :Type entry; the content stream is not included.
def make_into_page
  page = {}
  page[:Type] = :Page
  page
end
|
89
|
+
|
90
|
+
########################################################
|
91
|
+
## to_pdf()
|
92
|
+
## prints out the content stream as raw PDF
|
93
|
+
## file_name - the name of the file to which to save the data (will be overwritten).
|
94
|
+
## if file_name is given, save to file.
|
95
|
+
## Wraps the page from make_into_page in a new PDF object.
## - file_name: when given, the result of PDF#save(file_name) is returned;
##   otherwise the result of PDF#to_pdf is returned.
def to_pdf(file_name = nil)
  document = PDF.new
  document << make_into_page
  return document.save(file_name) if file_name
  document.to_pdf
end
|
104
|
+
|
105
|
+
end
|
106
|
+
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
|
@@ -0,0 +1,198 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
########################################################
|
3
|
+
## Thoughts from reading the ISO 32000-1:2008
|
4
|
+
## this file is part of the CombinePDF library and the code
|
5
|
+
## is subject to the same license.
|
6
|
+
########################################################
|
7
|
+
|
8
|
+
module CombinePDF
|
9
|
+
class PDFDecrypt
|
10
|
+
|
11
|
+
## Prepares a decryptor.
## - objects: the collection of parsed objects; stored in @objects.
## - root_doctionary: a Hash whose :Encrypt entry is the encryption dictionary.
## Raises a RuntimeError when there is no :Encrypt entry.
def initialize(objects = [], root_doctionary = {})
  @objects = objects
  @encryption_dictionary = root_doctionary[:Encrypt]
  unless @encryption_dictionary
    raise "Cannot decrypt an encrypted file without an encryption dictionary!"
  end
  @root_doctionary = root_doctionary
  # the 32 padding bytes used by set_general_key, written as hex rows
  @padding_key = [
    "28BF4E5E4E758A41",
    "64004E56FFFA0108",
    "2E2E00B6D0683E80",
    "2F0CA9FE6453697A"
  ].join.scan(/../).map { |pair| pair.hex }
  @key_crypt_first_iv_store = nil
  @encryption_iv = nil
  PDFOperations.change_references_to_actual_values(@objects, @encryption_dictionary)
end
|
24
|
+
## Computes the file encryption key (ISO 32000-1, Algorithm 2), stores it
## in @key and returns it.
## - password: the user password; an empty password uses the padding bytes only.
##
## Reads :O, :P, :R, :Length and :EncryptMetadata from @encryption_dictionary
## and the first element of @root_doctionary[:ID].
def set_general_key(password = "")
  binary = Encoding::ASCII_8BIT
  dictionary = @encryption_dictionary
  revision = dictionary[:R].to_i
  # n = key length in BYTES: always 5 for revision 2, otherwise /Length
  # (given in bits, default 40) divided by 8; MD5 yields at most 16 bytes.
  key_bytes = 5
  if revision >= 3
    key_bytes = (dictionary[:Length] || 40).to_i / 8
    key_bytes = [[key_bytes, 5].max, 16].min
  end

  # 1) make sure the initial key is 32 bytes long (if no password, uses padding).
  key = (password.to_s.bytes.to_a.first(32) + @padding_key).first(32).pack('C*')
  # 2) add the value of the encryption dictionary's O entry
  key << dictionary[:O].to_s.dup.force_encoding(binary)
  # 3) the P entry as a 32-bit number, low-order byte first
  #    ('l<' is little-endian on every platform, unlike the native 'i')
  key << [dictionary[:P].to_i].pack('l<')
  # 4) the first element of the file identifier array
  key << @root_doctionary[:ID][0].to_s.dup.force_encoding(binary)
  # 4(a) revision 4 or greater: ONLY when document metadata is not being
  #      encrypted, add 4 bytes with the value 0xFFFFFFFF (nothing otherwise).
  if revision >= 4 && dictionary[:EncryptMetadata] == false
    key << ([0xFF] * 4).pack('C*')
  end
  # 5) pass everything through MD5
  key = Digest::MD5.digest(key)
  # 5(a) revision 3 or greater: re-hash the first n bytes 50 times
  50.times { key = Digest::MD5.digest(key[0, key_bytes]) } if revision >= 3
  # 6) the encryption key is the first n bytes of the final hash
  @key = key[0, key_bytes]
end
|
67
|
+
## Decrypts every object in @objects in place and returns @objects.
## Only the :Standard filter with :V of 1 or 2 (handled with RC4) is
## attempted; anything else ends in raise_encrypted_error.
def decrypt
  dictionary = @encryption_dictionary
  raise_encrypted_error dictionary unless dictionary[:Filter] == :Standard
  @key = set_general_key
  if [1, 2].include?(dictionary[:V])
    warn "trying to decrypt with RC4."
    # raise_encrypted_error
    _perform_decrypt_proc_ @objects, method(:decrypt_RC4)
  else
    raise_encrypted_error
  end
  #rebuild stream lengths?
  @objects
end
|
81
|
+
## The "no encryption" handler: returns +encrypted+ untouched.
## The remaining parameters only exist so the signature matches the other
## decrypt_* handlers. (It used to return the literal text "encrypted".)
def decrypt_none(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
  encrypted
end
|
84
|
+
## Decrypts +encrypted+ with RC4 using a per-object key (ISO 32000-1, Algorithm 1).
## - encrypted_id / encrypted_generation: the object and generation numbers
##   of the indirect object that owns the data.
## - encrypted_filter: accepted for signature compatibility; not used here.
## Returns whatever RC4#decrypt returns for the data.
def decrypt_RC4(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
  ## start decryption using padding strings
  object_key = @key.dup
  # low-order 3 bytes of the id and 2 bytes of the generation, low byte
  # first; 'V' is little-endian everywhere (the native 'i' is not).
  object_key << [encrypted_id].pack('V')[0, 3]
  object_key << [encrypted_generation].pack('V')[0, 2]
  # the RC4 key is the first (n + 5) bytes of the hash, at most 16
  key_length = [object_key.length, 16].min
  rc4 = RC4.new(Digest::MD5.digest(object_key)[0, key_length])
  rc4.decrypt(encrypted)
end
|
95
|
+
## Decrypts +encrypted+ with AES-128 in CBC mode using a per-object key
## (ISO 32000-1, Algorithm 1 with the "sAlT" suffix).
## - encrypted: the raw data; its first 16 bytes are the initialization
##   vector and the rest is the cipher text.
## - encrypted_id / encrypted_generation: numbers of the owning indirect object.
## - encrypted_filter: accepted for signature compatibility; not used here.
## Returns the decrypted data as a String, like the other decrypt_* handlers.
## Data too short to hold an IV plus one block yields an empty String.
##
## NOTE(review): the previous version never gave the cipher a key or an IV
## and returned an Array of bytes; nothing visible in this file calls this
## method yet, so confirm against real AES encrypted files.
def decrypt_AES(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
  return String.new unless encrypted.is_a?(String) && encrypted.bytesize >= 32

  ## start decryption using padding strings
  object_key = @key.dup
  (0..2).each { |e| object_key << (encrypted_id >> e*8 & 0xFF ) }
  (0..1).each { |e| object_key << (encrypted_generation >> e*8 & 0xFF ) }
  # the key length is counted before the salt is added: min(n + 5, 16)
  key_length = [object_key.bytesize, 16].min
  object_key << "sAlT"

  cipher = OpenSSL::Cipher.new("aes-128-cbc")
  cipher.decrypt
  cipher.key = Digest::MD5.digest(object_key)[0, key_length]
  # every string / stream carries its own IV in its first 16 bytes
  cipher.iv = encrypted.byteslice(0, 16)
  cipher.update(encrypted.byteslice(16, encrypted.bytesize - 16)) + cipher.final
end
|
113
|
+
## Recursively applies +decrypt_proc+ to every String found inside +object+.
## - object: an Array, Hash, String or anything else (returned untouched).
## - decrypt_proc: called as
##   decrypt_proc.call(data, encrypted_id, encrypted_generation, encrypted_filter).
## - encrypted_id / encrypted_generation / encrypted_filter: normally left
##   nil; they are taken from the first Hash met on the way down and then
##   passed to everything nested inside it.
## Arrays and Hashes are changed in place and returned; for a String the
## decrypted value is returned. Hash keys are never decrypted.
def _perform_decrypt_proc_(object, decrypt_proc, encrypted_id = nil, encrypted_generation = nil, encrypted_filter = nil)
  case object
  when Array
    object.map! { |item| _perform_decrypt_proc_(item, decrypt_proc, encrypted_id, encrypted_generation, encrypted_filter) }
  when Hash
    encrypted_id ||= object[:indirect_reference_id]
    encrypted_generation ||= object[:indirect_generation_number]
    encrypted_filter ||= object[:Filter]
    if object[:raw_stream_content]
      stream_length = object[:Length]
      if stream_length.is_a?(Hash) && stream_length[:is_reference_only]
        referenced = PDFOperations.get_refernced_object(@objects, stream_length)
        stream_length = referenced.is_a?(Hash) ? referenced[:indirect_without_dictionary] : nil
      end
      actual_length = object[:raw_stream_content].length
      # a missing or unresolved /Length must not crash: use what we have
      stream_length = actual_length unless stream_length.is_a?(Integer)
      warn "Stream registeded length was #{object[:Length].to_s} and the actual length was #{actual_length}." if actual_length < stream_length
      length = [stream_length, actual_length].min
      object[:raw_stream_content] = decrypt_proc.call(object[:raw_stream_content][0...length], encrypted_id, encrypted_generation, encrypted_filter)
    end
    # the stream content was handled above; assumes no decrypting is ever performed on keys
    object.each do |k, v|
      next if k == :raw_stream_content
      next unless v.is_a?(Hash) || v.is_a?(Array) || v.is_a?(String)
      object[k] = _perform_decrypt_proc_(v, decrypt_proc, encrypted_id, encrypted_generation, encrypted_filter)
    end
  when String
    decrypt_proc.call(object, encrypted_id, encrypted_generation, encrypted_filter)
  else
    object
  end
end
|
139
|
+
|
140
|
+
## Prints the offending data with warn (one comma separated part per line)
## and then raises a RuntimeError.
## - object: the data to print; defaults to the encryption dictionary.
def raise_encrypted_error(object = nil)
  object = @encryption_dictionary.to_s.split(',').join("\n") unless object
  details = object.to_s.split(',').join("\n")
  warn "Data raising exception:\n #{details}"
  raise "File is encrypted - not supported."
end
|
145
|
+
end
|
146
|
+
#####################################################
|
147
|
+
## The following isn't my code!!!!
|
148
|
+
## It is subject to a different license and copyright.
|
149
|
+
## This was the code for the RC4 Gem,
|
150
|
+
## ... I had a bad internet connection so I ended up
|
151
|
+
## copying it from the web page I had in my cache.
|
152
|
+
## This wonderful work was done by Caige Nichols.
|
153
|
+
#####################################################
|
154
|
+
|
155
|
+
class RC4
  ## Builds the cipher state from the key +str+.
  ## Raises SyntaxError when the key is an empty string.
  def initialize(str)
    raise SyntaxError, "RC4: Key supplied is blank" if str.eql?('')

    @q1 = 0
    @q2 = 0
    # repeat the key bytes until there are 256 of them, then cut to exactly 256
    @key = []
    @key.concat(str.each_byte.to_a) until @key.size >= 256
    @key = @key.first(256)
    # key scheduling: start from the identity permutation and mix in the key
    @s = Array.new(256) { |n| n }
    swap = 0
    256.times do |i|
      swap = (swap + @s[i] + @key[i]) & 0xFF
      @s[i], @s[swap] = @s[swap], @s[i]
    end
  end

  ## Returns the processed text; +text+ itself is not modified.
  def encrypt!(text)
    process(text)
  end

  ## Returns the processed text, working on a copy of +text+.
  def encrypt(text)
    process(text.dup)
  end

  # the same keystream is XOR-ed both ways, so decrypting is encrypting
  alias_method :decrypt, :encrypt

  private

  ## XORs every byte of +text+ with the next keystream byte.
  def process(text)
    text.each_byte.map { |byte| byte ^ round }.pack("C*")
  end

  ## Advances the internal state and returns the next keystream byte.
  def round
    @q1 = (@q1 + 1) & 0xFF
    @q2 = (@q2 + @s[@q1]) & 0xFF
    @s[@q1], @s[@q2] = @s[@q2], @s[@q1]
    @s[(@s[@q1] + @s[@q2]) & 0xFF]
  end
end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
########################################################
|
3
|
+
## Thoughts from reading the ISO 32000-1:2008
|
4
|
+
## this file is part of the CombinePDF library and the code
|
5
|
+
## is subject to the same license.
|
6
|
+
########################################################
|
7
|
+
|
8
|
+
module CombinePDF
|
9
|
+
|
10
|
+
module PDFFilter
  module_function

  ## Encoding (compressing) is not implemented: always returns false.
  def deflate_object(object = nil)
    false
  end

  ## Decodes object[:raw_stream_content] in place, one filter at a time.
  ## - object: a stream dictionary (Hash) with :Filter, optional
  ##   :DecodeParms and :raw_stream_content.
  ## - filter: accepted for compatibility; not used.
  ## Returns true when every filter was applied (:Filter and :DecodeParms
  ## are then removed and :Length updated), false when +object+ is not a
  ## Hash or an unsupported filter is met. If some filters were already
  ## applied at that point, the remaining ones are written back to +object+.
  ## Only :FlateDecode without a predictor is supported; a Predictor above
  ## 1 raises through raise_unsupported_error.
  def inflate_object(object = nil, filter = :none)
    return false unless object.is_a?(Hash)

    # follow an already resolved reference, leave anything else alone
    resolve = lambda do |value|
      value.is_a?(Hash) && value[:is_reference_only] ? value[:referenced_object] : value
    end

    filters = resolve.call(object[:Filter])
    filters = case filters
              when Symbol then [filters]
              when Array then filters.dup # never shift the caller's array
              else []
              end
    params = resolve.call(object[:DecodeParms])
    params = params.is_a?(Array) ? params.dup : [params]

    decoded_any = false
    while (current = filters.first)
      case current
      when :FlateDecode
        parms = resolve.call(params.first)
        # Predictor 1 (or no parameters at all) means "no prediction";
        # anything else would need post-processing that is not implemented.
        plain = parms.nil? ||
                (parms.is_a?(Hash) && !parms[:is_reference_only] && parms[:Predictor].to_i <= 1)
        raise_unsupported_error parms unless plain
        object[:raw_stream_content] = Zlib::Inflate.inflate object[:raw_stream_content]
        object[:Length] = object[:raw_stream_content].bytesize
        decoded_any = true
      else
        if decoded_any
          # keep the dictionary describing what is still encoded
          object[:Filter] = filters
          params.compact.empty? ? object.delete(:DecodeParms) : object[:DecodeParms] = params
        end
        return false
      end
      filters.shift
      params.shift
    end
    object.delete(:Filter)
    object.delete(:DecodeParms) # meaningless once no filter is left
    true
  end

  ## Raises a RuntimeError naming the unsupported filter / parameters.
  def raise_unsupported_error(object = {})
    raise "Filter #{object} unsupported. couldn't deflate object"
  end

end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
|
@@ -0,0 +1,315 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
########################################################
|
3
|
+
## Thoughts from reading the ISO 32000-1:2008
|
4
|
+
## this file is part of the CombinePDF library and the code
|
5
|
+
## is subject to the same license.
|
6
|
+
########################################################
|
7
|
+
module CombinePDF
|
8
|
+
|
9
|
+
########################################################
|
10
|
+
## This is the Parser class.
|
11
|
+
## It takes PDF data and parses it, returning an array
|
12
|
+
## of data.
|
13
|
+
## That array can be used to initialize a PDF object.
|
14
|
+
## The Parser class doesn't involve itself with the
|
15
|
+
## file version.
|
16
|
+
########################################################
|
17
|
+
|
18
|
+
class PDFParser
|
19
|
+
# LITERAL_STRING_REPLACEMENT_HASH = {
|
20
|
+
# 110 => 10, # "\\n".bytes = [92, 110] "\n".ord = 10
|
21
|
+
# 114 => 13, #r
|
22
|
+
# 116 => 9, #t
|
23
|
+
# 98 => 8, #b
|
24
|
+
# 102 => 255, #f
|
25
|
+
# 40 => 40, #(
|
26
|
+
# 41 => 41, #)
|
27
|
+
# 92 => 92 #\
|
28
|
+
# }
|
29
|
+
attr_reader :parsed, :version, :info_object, :root_object
|
30
|
+
## Prepares a parser for the PDF data in +string+.
## - string: the raw PDF data. It is copied before being tagged as binary,
##   so the caller's string keeps its encoding and frozen strings work.
## Raises TypeError when +string+ is not a String.
def initialize(string)
  raise TypeError, "couldn't parse and data, expecting type String" unless string.is_a? String
  @string_to_parse = string.dup.force_encoding(Encoding::ASCII_8BIT)
  @literal_strings = []
  @hex_strings = []
  @streams = []
  @parsed = []
  @root_object = {}
  @info_object = {}
  @version = nil
  @scanner = nil
end
|
42
|
+
|
43
|
+
## Parses the data given to the constructor and returns the Array of
## parsed objects (also available through #parsed). A second call returns
## the cached result.
##
## Along the way it sets @version (from the "%PDF-x.y" header, nil when
## there is none), @root_object (the trailer, or the merged XRef stream
## dictionaries) and @info_object. Encrypted files are handed to
## PDFDecrypt, and objects stored in object streams are extracted into the
## top level collection.
## Raises a RuntimeError when no root (trailer / XRef dictionary) is found.
def parse
  return @parsed unless @parsed.empty?
  @scanner = StringScanner.new @string_to_parse
  @scanner.pos = 0
  if @scanner.scan(/\%PDF\-[\d\-\.]+/)
    @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
  end

  warn "Starting to parse PDF data."
  @parsed = _parse_

  if @root_object == {}
    # no trailer was found: fall back to the XRef stream dictionaries
    xref_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :XRef}
    xref_streams.each do |xref_dictionary|
      @root_object.merge! xref_dictionary
    end
  end
  raise "root is unknown - cannot determine if file is Encrypted" if @root_object == {}
  warn "Injecting actual values into root object: #{@root_object}."
  PDFOperations.change_references_to_actual_values @parsed, @root_object

  if @root_object[:Encrypt]
    warn "PDF is Encrypted! Attempting to unencrypt - not yet fully supported."
    decryptor = PDFDecrypt.new @parsed, @root_object
    decryptor.decrypt
    #do we really need to apply to @parsed? No, there is no need.
  end
  # object streams exist from PDF 1.5; a missing header (nil version) must
  # not crash here, so an unknown version is simply checked as well.
  if @version.nil? || @version >= 1.5
    ## search for objects streams
    object_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm}
    unless object_streams.empty?
      warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."

      object_streams.each do |o|
        warn "Attempting #{o.select {|k,v| k != :raw_stream_content}}"
        ## un-encode (using the correct filter) the object streams
        PDFFilter.inflate_object o
        ## extract objects from stream to top level array @parsed
        @scanner = StringScanner.new o[:raw_stream_content]
        stream_data = _parse_
        # the stream starts with (object number, offset) pairs; only the
        # numbers are kept. Integer replaces Fixnum, which newer Rubies removed.
        id_array = []
        while stream_data[0].is_a? Integer
          id_array << stream_data.shift
          stream_data.shift
        end
        while stream_data[0].is_a? Hash
          stream_data[0][:indirect_reference_id] = id_array.shift
          stream_data[0][:indirect_generation_number] = 0
          @parsed << stream_data.shift
        end
      end
      ## remove object streams and XREF dictionaries
      @parsed.reject! do |obj|
        obj.is_a?(Hash) && (obj[:Type] == :ObjStm || obj[:Type] == :XRef)
      end
    end
  end
  PDFOperations.change_references_to_actual_values @parsed, @root_object
  @info_object = @root_object[:Info]
  if @info_object && @info_object.is_a?(Hash)
    @parsed.delete @info_object
    PDFOperations.change_references_to_actual_values @parsed, @info_object
    PRIVATE_HASH_KEYS.each {|key| @info_object.delete key}
  else
    @info_object = {}
  end
  warn "setting parsed collection and returning collection."
  @parsed
end
|
112
|
+
|
113
|
+
protected
|
114
|
+
|
115
|
+
## Reads tokens from @scanner and returns them as an Array.
## The method calls itself for arrays and dictionaries and returns to the
## caller when the matching "]" or ">>" is met (or the data ends).
## What is produced:
## - arrays become Arrays, dictionaries become Hashes with Symbol keys;
## - names become Symbols, numbers Integers / Floats, true / false / null
##   become true / false / nil, strings become binary Strings;
## - "n g R" becomes a Hash with :is_reference_only;
## - "endobj" folds the object and generation numbers into the object;
## - a stream is stored under :raw_stream_content of the preceding Hash;
## - a trailer dictionary is stored in @root_object.
def _parse_
  out = []
  str = ''
  # warn "Scaning for objects, starting at #{@scanner.pos}: #{@scanner.peek(10)}"
  while @scanner.rest? do
    case
    ##########################################
    ## parse an Array
    ##########################################
    when @scanner.scan(/\[/)
      out << _parse_
    ##########################################
    ## parse a Dictionary
    ##########################################
    when @scanner.scan(/<</)
      data = _parse_
      obj = {}
      obj[data.shift] = data.shift while data[0]
      out << obj
    ##########################################
    ## return content of array or dictionary
    ##########################################
    when @scanner.scan(/\]/), @scanner.scan(/>>/)
      return out
    ##########################################
    ## parse a Stream
    ##########################################
    when @scanner.scan(/stream[\r]?[\n]/)
      str = @scanner.scan_until(/endstream/)
      if str
        str = str[0...-9] # remove the "endstream" keyword
        # cuts only one EOL char (\n or \r), and only when there is one
        str = str[0...-1] if str.end_with?("\n", "\r")
      else
        # truncated data: no "endstream" - take whatever is left
        warn "Stream is missing its endstream marker!"
        str = @scanner.rest
        @scanner.terminate
      end
      if out.last.is_a? Hash
        out.last[:raw_stream_content] = str
      else
        warn "Stream not attached to dictionary!"
        out << str.force_encoding(Encoding::ASCII_8BIT)
      end
    ##########################################
    ## parse an Object after finished
    ##########################################
    when str = @scanner.scan(/endobj/)
      # the array ends with: id, generation, value - pop them in reverse
      if out.last.is_a? Hash
        out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
      else
        out << {indirect_without_dictionary: out.pop, indirect_generation_number: out.pop, indirect_reference_id: out.pop}
      end
    ##########################################
    ## parse a Hex String
    ##########################################
    when str = @scanner.scan(/<[0-9a-fA-F\s]*>/)
      # upper case digits and white space are legal inside a hex string;
      # pack treats a missing final digit as 0.
      out << [str[1..-2].gsub(/\s+/, '')].pack('H*')
    ##########################################
    ## parse a Literal String
    ##########################################
    when @scanner.scan(/\(/)
      out << _read_literal_string_
    ##########################################
    ## Parse a comment
    ##########################################
    when str = @scanner.scan(/\%/)
      # is a comment, skip until new line (or the end of the data)
      @scanner.terminate unless @scanner.skip_until(/[\n\r]+/)
    ##########################################
    ## Parse a Name
    ##########################################
    # all characters that aren't white space or special
    when str = @scanner.scan(/\/[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+/)
      out << ( str[1..-1].gsub(/\#[0-9a-fA-F]{2}/) {|a| a[1..2].hex.chr } ).to_sym
    ##########################################
    ## Parse a Number
    ##########################################
    when str = @scanner.scan(/[\+\-\.\d]+/)
      str.match(/\./) ? (out << str.to_f) : (out << str.to_i)
    ##########################################
    ## Parse an Object Reference
    ##########################################
    when @scanner.scan(/R/)
      out << { is_reference_only: true, indirect_generation_number: out.pop, indirect_reference_id: out.pop}
    ##########################################
    ## Parse Bool - true and after false
    ##########################################
    when @scanner.scan(/true/)
      out << true
    when @scanner.scan(/false/)
      out << false
    ##########################################
    ## Parse NULL - null
    ##########################################
    when @scanner.scan(/null/)
      out << nil
    ##########################################
    ## XREF - check for encryption... anything else?
    ##########################################
    when @scanner.scan(/xref/)
      ##########
      ## get root object to check for encryption
      ## (nothing to do when neither "trailer" nor "%EOF" follows)
      if @scanner.scan_until(/(trailer)|(\%EOF)/) && @scanner.matched[-1] == 'r'
        if @scanner.skip_until(/<</)
          data = _parse_
          @root_object = {}
          @root_object[data.shift] = data.shift while data[0]
        end
        ##########
        ## skip until end of segment, marked by %%EOF
        @scanner.skip_until(/\%\%EOF/)
      end

    when @scanner.scan(/[\s]+/) , @scanner.scan(/obj[\s]*/)
      # do nothing
      nil
    else
      # always advance
      @scanner.pos = @scanner.pos + 1
    end
  end
  out
end

## Reads a literal string whose opening "(" was just consumed and returns
## its decoded content as a binary String. Balanced and escaped
## parentheses are handled. When the data ends before the string is
## closed, a warning is printed and what was read so far is returned.
def _read_literal_string_
  raw = String.new
  depth = 1
  while depth > 0 && @scanner.rest? do
    chunk = @scanner.scan_until(/[\(\)]/)
    unless chunk
      # no parenthesis left: stop instead of looping forever
      warn "Unknown error parsing string at #{@scanner.pos}!"
      break
    end
    raw << chunk
    # an odd number of backslashes right before the parenthesis escapes it
    seperator_count = 0
    seperator_count += 1 while raw[-2-seperator_count] == "\\"
    next if seperator_count.odd?
    raw[-1] == '(' ? depth += 1 : depth -= 1
  end
  # the closing parenthesis is not part of the string
  raw = raw[0..-2] if depth == 0

  # convert the PDF formatted string to a regular string
  bytes = raw.bytes.to_a
  result = []
  i = 0
  while i < bytes.length
    byte = bytes[i]
    i += 1
    case byte
    when 13 # eol - \r
      # An unescaped end-of-line marker is treated as one LINE FEED,
      # whether it was a CARRIAGE RETURN, a LINE FEED, or both.
      i += 1 if bytes[i] == 10
      result << 10
    when 10 # eol - \n
      i += 1 if bytes[i] == 13
      result << 10
    when 92 # "\\".ord == 92
      rep = bytes[i]
      i += 1
      case rep
      when nil
        # a lone backslash at the very end of the string: ignore it
        nil
      when 110 #n
        result << 10 #new line
      when 114 #r
        result << 13 # CR
      when 116 #t
        result << 9 #tab
      when 98 #b
        result << 8
      when 102 #f
        result << 12 # form feed
      when 48..55 # \ddd - one to three OCTAL digits
        value = rep - 48
        2.times do
          break unless bytes[i] && bytes[i].between?(48, 55)
          value = value * 8 + (bytes[i] - 48)
          i += 1
        end
        result << (value & 0xFF) # high-order overflow is ignored
      when 10 # escaped new line, ignore
        i += 1 if bytes[i] == 13
      when 13 # escaped new line (or double notation for new line), ignore
        i += 1 if bytes[i] == 10
      else
        # unknown escape: the backslash is dropped, the character is kept
        result << rep
      end
    else
      result << byte
    end
  end
  result.pack('C*')
end
|
314
|
+
end
|
315
|
+
end
|