combine_pdf 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,110 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+
8
+ module CombinePDF
9
+
10
# Builds raw PDF content (at this stage a single text box) that is meant to
# be placed over existing pages or emitted as a page of its own.
class PDFWriter

  # media_box - [x0, y0, x1, y1] page rectangle; defaults to 612 x 792 units
  #             (each unit is 1/72 inch).
  def initialize(media_box = [0.0, 0.0, 612.0, 792.0])
    @content_stream = {}
    @media_box = media_box
  end

  ########################################################
  ## add_text_box(text, args = {})
  ## text - the String to show.
  ## args - a Hash overriding any of the defaults listed in the method body:
  ## - font_name: :font_name
  ##   The PostScript names of 14 Type 1 fonts, known as the standard 14 fonts, are as follows:
  ##   Times-Roman, Helvetica, Courier, Symbol, Times-Bold, Helvetica-Bold, Courier-Bold, ZapfDingbats,
  ##   Times-Italic, Helvetica-Oblique, Courier-Oblique, Times-BoldItalic, Helvetica-BoldOblique, Courier-BoldOblique
  ## - text_color: [R, G, B]
  ##   an array with three floats, each in a value between 0 to 1.
  ##   First value is Red, second Green and last is Blue (RGB color system)
  ## - x, y: the text position, calculated from the bottom left (each unit is 1/72 inch).
  ## returns the text object (BT ... ET) as a String.
  def add_text_box(text, args = {})
    # caller supplied values win over the defaults
    options = {
      text_alignment: :center,
      text_color: [1, 1, 1],
      # text_stroke: nil,
      font_name: :Helvetica,
      font_type: :Type1,
      font_object: nil,
      font_size: 12,
      border_color: nil,
      border_width: nil,
      border_radius: nil,
      background_color: nil,
      opacity: 1,
      x: 0,
      y: 0,
      length: -1,
      height: -1,
    }.merge(args || {})

    # create font object
    font_object = { Type: :Font, Subtype: options[:font_type], BaseFont: options[:font_name] }
    supplied_font = options[:font_object]
    if supplied_font.is_a?(Hash) && supplied_font[:indirect_reference_id] && supplied_font[:indirect_generation_number] && (supplied_font[:is_reference_only] != true)
      # NOTE(review): this wraps the freshly built dictionary, not the supplied
      # font object - confirm which of the two was meant to be referenced.
      font_object = { is_reference_only: true, referenced_object: font_object }
    end

    # create resources object
    # (the name is built by interpolation: String + Integer raises TypeError)
    font_name = "MyFont#{rand(99)}".to_sym
    # NOTE(review): the resources dictionary is built but not stored anywhere yet.
    resources_object = { Resources: { Font: { font_name => font_object } } }

    # x,y are calculated from the bottom left; each unit (1) is 1/72 Inch
    x = options[:x]
    y = options[:y]
    red, green, blue = options[:text_color]

    # create text stream
    # NOTE(review): the `ca` line is emitted exactly as before; confirm that it
    # is valid inside a content stream.
    [
      "BT\n",                                           # the Begin Text marker
      PDFOperations._format_name_to_pdf(font_name),     # set font name
      " #{options[:font_size].to_f} Tf\n",              # set font size and add font operator
      "#{red} #{green} #{blue} rg\n",                   # sets the color state (one value per component)
      " #{options[:opacity].to_f} ca\n",                # set opacity (alpha) for graphic state
      "#{x} #{y} Td\n",                                 # set location for text object
      PDFOperations._format_string_to_pdf(text),        # insert the string in PDF format
      " Tj\n ET\n"                                      # the Text object operator and the End Text marker
    ].join
  end

  ########################################################
  ## add_content_to_pages(pages = [], location = :above)
  ## pages - a page hash or an array of pages
  ## location - :above to place content over existing content or :below to place content under existing content
  ## (adding the content to a single page hash is not implemented yet)
  def add_content_to_pages(pages = [], location = :above)
    if pages.is_a?(Array)
      pages.each { |page| add_content_to_pages page, location }
    elsif pages.is_a?(Hash)
      #####
      ## add content stream to page
    end
  end

  ########################################################
  ## make_into_page()
  ## takes no arguments and returns the contents stream within a page
  ## (to be added as an independent page to the PDF object)
  def make_into_page
    { Type: :Page }
  end

  ########################################################
  ## to_pdf()
  ## prints out the content stream as raw PDF
  ## file_name - the name of the file to which to save the data (will be overwritten).
  ## if file_name is given, save to file.
  def to_pdf(file_name = nil)
    pdf = PDF.new
    pdf << make_into_page
    if file_name
      pdf.save file_name
    else
      pdf.to_pdf
    end
  end

end
106
+
107
+ end
108
+
109
+
110
+
@@ -0,0 +1,198 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+
8
+ module CombinePDF
9
# Decrypts the objects of a parsed PDF that uses the Standard security handler.
# Only RC4 (encryption dictionary V 1 or 2) is dispatched by #decrypt.
class PDFDecrypt

  # objects         - the array of parsed objects; it is decrypted in place.
  # root_doctionary - the trailer dictionary; must hold :Encrypt (and :ID for the key).
  # raises RuntimeError when there is no encryption dictionary.
  def initialize(objects = [], root_doctionary = {})
    @objects = objects
    @encryption_dictionary = root_doctionary[:Encrypt]
    raise "Cannot decrypt an encrypted file without an encryption dictionary!" unless @encryption_dictionary
    @root_doctionary = root_doctionary
    @padding_key = [ 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
                     0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
                     0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
                     0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A ]
    @key_crypt_first_iv_store = nil
    @encryption_iv = nil
    PDFOperations.change_references_to_actual_values @objects, @encryption_dictionary
  end

  # Computes the file encryption key from the password, stores it in @key
  # and returns it (a binary String).
  # password - the user password; the empty password is used by default.
  def set_general_key(password = "")
    binary = lambda { |value| value.to_s.dup.force_encoding(Encoding::ASCII_8BIT) }
    revision = @encryption_dictionary[:R]
    # the Length entry is given in bits (40 when missing); revision 2 always uses 5 bytes.
    key_bytes = revision >= 3 ? (@encryption_dictionary[:Length] || 40) / 8 : 5

    # 1) make sure the initial key is 32 byte long (if no password, uses padding).
    key = (password.bytes.to_a[0..31] + @padding_key)[0..31].pack('C*')
    # 2) add the value of the encryption dictionary's O entry
    key << binary.call(@encryption_dictionary[:O])
    # 3) Convert the integer value of the P entry to a 32-bit unsigned binary number
    #    and pass these bytes low-order byte first ('V' is little-endian on every platform)
    key << [@encryption_dictionary[:P]].pack('V')
    # 4) Pass the first element of the file's file identifier array
    #    (the value of the ID entry in the document's trailer dictionary)
    key << binary.call(@root_doctionary[:ID][0])
    # 4(a) (Security handlers of revision 4 or greater)
    #      if document metadata is not being encrypted, add 4 bytes with the value 0xFFFFFFFF.
    #      Nothing is added when the metadata is encrypted (the default).
    if revision >= 4 && @encryption_dictionary[:EncryptMetadata] == false
      key << ([0xFF] * 4).pack('C*')
    end
    # 5) pass everything as a MD5 hash
    key = Digest::MD5.digest(key)
    # 5(a) (Security handlers of revision 3 or greater) Do the following 50 times:
    #      Take the output from the previous MD5 hash and
    #      pass the first n bytes of the output as input into a new MD5 hash,
    #      where n is the number of bytes of the encryption key.
    if revision >= 3
      50.times { key = Digest::MD5.digest(key[0...key_bytes]) }
    end
    # 6) Set the encryption key to the first n bytes of the output from the final MD5 hash.
    @key = key[0...key_bytes]
  end

  # Decrypts every string and stream in the objects array and returns the array.
  # raises RuntimeError (through raise_encrypted_error) for anything that is
  # not the Standard filter with V 1 or 2.
  def decrypt
    raise_encrypted_error @encryption_dictionary unless @encryption_dictionary[:Filter] == :Standard
    @key = set_general_key
    case @encryption_dictionary[:V]
    when 1, 2
      warn "trying to decrypt with RC4."
      _perform_decrypt_proc_ @objects, method(:decrypt_RC4)
    else
      raise_encrypted_error
    end
    #rebuild stream lengths?
    @objects
  end

  # The identity "decryption": returns the data it was given.
  def decrypt_none(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
    encrypted
  end

  # Decrypts one string/stream with RC4, using a key derived from the file key
  # and the owning object's id and generation number.
  def decrypt_RC4(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
    object_key = @key.dup
    # low-order 3 bytes of the object number, low-order 2 bytes of the generation
    object_key << [encrypted_id].pack('V')[0..2]
    object_key << [encrypted_generation].pack('V')[0..1]
    # the key is the first (n + 5) bytes of the hash, up to a maximum of 16
    key_length = object_key.length < 16 ? object_key.length : 16
    rc4 = RC4.new( Digest::MD5.digest(object_key)[(0...key_length)] )
    rc4.decrypt(encrypted)
  end

  # Decrypts one string/stream with AES-128 in CBC mode and returns a String.
  # The first 16 bytes of the data are the initialization vector; the object
  # key is the MD5 of the file key, the object id, the generation and "sAlT".
  # Errors raised by OpenSSL for malformed data are not rescued.
  def decrypt_AES(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
    object_key = @key.dup
    object_key << [encrypted_id].pack('V')[0..2]
    object_key << [encrypted_generation].pack('V')[0..1]
    object_key << "sAlT"
    init_vector = encrypted.byteslice(0, 16)
    payload = encrypted.byteslice(16, encrypted.bytesize).to_s
    return payload if payload.empty? # nothing but the initialization vector
    cipher = OpenSSL::Cipher.new("aes-128-cbc")
    cipher.decrypt
    cipher.key = Digest::MD5.digest(object_key)
    cipher.iv = init_vector
    cipher.update(payload) + cipher.final
  end

  # Walks arrays and hashes recursively, replacing every String (and every
  # :raw_stream_content) with the result of
  # decrypt_proc.call(data, object_id, generation, filter).
  # The id / generation / filter of the outermost hash are used for everything nested in it.
  # Arrays and hashes are updated in place and returned; other values are returned untouched.
  def _perform_decrypt_proc_(object, decrypt_proc, encrypted_id = nil, encrypted_generation = nil, encrypted_filter = nil)
    case object
    when Array
      object.map! { |item| _perform_decrypt_proc_(item, decrypt_proc, encrypted_id, encrypted_generation, encrypted_filter) }
    when Hash
      encrypted_id ||= object[:indirect_reference_id]
      encrypted_generation ||= object[:indirect_generation_number]
      encrypted_filter ||= object[:Filter]
      if object[:raw_stream_content]
        stream_length = object[:Length]
        if stream_length.is_a?(Hash) && stream_length[:is_reference_only]
          referenced = PDFOperations.get_refernced_object( @objects, stream_length)
          stream_length = referenced.is_a?(Hash) ? referenced[:indirect_without_dictionary] : nil
        end
        actual_length = object[:raw_stream_content].length
        # a missing or unresolved Length falls back to the data actually read
        stream_length = actual_length unless stream_length.is_a?(Integer)
        warn "Stream registeded length was #{object[:Length].to_s} and the actual length was #{actual_length}." if actual_length < stream_length
        length = [ stream_length, actual_length].min
        object[:raw_stream_content] = decrypt_proc.call( (object[:raw_stream_content][0...length]), encrypted_id, encrypted_generation, encrypted_filter)
      end
      # assumes no decrypting is ever performed on keys
      object.each do |k, v|
        next if k == :raw_stream_content
        next unless v.is_a?(Hash) || v.is_a?(Array) || v.is_a?(String)
        object[k] = _perform_decrypt_proc_(v, decrypt_proc, encrypted_id, encrypted_generation, encrypted_filter)
      end
      object
    when String
      decrypt_proc.call(object, encrypted_id, encrypted_generation, encrypted_filter)
    else
      object
    end
  end

  # Prints the offending data (the encryption dictionary by default) as a
  # warning and raises RuntimeError.
  def raise_encrypted_error object = nil
    object ||= @encryption_dictionary.to_s.split(',').join("\n")
    warn "Data raising exception:\n #{object.to_s.split(',').join("\n")}"
    raise "File is encrypted - not supported."
  end
end
146
+ #####################################################
147
+ ## The following isn't my code!!!!
148
+ ## It is subject to a different license and copyright.
149
+ ## This was the code for the RC4 Gem,
150
+ ## ... I had a bad internet connection so I ended up
151
+ ## copying it from the web page I had in my cache.
152
+ ## This wonderful work was done by Caige Nichols.
153
+ #####################################################
154
+
155
# RC4 stream cipher. Every call to encrypt / decrypt continues the same key
# stream, so use a fresh instance for each independent message.
class RC4
  # str - the key; its bytes are repeated (or cut) to fill 256 bytes.
  # raises SyntaxError when the key is nil or blank.
  def initialize(str)
    raise SyntaxError, "RC4: Key supplied is blank" if str.nil? || str.eql?('')

    @q1, @q2 = 0, 0
    key_bytes = str.bytes.to_a
    @key = Array.new(256) { |i| key_bytes[i % key_bytes.size] }
    # key scheduling: shuffle the identity permutation according to the key
    @s = (0..255).to_a
    j = 0
    256.times do |i|
      j = (j + @s[i] + @key[i]) % 256
      @s[i], @s[j] = @s[j], @s[i]
    end
  end

  # Encrypts text in place (its content is replaced) and returns it.
  def encrypt!(text)
    text.replace(process(text))
  end

  # Returns the encrypted bytes as a new String; text is left untouched.
  def encrypt(text)
    process(text.dup)
  end

  # RC4 is symmetric: decrypting is the same operation as encrypting.
  alias_method :decrypt, :encrypt

  private

  # XORs every byte of text with the next byte of the key stream.
  def process(text)
    text.unpack("C*").map { |c| c ^ round }.pack("C*")
  end

  # Advances the generator one step and returns the next key stream byte.
  def round
    @q1 = (@q1 + 1) % 256
    @q2 = (@q2 + @s[@q1]) % 256
    @s[@q1], @s[@q2] = @s[@q2], @s[@q1]
    @s[(@s[@q1] + @s[@q2]) % 256]
  end
end
196
+
197
+ end
198
+
@@ -0,0 +1,72 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+
8
+ module CombinePDF
9
+
10
# Stream filters: decodes (inflates) the :raw_stream_content of stream objects.
module PDFFilter
  module_function

  # Encoding streams is not implemented; always returns false.
  def deflate_object object = nil
    false
  end

  # Decodes object[:raw_stream_content] in place, filter by filter.
  # object - a stream Hash with :raw_stream_content and optional :Filter / :DecodeParms.
  # filter - unused.
  # Only :FlateDecode without a predictor is supported.
  # returns true when every filter was decoded (:Filter and :DecodeParms are
  # then removed and :Length updated) and false when object is not a Hash or
  # an unsupported filter is met.
  # raises RuntimeError when a FlateDecode stream asks for a predictor above 1.
  def inflate_object object = nil, filter = :none
    return false unless object.is_a?(Hash)
    filter_array = object[:Filter]
    if filter_array.is_a?(Hash) && filter_array[:is_reference_only]
      filter_array = filter_array[:referenced_object]
    end
    filter_array = [filter_array] if filter_array.is_a?(Symbol)
    filter_array = [] if filter_array.nil?
    params_array = object[:DecodeParms]
    if params_array.is_a?(Hash) && params_array[:is_reference_only]
      params_array = params_array[:referenced_object]
    end
    params_array = [params_array] unless params_array.is_a?(Array)
    while filter_array[0]
      case filter_array[0]
      when :FlateDecode
        params = params_array[0]
        params = params[:referenced_object] if params.is_a?(Hash) && params[:is_reference_only]
        # Predictor 1 means "no prediction"; anything above it would need the
        # TIFF (2) or PNG (10..15) post-processing, which is not implemented.
        raise_unsupported_error params if params.is_a?(Hash) && params[:Predictor].to_i > 1
        object[:raw_stream_content] = Zlib::Inflate.inflate object[:raw_stream_content]
        object[:Length] = object[:raw_stream_content].bytesize
      else
        return false
      end
      # the filter (and its parameters) was applied - move on to the next one
      params_array.shift
      filter_array.shift
    end
    object.delete(:Filter)
    object.delete(:DecodeParms) # the parameters describe filters that are gone
    true
  end

  # Raises RuntimeError naming the unsupported filter / parameters.
  def raise_unsupported_error (object = {})
    raise "Filter #{object} unsupported. couldn't deflate object"
  end

end
68
+
69
+ end
70
+
71
+
72
+
@@ -0,0 +1,315 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+ module CombinePDF
8
+
9
+ ########################################################
10
+ ## This is the Parser class.
11
+ ## It takes PDF data and parses it, returning an array
12
+ ## of data.
13
+ ## That array can be used to initialize a PDF object.
14
+ ## The Parser class doesn't involve itself with the
15
+ ## file version.
16
+ ########################################################
17
+
18
# Parses raw PDF data into an array of Ruby objects (hashes, arrays, strings,
# symbols, numbers, booleans and nil).
class PDFParser
  attr_reader :parsed, :version, :info_object, :root_object

  # string - the raw PDF data. It is copied, so the caller's string keeps its encoding.
  # raises TypeError unless string is a String.
  def initialize (string)
    raise TypeError, "couldn't parse and data, expecting type String" unless string.is_a? String
    @string_to_parse = string.dup.force_encoding(Encoding::ASCII_8BIT)
    @literal_strings = []
    @hex_strings = []
    @streams = []
    @parsed = []
    @root_object = {}
    @info_object = {}
    @version = nil
    @scanner = nil
  end

  # Parses the data (once; later calls return the stored result) and returns
  # the array of parsed objects.
  # Sets @version from the %PDF header, @root_object from the trailer (or the
  # XRef stream dictionaries) and @info_object from the root's :Info entry.
  # Encrypted files are handed to PDFDecrypt; for version 1.5 and above the
  # dictionaries stored inside object streams are added to the result.
  # raises RuntimeError when no trailer / XRef dictionary is found.
  def parse
    return @parsed unless @parsed.empty?
    @scanner = StringScanner.new @string_to_parse
    @scanner.pos = 0
    if @scanner.scan(/\%PDF\-[\d\-\.]+/)
      @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
    end

    warn "Starting to parse PDF data."
    @parsed = _parse_

    if @root_object == {}
      # no classic trailer - collect the XRef stream dictionaries instead
      @parsed.each do |obj|
        @root_object.merge!(obj) if obj.is_a?(Hash) && obj[:Type] == :XRef
      end
    end
    raise "root is unknown - cannot determine if file is Encrypted" if @root_object == {}
    warn "Injecting actual values into root object: #{@root_object}."
    PDFOperations.change_references_to_actual_values @parsed, @root_object

    if @root_object[:Encrypt]
      warn "PDF is Encrypted! Attempting to unencrypt - not yet fully supported."
      # the objects are decrypted in place, there is no need to re-assign @parsed
      PDFDecrypt.new(@parsed, @root_object).decrypt
    end
    # @version is nil when the data has no %PDF header
    if @version && @version >= 1.5 # code placement for object streams
      ## search for objects streams
      object_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm}
      unless object_streams.empty?
        warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."
        object_streams.each { |stream| _extract_object_stream_(stream) }
        ## remove object streams and XREF dictionaries
        @parsed.reject! {|obj| obj.is_a?(Hash) && (obj[:Type] == :ObjStm || obj[:Type] == :XRef)}
      end
    end
    PDFOperations.change_references_to_actual_values @parsed, @root_object
    @info_object = @root_object[:Info]
    if @info_object && @info_object.is_a?(Hash)
      @parsed.delete @info_object
      PDFOperations.change_references_to_actual_values @parsed, @info_object
      PRIVATE_HASH_KEYS.each {|key| @info_object.delete key}
    else
      @info_object = {}
    end
    warn "setting parsed collection and returning collection."
    @parsed
  end

  protected

  # Reads tokens from @scanner until the data ends or the array / dictionary
  # that is being read is closed, and returns the objects found as an array.
  # The order of the `when` clauses matters: the first pattern that matches
  # at the current position wins.
  def _parse_
    out = []
    while @scanner.rest? do
      case
      ##########################################
      ## parse an Array
      ##########################################
      when @scanner.scan(/\[/)
        out << _parse_
      ##########################################
      ## parse a Dictionary
      ##########################################
      when @scanner.scan(/<</)
        out << _pairs_to_hash_(_parse_)
      ##########################################
      ## return content of array or dictionary
      ##########################################
      when @scanner.scan(/\]/), @scanner.scan(/>>/)
        return out
      ##########################################
      ## parse a Stream
      ##########################################
      when @scanner.scan(/stream[\r]?[\n]/)
        if out.last.is_a? Hash
          out.last[:raw_stream_content] = _read_stream_(out.last)
        else
          warn "Stream not attached to dictionary!"
          out << _read_stream_(nil).force_encoding(Encoding::ASCII_8BIT)
        end
      ##########################################
      ## parse an Object after finished
      ##########################################
      when @scanner.scan(/endobj/)
        # the three latest items are: object id, generation number and the value
        value = out.pop
        generation = out.pop
        id = out.pop
        if value.is_a? Hash
          out << value.merge({indirect_generation_number: generation, indirect_reference_id: id})
        else
          out << {indirect_without_dictionary: value, indirect_generation_number: generation, indirect_reference_id: id}
        end
      ##########################################
      ## parse a Hex String
      ## (upper or lower case digits; white space inside is ignored)
      ##########################################
      when str = @scanner.scan(/<[0-9a-fA-F\s]*>/)
        out << [str[1..-2].gsub(/\s+/, '')].pack('H*')
      ##########################################
      ## parse a Literal String
      ##########################################
      when @scanner.scan(/\(/)
        out << _read_literal_string_
      ##########################################
      ## Parse a comment
      ##########################################
      when @scanner.scan(/\%/)
        # is a comment, skip until new line (or to the end when there is none)
        @scanner.skip_until(/[\n\r]+/) || @scanner.terminate
      ##########################################
      ## Parse a Name
      ## (all characters that aren't white space or special)
      ##########################################
      when str = @scanner.scan(/\/[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+/)
        out << ( str[1..-1].gsub(/\#[0-9a-fA-F]{2}/) {|a| a[1..2].hex.chr } ).to_sym
      ##########################################
      ## Parse a Number
      ##########################################
      when str = @scanner.scan(/[\+\-\.\d]+/)
        str.match(/\./) ? (out << str.to_f) : (out << str.to_i)
      ##########################################
      ## Parse an Object Reference
      ##########################################
      when @scanner.scan(/R/)
        generation = out.pop
        id = out.pop
        out << { is_reference_only: true, indirect_generation_number: generation, indirect_reference_id: id}
      ##########################################
      ## Parse Bool - true and after false
      ##########################################
      when @scanner.scan(/true/)
        out << true
      when @scanner.scan(/false/)
        out << false
      ##########################################
      ## Parse NULL - null
      ##########################################
      when @scanner.scan(/null/)
        out << nil
      ##########################################
      ## XREF - skip the table and keep the trailer dictionary as the root object
      ##########################################
      when @scanner.scan(/xref/)
        # nothing is skipped when neither a trailer nor an %EOF marker follows
        if @scanner.scan_until(/(trailer)|(\%EOF)/) && @scanner.matched[-1] == 'r'
          if @scanner.skip_until(/<</)
            @root_object = _pairs_to_hash_(_parse_)
          end
          ## skip until end of segment, marked by %%EOF
          @scanner.skip_until(/\%\%EOF/)
        end
      when @scanner.scan(/[\s]+/) , @scanner.scan(/obj[\s]*/)
        # white space or the obj keyword - do nothing
        nil
      else
        # unknown data - always advance
        @scanner.pos = @scanner.pos + 1
      end
    end
    out
  end

  # Turns a flat [key, value, key, value...] array into a Hash.
  # Stops at the first nil / false key.
  def _pairs_to_hash_(items)
    hash = {}
    while items[0]
      key = items.shift
      hash[key] = items.shift
    end
    hash
  end

  # Reads stream data up to the endstream keyword and returns it.
  # dictionary - the stream's dictionary (or nil). When its :Length is an
  # Integer and only end-of-line characters follow that many bytes, exactly
  # :Length bytes are returned; otherwise one trailing end-of-line marker
  # (CR LF, LF or CR) is removed.
  def _read_stream_(dictionary)
    chunk = @scanner.scan_until(/endstream/)
    if chunk
      body = chunk[0...-9] # cut the endstream keyword
    else
      warn "Stream has no endstream keyword, reading until the end of the data."
      body = @scanner.rest
      @scanner.terminate
    end
    declared = dictionary.is_a?(Hash) ? dictionary[:Length] : nil
    if declared.is_a?(Integer) && declared >= 0 && declared <= body.bytesize && body[declared..-1] =~ /\A[\r\n]*\z/
      body[0, declared]
    else
      body.sub(/(\r\n|\n|\r)\z/, '')
    end
  end

  # Reads a literal string, the opening parenthesis was consumed already.
  # Balanced, unescaped parentheses are part of the string.
  # returns the decoded String.
  def _read_literal_string_
    raw = String.new # binary, like the data that is being scanned
    depth = 1
    while depth > 0
      chunk = @scanner.scan_until(/[\(\)]/)
      unless chunk
        # no parenthesis left in the data - the string is never closed
        warn "Unknown error parsing string at #{@scanner.pos}!"
        return _unescape_literal_string_(raw)
      end
      raw << chunk
      # a parenthesis that follows an odd number of back-slashes is escaped
      backslashes = 0
      backslashes += 1 while raw[-2 - backslashes] == "\\"
      next if backslashes.odd?
      raw[-1] == '(' ? depth += 1 : depth -= 1
    end
    # the closing parenthesis isn't part of the string
    _unescape_literal_string_(raw[0..-2])
  end

  # Converts the content of a PDF literal string to the bytes it stands for.
  def _unescape_literal_string_(raw)
    bytes = raw.bytes.to_a
    decoded = []
    until bytes.empty?
      byte = bytes.shift
      case byte
      when 13 # eol - \r
        # An end-of-line marker appearing within a literal string without a preceding REVERSE SOLIDUS
        # shall be treated as a byte value of (0Ah),
        # irrespective of whether the end-of-line marker was a CARRIAGE RETURN (0Dh), a LINE FEED (0Ah), or both.
        bytes.shift if bytes[0] == 10
        decoded << 10
      when 10 # eol - \n
        bytes.shift if bytes[0] == 13
        decoded << 10
      when 92 # "\\".ord == 92
        escaped = bytes.shift
        case escaped
        when nil # a back-slash at the very end, nothing to escape
          nil
        when 110 #n
          decoded << 10 #new line
        when 114 #r
          decoded << 13 # CR
        when 116 #t
          decoded << 9 #tab
        when 98 #b
          decoded << 8
        when 102 #f
          decoded << 12 # form feed
        when 48..55 # octal notation for a byte: one to three digits
          digits = escaped.chr
          2.times { digits << bytes.shift.chr if bytes[0] && bytes[0].between?(48, 55) }
          decoded << (digits.oct & 0xFF) # high-order overflow is ignored
        when 10 # escaped new line, ignore
          bytes.shift if bytes[0] == 13
        when 13 # escaped new line (or double notation for new line), ignore
          bytes.shift if bytes[0] == 10
        else
          # the back-slash is ignored
          decoded << escaped
        end
      else
        decoded << byte
      end
    end
    decoded.pack('C*')
  end

  # Decodes one object stream and adds the dictionaries stored inside it to @parsed.
  # The stream data starts with "object-number offset" integer pairs, followed by the objects.
  # NOTE(review): only dictionaries are extracted; other object types stored
  # in the stream are left behind, as before.
  def _extract_object_stream_(stream)
    warn "Attempting #{stream.select {|k,v| k != :raw_stream_content}}"
    ## un-encode (using the correct filter) the object stream
    unless PDFFilter.inflate_object stream
      warn "Couldn't decode object stream, skipping it."
      return
    end
    ## extract objects from stream to top level array @parsed
    @scanner = StringScanner.new stream[:raw_stream_content]
    stream_data = _parse_
    id_array = []
    while stream_data[0].is_a? Integer
      id_array << stream_data.shift
      stream_data.shift # the offset isn't needed
    end
    while stream_data[0].is_a? Hash
      stream_data[0][:indirect_reference_id] = id_array.shift
      stream_data[0][:indirect_generation_number] = 0
      @parsed << stream_data.shift
    end
  end
end
315
+ end