combine_pdf 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+
8
+ module CombinePDF
9
+
10
# Builds raw PDF content (currently a single text object) that is meant to be
# placed over/under existing pages or wrapped into a page of its own.
class PDFWriter
  # media_box - the page rectangle as [x0, y0, x1, y1]; each unit is 1/72 inch.
  def initialize(media_box = [0.0, 0.0, 612.0, 792.0])
    @content_stream = {}
    @media_box = media_box
  end

  ########################################################
  ## add_text_box(text, args = {})
  ## Returns the PDF text object (a String running from "BT" to "ET") that
  ## draws +text+.
  ##
  ## Recognised keys in +args+ (anything missing falls back to the defaults
  ## listed in the method body):
  ## - font_name: :font_name
  ##   The PostScript names of the standard 14 Type 1 fonts are:
  ##   Times-Roman, Helvetica, Courier, Symbol, Times-Bold, Helvetica-Bold,
  ##   Courier-Bold, ZapfDingbats, Times-Italic, Helvetica-Oblique,
  ##   Courier-Oblique, Times-BoldItalic, Helvetica-BoldOblique,
  ##   Courier-BoldOblique
  ## - text_color: [R, G, B]
  ##   an array with three floats, each between 0 and 1
  ##   (first is Red, second is Green and last is Blue).
  ## - font_size, opacity, x, y: used when writing the text object.
  ## The remaining keys (borders, background, length, height, alignment)
  ## are accepted but not rendered yet.
  def add_text_box(text, args = {})
    options = {
      text_alignment: :center,
      text_color: [1, 1, 1],
      # text_stroke: nil,
      font_name: :Helvetica,
      font_type: :Type1,
      font_object: nil,
      font_size: 12,
      border_color: nil,
      border_width: nil,
      border_radius: nil,
      background_color: nil,
      opacity: 1,
      x: 0,
      y: 0,
      length: -1,
      height: -1
    }
    # caller supplied values override the defaults (they used to be ignored)
    options.update(args) if args

    # create font object
    font_object = { Type: :Font, Subtype: options[:font_type], BaseFont: options[:font_name] }
    supplied_font = options[:font_object]
    if supplied_font.is_a?(Hash) && supplied_font[:indirect_reference_id] &&
       supplied_font[:indirect_generation_number] && (supplied_font[:is_reference_only] != true)
      # NOTE(review): this wraps the freshly built font dictionary, not the
      # supplied one - confirm which object the reference should point to.
      font_object = { is_reference_only: true, referenced_object: font_object }
    end

    # create resources object
    # (String + Integer raised a TypeError, interpolation builds the name safely)
    font_name = "MyFont#{rand(99)}".to_sym
    # NOTE(review): the resources are built but not attached to anything yet.
    resources_object = { Resources: { Font: { font_name => font_object } } }

    # x,y are calculated from the bottom left; each unit (1) is 1/72 Inch
    x = options[:x]
    y = options[:y]
    red, green, blue = options[:text_color]

    # create text stream
    text_stream = ""
    text_stream << "BT\n" # the Begin Text marker
    text_stream << PDFOperations._format_name_to_pdf(font_name) # set font name
    text_stream << " #{options[:font_size].to_f} Tf\n" # set font size and add font operator
    text_stream << "#{red} #{green} #{blue} rg\n" # sets the color state (all three channels)
    # NOTE(review): verify that "ca" is valid as a content stream operator.
    text_stream << " #{options[:opacity].to_f} ca\n" # set opacity (alpha) for graphic state
    text_stream << "#{x} #{y} Td\n" # set location for text object
    text_stream << PDFOperations._format_string_to_pdf(text) # insert the string in PDF format
    text_stream << " Tj\n ET\n" # the Text object operator and the End Text marker
  end

  ########################################################
  ## add_content_to_pages(pages = [], location = :above)
  ## pages - a page hash or an array of pages
  ## location - :above to place content over existing content or
  ##            :below to place content under existing content
  ## Arrays are walked recursively; handling of a single page hash is not
  ## implemented yet.
  def add_content_to_pages(pages = [], location = :above)
    if pages.is_a?(Array)
      pages.each { |page| add_content_to_pages page, location }
    elsif pages.is_a?(Hash)
      # TODO: add content stream to page
    end
  end

  ########################################################
  ## make_into_page()
  ## takes no arguments and returns a page dictionary (to be added as an
  ## independent page to the PDF object). Only the :Type entry is set so far.
  def make_into_page
    { Type: :Page }
  end

  ########################################################
  ## to_pdf(file_name = nil)
  ## Wraps the page in a new PDF object.
  ## file_name - the name of the file to which to save the data
  ##             (will be overwritten). When omitted, the result of
  ##             PDF#to_pdf is returned instead.
  def to_pdf(file_name = nil)
    pdf = PDF.new
    pdf << make_into_page
    if file_name
      pdf.save file_name
    else
      pdf.to_pdf
    end
  end
end
106
+
107
+ end
108
+
109
+
110
+
@@ -0,0 +1,198 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+
8
+ module CombinePDF
9
# Decrypts the strings and streams of a parsed PDF object collection using
# the Standard security handler with RC4 (encryption dictionary V 1 or 2).
class PDFDecrypt
  # objects         - the parsed object collection; it is decrypted in place.
  # root_doctionary - the trailer (root) dictionary; its :Encrypt entry is
  #                   required and its :ID entry is used to derive the key.
  # Raises a RuntimeError when there is no encryption dictionary.
  def initialize(objects = [], root_doctionary = {})
    @objects = objects
    @encryption_dictionary = root_doctionary[:Encrypt]
    raise "Cannot decrypt an encrypted file without an encryption dictionary!" unless @encryption_dictionary
    @root_doctionary = root_doctionary
    # the 32 byte password padding string
    @padding_key = [0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
                    0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
                    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
                    0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A]
    @key_crypt_first_iv_store = nil
    @encryption_iv = nil
    PDFOperations.change_references_to_actual_values @objects, @encryption_dictionary
  end

  # Computes, stores (in @key) and returns the document encryption key.
  # password - the user password; the empty default uses only the padding.
  def set_general_key(password = "")
    revision = @encryption_dictionary[:R].to_i
    # n, the key length in bytes: always 5 for revision 2, otherwise taken
    # from the Length entry, which is expressed in bits (40 when absent).
    key_bytes = revision >= 3 ? (@encryption_dictionary[:Length] || 40) / 8 : 5

    # 1) make sure the initial key is 32 bytes long (if no password, uses padding).
    key = (password.bytes.to_a + @padding_key).first(32).pack('C*')
    # 2) add the value of the encryption dictionary's O entry
    key << binary(@encryption_dictionary[:O])
    # 3) add the P entry as a 32-bit number, low-order byte first
    #    ('l<' is little-endian on every platform, 'i' was native-endian)
    key << [@encryption_dictionary[:P]].pack('l<')
    # 4) add the first element of the file identifier array
    #    (the ID entry in the document's trailer dictionary)
    key << binary(@root_doctionary[:ID][0])
    # 4(a) (revision 4 or greater) only if document metadata is NOT being
    #      encrypted, add 4 bytes with the value 0xFFFFFFFF.
    #      Nothing is added otherwise (the default is true and nil != false).
    if revision >= 4 && @encryption_dictionary[:EncryptMetadata] == false
      key << [0xFF, 0xFF, 0xFF, 0xFF].pack('C*')
    end
    # 5) pass everything through MD5
    key = Digest::MD5.digest(key)
    # 5(a) (revision 3 or greater) do the following 50 times: hash the
    #      first n bytes of the previous MD5 output again.
    50.times { key = Digest::MD5.digest(key[0, key_bytes]) } if revision >= 3
    # 6) the encryption key is the first n bytes of the final hash
    @key = key[0, key_bytes]
  end

  # Decrypts @objects in place and returns them.
  # Raises a RuntimeError for anything but the Standard filter with V 1 or 2.
  def decrypt
    raise_encrypted_error @encryption_dictionary unless @encryption_dictionary[:Filter] == :Standard
    @key = set_general_key
    case @encryption_dictionary[:V]
    when 1, 2
      warn "trying to decrypt with RC4."
      _perform_decrypt_proc_ @objects, self.method(:decrypt_RC4)
    else
      raise_encrypted_error
    end
    # rebuild stream lengths?
    @objects
  end

  # Identity "decryption": returns the data untouched
  # (it used to return the literal word "encrypted").
  def decrypt_none(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
    encrypted
  end

  # RC4-decrypts one string/stream belonging to the given indirect object.
  def decrypt_RC4(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
    # the object key is the general key followed by the low-order 3 bytes of
    # the object number and the low-order 2 bytes of the generation number
    object_key = @key.dup
    object_key << [encrypted_id].pack('V')[0, 3]
    object_key << [encrypted_generation].pack('V')[0, 2]
    # use the first (n + 5) bytes of the hash, up to a maximum of 16
    key_length = [object_key.length, 16].min
    rc4 = RC4.new(Digest::MD5.digest(object_key)[0...key_length])
    rc4.decrypt(encrypted)
  end

  # NOTE(review): unfinished and not reachable from #decrypt. Neither the
  # derived object key nor the IV is ever handed to the cipher, and the
  # cipher name is built from the un-hashed key length. Logic left as found.
  def decrypt_AES(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
    @encryption_iv ||= 0.chr * 16
    object_key = @key.dup
    (0..2).each { |e| object_key << (encrypted_id >> e * 8 & 0xFF) }
    (0..1).each { |e| object_key << (encrypted_generation >> e * 8 & 0xFF) }
    object_key << "sAlT"
    cipher = OpenSSL::Cipher::Cipher.new("aes-#{object_key.length << 3}-cbc").decrypt
    cipher.padding = 0
    (cipher.update(encrypted) + cipher.final).unpack("C*")
  end

  # Walks +object+ recursively and runs +decrypt_proc+ over every String and
  # every :raw_stream_content it finds. Arrays and Hashes are updated in
  # place. The id / generation / filter are taken from the first Hash on the
  # way down that supplies them. Hash keys are never decrypted.
  # Returns the (decrypted) object.
  def _perform_decrypt_proc_(object, decrypt_proc, encrypted_id = nil, encrypted_generation = nil, encrypted_filter = nil)
    case object
    when Array
      object.map! { |item| _perform_decrypt_proc_(item, decrypt_proc, encrypted_id, encrypted_generation, encrypted_filter) }
    when Hash
      encrypted_id ||= object[:indirect_reference_id]
      encrypted_generation ||= object[:indirect_generation_number]
      encrypted_filter ||= object[:Filter]
      if object[:raw_stream_content]
        actual_length = object[:raw_stream_content].length
        stream_length = object[:Length]
        if stream_length.is_a?(Hash) && stream_length[:is_reference_only]
          stream_length = PDFOperations.get_refernced_object(@objects, stream_length)[:indirect_without_dictionary]
        end
        # a missing or unresolved Length used to crash the comparison below
        stream_length = actual_length unless stream_length.is_a?(Integer)
        warn "Stream registeded length was #{object[:Length].to_s} and the actual length was #{actual_length}." if actual_length < stream_length
        length = [stream_length, actual_length].min
        object[:raw_stream_content] = decrypt_proc.call(object[:raw_stream_content][0...length], encrypted_id, encrypted_generation, encrypted_filter)
      end
      object.each do |k, v|
        next if k == :raw_stream_content
        next unless v.is_a?(Hash) || v.is_a?(Array) || v.is_a?(String)
        object[k] = _perform_decrypt_proc_(v, decrypt_proc, encrypted_id, encrypted_generation, encrypted_filter)
      end
      object
    when String
      decrypt_proc.call(object, encrypted_id, encrypted_generation, encrypted_filter)
    else
      object
    end
  end

  # Prints +object+ (the encryption dictionary by default) to stderr and
  # raises a RuntimeError.
  def raise_encrypted_error(object = nil)
    object ||= @encryption_dictionary.to_s.split(',').join("\n")
    warn "Data raising exception:\n #{object.to_s.split(',').join("\n")}"
    raise "File is encrypted - not supported."
  end

  private

  # Returns a binary (ASCII-8BIT) copy of +value+ so that it can be appended
  # to the binary key material whatever encoding it arrived in.
  def binary(value)
    value.to_s.dup.force_encoding(Encoding::ASCII_8BIT)
  end
end
146
+ #####################################################
147
+ ## The following isn't my code!!!!
148
+ ## It is subject to a different license and copyright.
149
+ ## This was the code for the RC4 Gem,
150
+ ## ... I had a bad internet connection so I ended up
151
+ ## copying it from the web page I had in my cache.
152
+ ## This wonderful work was done by Caige Nichols.
153
+ #####################################################
154
+
155
# RC4 stream cipher. One instance holds one running keystream, so successive
# calls continue where the previous call stopped.
class RC4
  # str - the key; its bytes are repeated to fill the 256 entry key table.
  # Raises SyntaxError when the key is an empty string.
  def initialize(str)
    raise SyntaxError, "RC4: Key supplied is blank" if str.eql?('')

    @q1 = 0
    @q2 = 0
    # repeat the key bytes until the table holds exactly 256 entries
    @key = str.each_byte.to_a.cycle.first(256)
    @s = (0..255).to_a

    # key scheduling: shuffle the state table under control of the key
    swap_with = 0
    256.times do |index|
      swap_with = (swap_with + @s[index] + @key[index]) % 256
      @s[index], @s[swap_with] = @s[swap_with], @s[index]
    end
  end

  # Returns the encrypted bytes of +text+ as a new String.
  def encrypt!(text)
    xor_with_keystream text
  end

  # Same as #encrypt!, but works on a copy of +text+.
  def encrypt(text)
    xor_with_keystream text.dup
  end

  # RC4 is symmetric, decrypting is the very same operation.
  alias_method :decrypt, :encrypt

  private

  # XORs every byte of +text+ with the next keystream byte.
  def xor_with_keystream(text)
    mixed = text.each_byte.map { |byte| byte ^ next_keystream_byte }
    mixed.pack("C*")
  end

  # Advances the generator by one step and returns the keystream byte.
  def next_keystream_byte
    @q1 = (@q1 + 1) % 256
    @q2 = (@q2 + @s[@q1]) % 256
    @s[@q1], @s[@q2] = @s[@q2], @s[@q1]
    @s[(@s[@q1] + @s[@q2]) % 256]
  end
end
196
+
197
+ end
198
+
@@ -0,0 +1,72 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+
8
+ module CombinePDF
9
+
10
# Stream filter helpers. Only decoding of FlateDecode streams that do not
# use a predictor is supported.
module PDFFilter
  module_function

  # Encoding is not implemented; always returns false.
  def deflate_object(object = nil)
    false
  end

  # Decodes object[:raw_stream_content] in place, working through
  # object[:Filter] from first to last.
  #
  # object - a stream dictionary (Hash). :Filter may be a name, an array of
  #          names or a resolved reference wrapping either of them;
  #          :DecodeParms is unwrapped the same way.
  # filter - unused.
  #
  # Returns true when every filter was decoded (the :Filter entry is then
  # removed and :Length is updated), false when +object+ is not a Hash or an
  # unsupported filter is met. Raises a RuntimeError for FlateDecode data
  # that needs a predictor.
  def inflate_object(object = nil, filter = :none)
    return false unless object.is_a?(Hash)

    filter_array = object[:Filter]
    if filter_array.is_a?(Hash) && filter_array[:is_reference_only]
      filter_array = filter_array[:referenced_object]
    end
    filter_array = [filter_array] if filter_array.is_a?(Symbol)
    filter_array = [] if filter_array.nil?

    params_array = object[:DecodeParms]
    if params_array.is_a?(Hash) && params_array[:is_reference_only]
      params_array = params_array[:referenced_object]
    end
    params_array = [params_array] unless params_array.is_a?(Array)

    # filters and their parameters are consumed pairwise, front to back
    while filter_array[0]
      case filter_array[0]
      when :FlateDecode
        # Predictor 1 (or no predictor at all) means the data is plain
        # deflate; anything above 1 needs post-processing we do not have.
        raise_unsupported_error params_array[0] if flate_predictor(params_array[0]) > 1
        object[:raw_stream_content] = Zlib::Inflate.inflate object[:raw_stream_content]
        object[:Length] = object[:raw_stream_content].bytesize
      else
        return false
      end
      params_array.shift
      filter_array.shift
    end
    object.delete(:Filter)
    true
  end

  # Returns the predictor requested by a FlateDecode parameter dictionary
  # (1 when +params+ is nil or has no :Predictor entry). Raises through
  # raise_unsupported_error when +params+ cannot be interpreted.
  def flate_predictor(params)
    return 1 if params.nil?
    if params.is_a?(Hash) && params[:is_reference_only] && params[:referenced_object].is_a?(Hash)
      params = params[:referenced_object]
    end
    raise_unsupported_error params unless params.is_a?(Hash) && !params[:is_reference_only]
    predictor = params[:Predictor]
    return 1 if predictor.nil?
    raise_unsupported_error params unless predictor.is_a?(Numeric)
    predictor.to_i
  end

  # Raises a RuntimeError naming the offending parameters.
  def raise_unsupported_error(object = {})
    raise "Filter #{object} unsupported. couldn't deflate object"
  end
end
68
+
69
+ end
70
+
71
+
72
+
@@ -0,0 +1,315 @@
1
+ # -*- encoding : utf-8 -*-
2
+ ########################################################
3
+ ## Thoughts from reading the ISO 32000-1:2008
4
+ ## this file is part of the CombinePDF library and the code
5
+ ## is subject to the same license.
6
+ ########################################################
7
+ module CombinePDF
8
+
9
+ ########################################################
10
+ ## This is the Parser class.
11
+ ## It takes PDF data and parses it, returning an array
12
+ ## of data.
13
+ ## That array can be used to initialize a PDF object.
14
+ ## The Parser class doesn't involve itself with the
15
+ ## file version.
16
+ ########################################################
17
+
18
# Parses raw PDF data into an array of Ruby objects (Hashes for
# dictionaries, Arrays, Strings, Symbols for names, numbers, true/false/nil).
# Indirect objects carry :indirect_reference_id and
# :indirect_generation_number; references are Hashes flagged with
# :is_reference_only.
class PDFParser
  # byte following a backslash => byte it stands for (\n \r \t \b \f)
  LITERAL_STRING_ESCAPES = { 110 => 10, 114 => 13, 116 => 9, 98 => 8, 102 => 12 }.freeze

  attr_reader :parsed, :version, :info_object, :root_object

  # string - the PDF data. It is copied before being re-tagged as binary, so
  #          the caller's string (which may be frozen) is left untouched.
  # Raises TypeError unless +string+ is a String.
  def initialize(string)
    raise TypeError, "couldn't parse and data, expecting type String" unless string.is_a? String
    @string_to_parse = string.dup.force_encoding(Encoding::ASCII_8BIT)
    @literal_strings = []
    @hex_strings = []
    @streams = []
    @parsed = []
    @root_object = {}
    @info_object = {}
    @version = nil
    @scanner = nil
  end

  # Parses the data (once; later calls return the stored result) and
  # returns the array of top level objects.
  # Raises a RuntimeError when no trailer / XRef dictionary is found.
  def parse
    return @parsed unless @parsed.empty?
    @scanner = StringScanner.new @string_to_parse
    @scanner.pos = 0
    if @scanner.scan(/\%PDF\-[\d\-\.]+/)
      @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
    end

    warn "Starting to parse PDF data."
    @parsed = _parse_

    # without a classic trailer, the XRef stream dictionaries act as root
    if @root_object == {}
      xref_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :XRef }
      xref_streams.each { |xref_dictionary| @root_object.merge! xref_dictionary }
    end
    raise "root is unknown - cannot determine if file is Encrypted" if @root_object == {}
    warn "Injecting actual values into root object: #{@root_object}."
    PDFOperations.change_references_to_actual_values @parsed, @root_object

    if @root_object[:Encrypt]
      warn "PDF is Encrypted! Attempting to unencrypt - not yet fully supported."
      decryptor = PDFDecrypt.new @parsed, @root_object
      decryptor.decrypt # works on @parsed in place
    end

    # a missing %PDF header leaves @version nil, which counts as "below 1.5"
    if @version.to_f >= 1.5
      object_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm }
      unless object_streams.empty?
        warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."
        object_streams.each { |object_stream| extract_object_stream object_stream }
        # remove the object streams and the XREF dictionaries
        @parsed.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :ObjStm || obj[:Type] == :XRef) }
      end
    end

    PDFOperations.change_references_to_actual_values @parsed, @root_object
    @info_object = @root_object[:Info]
    if @info_object && @info_object.is_a?(Hash)
      @parsed.delete @info_object
      PDFOperations.change_references_to_actual_values @parsed, @info_object
      PRIVATE_HASH_KEYS.each { |key| @info_object.delete key }
    else
      @info_object = {}
    end
    warn "setting parsed collection and returning collection."
    @parsed
  end

  protected

  # Reads objects from @scanner until the data ends or the array/dictionary
  # being read is closed, and returns them as an Array. A trailer dictionary
  # met on the way is stored in @root_object.
  # The order of the branches matters: each one is tried at the current
  # scanner position and the first match wins.
  def _parse_
    out = []
    while @scanner.rest?
      case
      ## an Array
      when @scanner.scan(/\[/)
        out << _parse_
      ## a Dictionary
      when @scanner.scan(/<</)
        out << pairs_to_hash(_parse_)
      ## end of array or dictionary - hand the content back
      when @scanner.scan(/\]/), @scanner.scan(/>>/)
        return out
      ## a Stream
      when @scanner.scan(/stream[\r]?[\n]/)
        raw = @scanner.scan_until(/endstream/)
        if raw
          raw = raw[0...-10] # drops "endstream" and only one EOL char (\n or \r)
        else
          warn "Stream is missing its endstream marker at #{@scanner.pos}!"
          raw = @scanner.rest
          @scanner.terminate
        end
        if out.last.is_a? Hash
          out.last[:raw_stream_content] = raw
        else
          warn "Stream not attached to dictionary!"
          out << raw.force_encoding(Encoding::ASCII_8BIT)
        end
      ## end of an indirect Object - tag the object with its id and generation
      when @scanner.scan(/endobj/)
        if out.last.is_a? Hash
          out << out.pop.merge({ indirect_generation_number: out.pop, indirect_reference_id: out.pop })
        else
          out << { indirect_without_dictionary: out.pop, indirect_generation_number: out.pop, indirect_reference_id: out.pop }
        end
      ## a Hex String - either letter case; white space inside is ignored
      when str = @scanner.scan(/<[0-9a-fA-F\s]*>/)
        out << [str[1..-2].gsub(/\s+/, '')].pack('H*')
      ## a Literal String
      when @scanner.scan(/\(/)
        out << parse_literal_string
      ## a comment - skip until new line (or to the end of the data)
      when @scanner.scan(/\%/)
        @scanner.skip_until(/[\n\r]+/) || @scanner.terminate
      ## a Name: every character that isn't white space or a delimiter
      when str = @scanner.scan(/\/[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+/)
        out << (str[1..-1].gsub(/\#[0-9a-fA-F]{2}/) { |a| a[1..2].hex.chr }).to_sym
      ## a Number
      when str = @scanner.scan(/[\+\-\.\d]+/)
        out << (str.include?('.') ? str.to_f : str.to_i)
      ## an Object Reference - the two numbers before it are id and generation
      when @scanner.scan(/R/)
        out << { is_reference_only: true, indirect_generation_number: out.pop, indirect_reference_id: out.pop }
      ## Bool - true and after false
      when @scanner.scan(/true/)
        out << true
      when @scanner.scan(/false/)
        out << false
      ## NULL
      when @scanner.scan(/null/)
        out << nil
      ## XREF table - skipped, only the trailer dictionary is kept
      when @scanner.scan(/xref/)
        if @scanner.scan_until(/(trailer)|(\%EOF)/) && @scanner.matched[-1] == 'r'
          @root_object = pairs_to_hash(_parse_) if @scanner.skip_until(/<</)
          # skip until end of segment, marked by %%EOF
          @scanner.skip_until(/\%\%EOF/)
        end
      ## white space and the "obj" keyword carry no data
      when @scanner.scan(/[\s]+/), @scanner.scan(/obj[\s]*/)
        nil
      else
        # unknown byte - always advance
        @scanner.pos = @scanner.pos + 1
      end
    end
    out
  end

  # Turns a flat [key, value, key, value...] list into a Hash. Stops at the
  # first nil/false key, consuming the list as it goes.
  def pairs_to_hash(data)
    obj = {}
    obj[data.shift] = data.shift while data[0]
    obj
  end

  # Reads a literal string whose opening "(" was just consumed and returns
  # its decoded bytes as a binary String. Parentheses may nest; a
  # parenthesis preceded by an odd number of backslashes is escaped.
  def parse_literal_string
    str = String.new
    depth = 1
    closed = true
    while depth > 0 && @scanner.rest?
      chunk = @scanner.scan_until(/[\(\)]/)
      unless chunk
        # no parenthesis left: the string is unterminated. Take the rest of
        # the data and stop (this used to loop forever).
        warn "Unknown error parsing string at #{@scanner.pos}!"
        str << @scanner.rest
        @scanner.terminate
        closed = false
        break
      end
      str << chunk
      backslashes = 0
      backslashes += 1 while str[-2 - backslashes] == "\\"
      next if backslashes.odd?
      if str[-1] == '('
        depth += 1
      else
        depth -= 1
      end
    end
    # the PDF formatted string excludes the parenthesis that ended the scan
    raw = closed ? str[0..-2] : str
    unescape_literal_string(raw.bytes.to_a).pack('C*')
  end

  # Converts the bytes of a PDF literal string into the bytes they stand
  # for and returns them as an Array of Integers:
  # * an unescaped end-of-line (CR, LF or both) becomes a single LF,
  # * \n \r \t \b \f become the matching control character,
  # * \ddd (one to three octal digits) becomes that byte, overflow ignored,
  # * a backslash followed by an end-of-line is a line continuation,
  # * any other escaped byte stands for itself.
  def unescape_literal_string(bytes)
    decoded = []
    i = 0
    while i < bytes.length
      byte = bytes[i]
      i += 1
      case byte
      when 13 # eol - \r or \r\n
        i += 1 if bytes[i] == 10
        decoded << 10
      when 10 # eol - \n or \n\r
        i += 1 if bytes[i] == 13
        decoded << 10
      when 92 # "\\".ord == 92
        escaped = bytes[i]
        break if escaped.nil? # a lone backslash at the very end is dropped
        i += 1
        case escaped
        when 48..55 # octal notation for a byte
          value = escaped - 48
          2.times do
            break unless bytes[i] && bytes[i].between?(48, 55)
            value = value * 8 + (bytes[i] - 48)
            i += 1
          end
          decoded << (value & 0xFF)
        when 10 # escaped new line, ignore
          i += 1 if bytes[i] == 13
        when 13 # escaped new line (or double notation for new line), ignore
          i += 1 if bytes[i] == 10
        else
          decoded << (LITERAL_STRING_ESCAPES[escaped] || escaped)
        end
      else
        decoded << byte
      end
    end
    decoded
  end

  # Decodes one object stream and appends the objects it holds to @parsed.
  # The stream data starts with "object-number offset" pairs (:N of them
  # when the dictionary says so, otherwise as many leading integer pairs as
  # are found) followed by the objects themselves; objects inside an object
  # stream always have generation number 0. Replaces @scanner.
  def extract_object_stream(object_stream)
    warn "Attempting #{object_stream.select { |k, v| k != :raw_stream_content }}"
    ## un-encode (using the correct filter) the object stream
    unless PDFFilter.inflate_object(object_stream)
      warn "Couldn't decode object stream - skipping it."
      return
    end
    ## extract objects from stream to top level array @parsed
    @scanner = StringScanner.new object_stream[:raw_stream_content]
    stream_data = _parse_
    id_array = []
    declared = object_stream[:N]
    if declared.is_a?(Integer) && declared >= 0
      declared.times do
        id_array << stream_data.shift
        stream_data.shift # the offset isn't needed
      end
    else
      while stream_data[0].is_a? Integer
        id_array << stream_data.shift
        stream_data.shift
      end
    end
    id_array.each do |id|
      break if stream_data.empty?
      item = stream_data.shift
      if item.is_a? Hash
        item[:indirect_reference_id] = id
        item[:indirect_generation_number] = 0
        @parsed << item
      else
        @parsed << { indirect_without_dictionary: item, indirect_generation_number: 0, indirect_reference_id: id }
      end
    end
  end
end
315
+ end