python-pickle 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +3 -0
- data/.github/workflows/ruby.yml +27 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +14 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +149 -0
- data/Rakefile +13 -0
- data/gemspec.yml +25 -0
- data/lib/python/pickle/byte_array.rb +40 -0
- data/lib/python/pickle/deserializer.rb +595 -0
- data/lib/python/pickle/exceptions.rb +12 -0
- data/lib/python/pickle/instruction.rb +52 -0
- data/lib/python/pickle/instructions/add_items.rb +26 -0
- data/lib/python/pickle/instructions/append.rb +24 -0
- data/lib/python/pickle/instructions/appends.rb +26 -0
- data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
- data/lib/python/pickle/instructions/bin_float.rb +29 -0
- data/lib/python/pickle/instructions/bin_get.rb +27 -0
- data/lib/python/pickle/instructions/bin_int1.rb +29 -0
- data/lib/python/pickle/instructions/bin_put.rb +29 -0
- data/lib/python/pickle/instructions/bin_string.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
- data/lib/python/pickle/instructions/build.rb +24 -0
- data/lib/python/pickle/instructions/byte_array8.rb +32 -0
- data/lib/python/pickle/instructions/dict.rb +17 -0
- data/lib/python/pickle/instructions/dup.rb +24 -0
- data/lib/python/pickle/instructions/empty_dict.rb +26 -0
- data/lib/python/pickle/instructions/empty_list.rb +26 -0
- data/lib/python/pickle/instructions/empty_set.rb +26 -0
- data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
- data/lib/python/pickle/instructions/ext1.rb +29 -0
- data/lib/python/pickle/instructions/ext2.rb +29 -0
- data/lib/python/pickle/instructions/ext4.rb +29 -0
- data/lib/python/pickle/instructions/float.rb +24 -0
- data/lib/python/pickle/instructions/frame.rb +29 -0
- data/lib/python/pickle/instructions/frozen_set.rb +26 -0
- data/lib/python/pickle/instructions/get.rb +27 -0
- data/lib/python/pickle/instructions/global.rb +62 -0
- data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
- data/lib/python/pickle/instructions/has_value.rb +50 -0
- data/lib/python/pickle/instructions/int.rb +24 -0
- data/lib/python/pickle/instructions/list.rb +24 -0
- data/lib/python/pickle/instructions/long.rb +24 -0
- data/lib/python/pickle/instructions/long1.rb +32 -0
- data/lib/python/pickle/instructions/long4.rb +32 -0
- data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
- data/lib/python/pickle/instructions/mark.rb +24 -0
- data/lib/python/pickle/instructions/memoize.rb +26 -0
- data/lib/python/pickle/instructions/new_false.rb +24 -0
- data/lib/python/pickle/instructions/new_obj.rb +26 -0
- data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
- data/lib/python/pickle/instructions/new_true.rb +24 -0
- data/lib/python/pickle/instructions/next_buffer.rb +26 -0
- data/lib/python/pickle/instructions/none.rb +24 -0
- data/lib/python/pickle/instructions/pop.rb +24 -0
- data/lib/python/pickle/instructions/pop_mark.rb +24 -0
- data/lib/python/pickle/instructions/proto.rb +29 -0
- data/lib/python/pickle/instructions/put.rb +24 -0
- data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
- data/lib/python/pickle/instructions/reduce.rb +24 -0
- data/lib/python/pickle/instructions/set_item.rb +24 -0
- data/lib/python/pickle/instructions/set_items.rb +26 -0
- data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/stack_global.rb +26 -0
- data/lib/python/pickle/instructions/stop.rb +24 -0
- data/lib/python/pickle/instructions/string.rb +24 -0
- data/lib/python/pickle/instructions/tuple.rb +24 -0
- data/lib/python/pickle/instructions/tuple1.rb +24 -0
- data/lib/python/pickle/instructions/tuple2.rb +24 -0
- data/lib/python/pickle/instructions/tuple3.rb +24 -0
- data/lib/python/pickle/protocol.rb +56 -0
- data/lib/python/pickle/protocol0.rb +399 -0
- data/lib/python/pickle/protocol1.rb +183 -0
- data/lib/python/pickle/protocol2.rb +229 -0
- data/lib/python/pickle/protocol3.rb +163 -0
- data/lib/python/pickle/protocol4.rb +285 -0
- data/lib/python/pickle/protocol5.rb +218 -0
- data/lib/python/pickle/py_class.rb +75 -0
- data/lib/python/pickle/py_object.rb +141 -0
- data/lib/python/pickle/tuple.rb +19 -0
- data/lib/python/pickle/version.rb +6 -0
- data/lib/python/pickle.rb +226 -0
- data/python-pickle.gemspec +62 -0
- data/spec/byte_array_spec.rb +54 -0
- data/spec/deserializer_spec.rb +1201 -0
- data/spec/fixtures/ascii_str_v3.pkl +0 -0
- data/spec/fixtures/ascii_str_v4.pkl +0 -0
- data/spec/fixtures/ascii_str_v5.pkl +0 -0
- data/spec/fixtures/bin_str_v0.pkl +3 -0
- data/spec/fixtures/bin_str_v1.pkl +0 -0
- data/spec/fixtures/bin_str_v2.pkl +0 -0
- data/spec/fixtures/bin_str_v3.pkl +0 -0
- data/spec/fixtures/bin_str_v4.pkl +0 -0
- data/spec/fixtures/bin_str_v5.pkl +0 -0
- data/spec/fixtures/bytearray_v0.pkl +10 -0
- data/spec/fixtures/bytearray_v1.pkl +0 -0
- data/spec/fixtures/bytearray_v2.pkl +0 -0
- data/spec/fixtures/bytearray_v3.pkl +0 -0
- data/spec/fixtures/bytearray_v4.pkl +0 -0
- data/spec/fixtures/bytearray_v5.pkl +0 -0
- data/spec/fixtures/class_v0.pkl +4 -0
- data/spec/fixtures/class_v1.pkl +0 -0
- data/spec/fixtures/class_v2.pkl +0 -0
- data/spec/fixtures/class_v3.pkl +0 -0
- data/spec/fixtures/class_v4.pkl +0 -0
- data/spec/fixtures/class_v5.pkl +0 -0
- data/spec/fixtures/dict_v0.pkl +6 -0
- data/spec/fixtures/dict_v1.pkl +0 -0
- data/spec/fixtures/dict_v2.pkl +0 -0
- data/spec/fixtures/dict_v3.pkl +0 -0
- data/spec/fixtures/dict_v4.pkl +0 -0
- data/spec/fixtures/dict_v5.pkl +0 -0
- data/spec/fixtures/escaped_str_v0.pkl +3 -0
- data/spec/fixtures/escaped_str_v1.pkl +0 -0
- data/spec/fixtures/escaped_str_v2.pkl +0 -0
- data/spec/fixtures/false_v0.pkl +2 -0
- data/spec/fixtures/false_v1.pkl +2 -0
- data/spec/fixtures/false_v2.pkl +1 -0
- data/spec/fixtures/false_v3.pkl +1 -0
- data/spec/fixtures/false_v4.pkl +1 -0
- data/spec/fixtures/false_v5.pkl +1 -0
- data/spec/fixtures/float_v0.pkl +2 -0
- data/spec/fixtures/float_v1.pkl +1 -0
- data/spec/fixtures/float_v2.pkl +1 -0
- data/spec/fixtures/float_v3.pkl +1 -0
- data/spec/fixtures/float_v4.pkl +0 -0
- data/spec/fixtures/float_v5.pkl +0 -0
- data/spec/fixtures/function_v0.pkl +4 -0
- data/spec/fixtures/function_v1.pkl +0 -0
- data/spec/fixtures/function_v2.pkl +0 -0
- data/spec/fixtures/function_v3.pkl +0 -0
- data/spec/fixtures/function_v4.pkl +0 -0
- data/spec/fixtures/function_v5.pkl +0 -0
- data/spec/fixtures/hex_str_v0.pkl +3 -0
- data/spec/fixtures/hex_str_v1.pkl +0 -0
- data/spec/fixtures/hex_str_v2.pkl +0 -0
- data/spec/fixtures/int_v0.pkl +2 -0
- data/spec/fixtures/int_v1.pkl +1 -0
- data/spec/fixtures/int_v2.pkl +1 -0
- data/spec/fixtures/int_v3.pkl +1 -0
- data/spec/fixtures/int_v4.pkl +1 -0
- data/spec/fixtures/int_v5.pkl +1 -0
- data/spec/fixtures/list_v0.pkl +7 -0
- data/spec/fixtures/list_v1.pkl +0 -0
- data/spec/fixtures/list_v2.pkl +0 -0
- data/spec/fixtures/list_v3.pkl +0 -0
- data/spec/fixtures/list_v4.pkl +0 -0
- data/spec/fixtures/list_v5.pkl +0 -0
- data/spec/fixtures/long_v0.pkl +2 -0
- data/spec/fixtures/long_v1.pkl +2 -0
- data/spec/fixtures/long_v2.pkl +0 -0
- data/spec/fixtures/long_v3.pkl +0 -0
- data/spec/fixtures/long_v4.pkl +0 -0
- data/spec/fixtures/long_v5.pkl +0 -0
- data/spec/fixtures/nested_dict_v0.pkl +12 -0
- data/spec/fixtures/nested_dict_v1.pkl +0 -0
- data/spec/fixtures/nested_dict_v2.pkl +0 -0
- data/spec/fixtures/nested_dict_v3.pkl +0 -0
- data/spec/fixtures/nested_dict_v4.pkl +0 -0
- data/spec/fixtures/nested_dict_v5.pkl +0 -0
- data/spec/fixtures/nested_list_v0.pkl +9 -0
- data/spec/fixtures/nested_list_v1.pkl +0 -0
- data/spec/fixtures/nested_list_v2.pkl +0 -0
- data/spec/fixtures/nested_list_v3.pkl +0 -0
- data/spec/fixtures/nested_list_v4.pkl +0 -0
- data/spec/fixtures/nested_list_v5.pkl +0 -0
- data/spec/fixtures/none_v0.pkl +1 -0
- data/spec/fixtures/none_v1.pkl +1 -0
- data/spec/fixtures/none_v2.pkl +1 -0
- data/spec/fixtures/none_v3.pkl +1 -0
- data/spec/fixtures/none_v4.pkl +1 -0
- data/spec/fixtures/none_v5.pkl +1 -0
- data/spec/fixtures/object_v0.pkl +19 -0
- data/spec/fixtures/object_v1.pkl +0 -0
- data/spec/fixtures/object_v2.pkl +0 -0
- data/spec/fixtures/object_v3.pkl +0 -0
- data/spec/fixtures/object_v4.pkl +0 -0
- data/spec/fixtures/object_v5.pkl +0 -0
- data/spec/fixtures/str_v0.pkl +3 -0
- data/spec/fixtures/str_v1.pkl +0 -0
- data/spec/fixtures/str_v2.pkl +0 -0
- data/spec/fixtures/str_v3.pkl +0 -0
- data/spec/fixtures/str_v4.pkl +0 -0
- data/spec/fixtures/str_v5.pkl +0 -0
- data/spec/fixtures/true_v0.pkl +2 -0
- data/spec/fixtures/true_v1.pkl +2 -0
- data/spec/fixtures/true_v2.pkl +1 -0
- data/spec/fixtures/true_v3.pkl +1 -0
- data/spec/fixtures/true_v4.pkl +1 -0
- data/spec/fixtures/true_v5.pkl +1 -0
- data/spec/fixtures/unicode_str_v0.pkl +3 -0
- data/spec/fixtures/unicode_str_v1.pkl +0 -0
- data/spec/fixtures/unicode_str_v2.pkl +0 -0
- data/spec/fixtures/unicode_str_v3.pkl +0 -0
- data/spec/fixtures/unicode_str_v4.pkl +0 -0
- data/spec/fixtures/unicode_str_v5.pkl +0 -0
- data/spec/generate_pickles2.py +41 -0
- data/spec/generate_pickles3.py +40 -0
- data/spec/integration/load/protocol0_spec.rb +258 -0
- data/spec/integration/load/protocol1_spec.rb +258 -0
- data/spec/integration/load/protocol2_spec.rb +258 -0
- data/spec/integration/load/protocol3_spec.rb +258 -0
- data/spec/integration/load/protocol4_spec.rb +258 -0
- data/spec/integration/load/protocol5_spec.rb +258 -0
- data/spec/integration/parse/protocol0_spec.rb +467 -0
- data/spec/integration/parse/protocol1_spec.rb +459 -0
- data/spec/integration/parse/protocol2_spec.rb +471 -0
- data/spec/integration/parse/protocol3_spec.rb +407 -0
- data/spec/integration/parse/protocol4_spec.rb +439 -0
- data/spec/integration/parse/protocol5_spec.rb +419 -0
- data/spec/pickle_spec.rb +163 -0
- data/spec/protocol0_read_instruction_examples.rb +211 -0
- data/spec/protocol0_spec.rb +445 -0
- data/spec/protocol1_read_instruction_examples.rb +156 -0
- data/spec/protocol1_spec.rb +59 -0
- data/spec/protocol2_read_instruction_examples.rb +135 -0
- data/spec/protocol2_spec.rb +128 -0
- data/spec/protocol3_read_instruction_examples.rb +29 -0
- data/spec/protocol3_spec.rb +32 -0
- data/spec/protocol4_read_instruction_examples.rb +142 -0
- data/spec/protocol4_spec.rb +58 -0
- data/spec/protocol5_spec.rb +68 -0
- data/spec/py_class_spec.rb +62 -0
- data/spec/py_object_spec.rb +149 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/tuple_spec.rb +18 -0
- metadata +325 -0
@@ -0,0 +1,399 @@
|
|
1
|
+
require 'python/pickle/protocol'
|
2
|
+
require 'python/pickle/instructions/mark'
|
3
|
+
require 'python/pickle/instructions/dict'
|
4
|
+
require 'python/pickle/instructions/string'
|
5
|
+
require 'python/pickle/instructions/put'
|
6
|
+
require 'python/pickle/instructions/get'
|
7
|
+
require 'python/pickle/instructions/float'
|
8
|
+
require 'python/pickle/instructions/int'
|
9
|
+
require 'python/pickle/instructions/long'
|
10
|
+
require 'python/pickle/instructions/set_item'
|
11
|
+
require 'python/pickle/instructions/tuple'
|
12
|
+
require 'python/pickle/instructions/list'
|
13
|
+
require 'python/pickle/instructions/none'
|
14
|
+
require 'python/pickle/instructions/append'
|
15
|
+
require 'python/pickle/instructions/global'
|
16
|
+
require 'python/pickle/instructions/reduce'
|
17
|
+
require 'python/pickle/instructions/build'
|
18
|
+
require 'python/pickle/instructions/pop'
|
19
|
+
require 'python/pickle/instructions/pop_mark'
|
20
|
+
require 'python/pickle/instructions/dup'
|
21
|
+
require 'python/pickle/instructions/stop'
|
22
|
+
require 'python/pickle/exceptions'
|
23
|
+
|
24
|
+
require 'set'
|
25
|
+
|
26
|
+
module Python
|
27
|
+
module Pickle
|
28
|
+
#
|
29
|
+
# Implements reading and writing of Python Pickle protocol 0.
|
30
|
+
#
|
31
|
+
# @api private
|
32
|
+
#
|
33
|
+
class Protocol0 < Protocol
|
34
|
+
|
35
|
+
# The set of opcode byte values recognized by Pickle protocol version 0.
#
# Every opcode is the byte value of a single ASCII character, which is
# shown next to the opcode's name.
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = Set.new([
  40,  # MARK     '('
  46,  # STOP     '.'
  48,  # POP      '0'
  49,  # POP_MARK '1'
  50,  # DUP      '2'
  70,  # FLOAT    'F'
  73,  # INT      'I'
  76,  # LONG     'L'
  78,  # NONE     'N'
  82,  # REDUCE   'R'
  83,  # STRING   'S'
  86,  # UNICODE  'V'
  97,  # APPEND   'a'
  98,  # BUILD    'b'
  99,  # GLOBAL   'c'
  100, # DICT     'd'
  103, # GET      'g'
  108, # LIST     'l'
  112, # PUT      'p'
  115, # SETITEM  's'
  116  # TUPLE    't'
])
|
61
|
+
|
62
|
+
#
# Decodes the next instruction from the pickle stream.
#
# A single opcode byte is read from the stream. Opcodes which carry an
# argument (FLOAT, INT, LONG, STRING, UNICODE, GLOBAL, GET, PUT) also
# consume their argument from the stream.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The byte read (or `nil` at the end of the stream) is not an opcode
#   handled by protocol 0.
#
def read_instruction
  opcode = @io.getbyte

  case opcode
  when 40  then Instructions::MARK
  when 46  then Instructions::STOP
  when 48  then Instructions::POP
  when 49  then Instructions::POP_MARK
  when 50  then Instructions::DUP
  when 70  then Instructions::Float.new(read_float)           # FLOAT
  when 73  then Instructions::Int.new(read_int)               # INT
  when 76  then Instructions::Long.new(read_long)             # LONG
  when 78  then Instructions::NONE
  when 82  then Instructions::REDUCE
  when 83  then Instructions::String.new(read_string)         # STRING
  when 86  then Instructions::String.new(read_unicode_string) # UNICODE
  when 97  then Instructions::APPEND
  when 98  then Instructions::BUILD
  when 99 # GLOBAL
    # two consecutive newline terminated strings, read in stream order
    Instructions::Global.new(read_nl_string,read_nl_string)
  when 100 then Instructions::DICT
  when 103 then Instructions::Get.new(read_int)               # GET
  when 108 then Instructions::LIST
  when 112 then Instructions::Put.new(read_int)               # PUT
  when 115 then Instructions::SETITEM
  when 116 then Instructions::TUPLE
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 0")
  end
end
|
119
|
+
|
120
|
+
#
# Reads the two hex digits of a `\xNN` escape from the pickle stream.
#
# @return [String]
#   The single character whose byte value is given by the two hex digits.
#
# @raise [InvalidFormat]
#   The next two bytes were not hex digits, or the stream ended before two
#   bytes could be read.
#
def read_hex_escaped_char
  string = @io.read(2)

  # IO#read returns nil at the end of the stream; treat that like any other
  # malformed escape instead of letting "nil" leak into the error message.
  unless string && string.match?(/\A[0-9a-fA-F]{2}\z/)
    bad_hex = string.to_s.inspect[1..-2]

    raise(InvalidFormat,"invalid hex escape character: \"\\x#{bad_hex}\"")
  end

  return string.to_i(16).chr
end
|
140
|
+
|
141
|
+
#
# Reads the character following a backslash from the pickle stream and
# returns the character that the escape sequence represents.
#
# Supported escapes are `\xNN`, `\t`, `\n`, `\r`, `\\` and `\'`.
#
# @return [String]
#   The unescaped raw character.
#
# @raise [InvalidFormat]
#   The escape character was not recognized, or the stream ended directly
#   after the backslash.
#
def read_escaped_char
  case (letter = @io.getc)
  when 'x'  then read_hex_escaped_char
  when 't'  then "\t"
  when 'n'  then "\n"
  when 'r'  then "\r"
  when '\\' then '\\'
  when "'"  then "'"
  when nil
    # getc returns nil at the end of the stream
    raise(InvalidFormat,"unexpected end of stream after a backslash escape character")
  else
    bad_escape = letter.inspect[1..-2]

    raise(InvalidFormat,"invalid backslash escape character: \"\\#{bad_escape}\"")
  end
end
|
161
|
+
|
162
|
+
#
# Reads characters from the pickle stream up to the next newline.
#
# @return [String]
#   The characters read, without the terminating newline.
#
# @raise [InvalidFormat]
#   The stream ended before a newline was found.
#
def read_nl_string
  buffer = String.new

  # getc returns nil once the stream is exhausted
  while (char = @io.getc)
    return buffer if char == "\n"

    buffer << char
  end

  raise(InvalidFormat,"unexpected end of stream after the end of a newline terminated string")
end
|
185
|
+
|
186
|
+
#
# Reads a quoted, newline terminated string from the pickle stream.
#
# The string may be enclosed in single quotes or in double quotes (Python's
# `repr` switches to double quotes when the string itself contains a single
# quote). The closing quote must match the opening quote and must be
# followed directly by a newline. Backslash escapes inside the string are
# decoded with {#read_escaped_char}.
#
# @return [String]
#   The decoded raw string.
#
# @raise [InvalidFormat]
#   The opening quote was missing, the stream ended prematurely, or the
#   closing quote was not followed by a newline.
#
def read_string
  new_string = String.new(encoding: Encoding::ASCII_8BIT)

  quote = @io.getc

  unless quote == "'" || quote == '"'
    raise(InvalidFormat,"cannot find beginning single-quote of string")
  end

  until @io.eof?
    case (char = @io.getc)
    when "\\"
      new_string << read_escaped_char
    when quote # end-of-string (must match the opening quote)
      break
    else
      new_string << char
    end
  end

  newline = @io.getc

  if newline.nil?
    raise(InvalidFormat,"unexpected end of stream after the end of a single-quoted string")
  elsif newline != "\n"
    raise(InvalidFormat,"expected a '\\n' character following the string, but was #{newline.inspect}")
  end

  return new_string
end
|
220
|
+
|
221
|
+
#
# Reads the four hex digits of a `\uXXXX` escape from the pickle stream.
#
# @return [String]
#   The decoded UTF-8 character.
#
# @raise [InvalidFormat]
#   The next four bytes were not hex digits, the stream ended prematurely,
#   or the value is not a valid UTF-8 code point (e.g. a lone surrogate).
#
def read_unicode_escaped_char16
  string = @io.read(4)

  # IO#read returns nil at the end of the stream
  unless string && string.match?(/\A[0-9a-fA-F]{4}\z/)
    bad_unicode = string.to_s.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{bad_unicode}\"")
  end

  begin
    return string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points which cannot be encoded as UTF-8
    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{string}\"")
  end
end
|
241
|
+
|
242
|
+
#
# Reads the eight hex digits of a `\UXXXXXXXX` escape from the pickle
# stream.
#
# @return [String]
#   The decoded UTF-8 character.
#
# @raise [InvalidFormat]
#   The next eight bytes were not hex digits, the stream ended prematurely,
#   or the value is not a valid UTF-8 code point (e.g. a surrogate or a
#   value above U+10FFFF).
#
def read_unicode_escaped_char32
  string = @io.read(8)

  # IO#read returns nil at the end of the stream
  unless string && string.match?(/\A[0-9a-fA-F]{8}\z/)
    bad_unicode = string.to_s.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{bad_unicode}\"")
  end

  begin
    return string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points which cannot be encoded as UTF-8
    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{string}\"")
  end
end
|
262
|
+
|
263
|
+
#
# Reads the character following a backslash inside a unicode string and
# returns the character that the escape sequence represents.
#
# Supported escapes are `\xNN`, `\uXXXX`, `\UXXXXXXXX` and `\\`.
#
# @return [String]
#   The unescaped raw unicode character.
#
# @raise [InvalidFormat]
#   The escape character was not recognized, or the stream ended directly
#   after the backslash.
#
def read_unicode_escaped_char
  case (letter = @io.getc)
  when 'x'  then read_hex_escaped_char
  when 'u'  then read_unicode_escaped_char16
  when 'U'  then read_unicode_escaped_char32
  when "\\" then "\\"
  when nil
    # getc returns nil at the end of the stream
    raise(InvalidFormat,"unexpected end of stream after a unicode escape character")
  else
    bad_escape = letter.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\#{bad_escape}\"")
  end
end
|
281
|
+
|
282
|
+
#
# Reads a newline terminated unicode String from the pickle stream,
# decoding any backslash escapes with {#read_unicode_escaped_char}.
#
# @return [String]
#   The decoded String, without the terminating newline.
#
# @raise [InvalidFormat]
#   The stream ended before the terminating newline was found.
#
def read_unicode_string
  buffer = String.new(encoding: Encoding::UTF_8)

  # getc returns nil once the stream is exhausted
  while (char = @io.getc)
    if char == "\\"
      # backslash escaped character
      buffer << read_unicode_escaped_char
    elsif char == "\n"
      # end-of-string
      return buffer
    else
      buffer << char
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing unicode string: #{buffer.inspect}")
end
|
304
|
+
|
305
|
+
#
# Reads a newline terminated floating-point decimal from the pickle stream.
#
# Besides plain decimals (`1.5`), signed values (`-1.5`), exponent notation
# (`1e+100`) and the special values `inf`, `-inf` and `nan` are accepted,
# since Python writes all of these for the FLOAT opcode.
#
# @return [Float]
#   The decoded float.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_float
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9.eE+\-]/
      new_string << char
    when /[infa]/
      # letters are only valid as part of "inf" / "nan"; checked below once
      # the whole token has been read
      new_string << char
    when "\n" # end-of-float
      case new_string
      when /\A[+-]?inf\z/
        return new_string.start_with?('-') ? -Float::INFINITY : Float::INFINITY
      when /\A[+-]?nan\z/
        return Float::NAN
      when /[infa]/
        # stray letters which do not spell out "inf" or "nan"
        raise(InvalidFormat,"encountered a non-numeric character while reading a float: #{new_string[/[infa]/].inspect}")
      else
        return new_string.to_f
      end
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a float: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a float: #{new_string.inspect}")
end
|
330
|
+
|
331
|
+
#
# Reads a newline terminated decimal integer from the pickle stream.
#
# A leading minus sign is accepted, so that negative integers (which Python
# writes as e.g. `I-1`) can be decoded.
#
# @return [Integer, true, false]
#   The decoded Integer.
#   If the integer is `00`, then `false` will be returned.
#   If the integer is `01`, then `true` will be returned.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_int
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-'
      # a minus sign is only valid as the very first character
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
      end

      new_string << char
    when "\n" # end-of-integer
      return case new_string
             when '00' then false
             when '01' then true
             else           new_string.to_i
             end
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing an integer: #{new_string.inspect}")
end
|
362
|
+
|
363
|
+
#
# Reads a long integer from the pickle stream.
#
# The long integer consists of an optional leading minus sign, decimal
# digits, a terminating `L` character and a newline (e.g. `-1234L`).
#
# @return [Integer]
#   The decoded Integer.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_long
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-'
      # a minus sign is only valid as the very first character
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
      end

      new_string << char
    when 'L'
      # the 'L' suffix must be followed directly by a newline
      newline = @io.getc

      if newline.nil?
        raise(InvalidFormat,"unexpected end of stream after the end of an integer")
      elsif newline != "\n"
        raise(InvalidFormat,"expected a '\\n' character following the integer, but was #{newline.inspect}")
      end

      return new_string.to_i
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a long integer: #{new_string.inspect}")
end
|
396
|
+
|
397
|
+
end
|
398
|
+
end
|
399
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'python/pickle/protocol0'
|
2
|
+
require 'python/pickle/instructions/mark'
|
3
|
+
require 'python/pickle/instructions/empty_tuple'
|
4
|
+
require 'python/pickle/instructions/stop'
|
5
|
+
require 'python/pickle/instructions/bin_float'
|
6
|
+
require 'python/pickle/instructions/bin_int1'
|
7
|
+
require 'python/pickle/instructions/int'
|
8
|
+
require 'python/pickle/instructions/long'
|
9
|
+
require 'python/pickle/instructions/none'
|
10
|
+
require 'python/pickle/instructions/reduce'
|
11
|
+
require 'python/pickle/instructions/bin_string'
|
12
|
+
require 'python/pickle/instructions/short_bin_string'
|
13
|
+
require 'python/pickle/instructions/bin_unicode'
|
14
|
+
require 'python/pickle/instructions/global'
|
15
|
+
require 'python/pickle/instructions/empty_list'
|
16
|
+
require 'python/pickle/instructions/append'
|
17
|
+
require 'python/pickle/instructions/bin_get'
|
18
|
+
require 'python/pickle/instructions/long_bin_get'
|
19
|
+
require 'python/pickle/instructions/bin_put'
|
20
|
+
require 'python/pickle/instructions/build'
|
21
|
+
require 'python/pickle/instructions/appends'
|
22
|
+
require 'python/pickle/instructions/set_item'
|
23
|
+
require 'python/pickle/instructions/set_items'
|
24
|
+
require 'python/pickle/instructions/tuple'
|
25
|
+
require 'python/pickle/instructions/empty_dict'
|
26
|
+
|
27
|
+
module Python
|
28
|
+
module Pickle
|
29
|
+
#
|
30
|
+
# Implements reading and writing of Python Pickle protocol 1.
|
31
|
+
#
|
32
|
+
# @api private
|
33
|
+
#
|
34
|
+
class Protocol1 < Protocol0
|
35
|
+
|
36
|
+
# Opcodes for Pickle protocol version 1.
#
# Consists of all protocol 0 opcodes plus the opcodes added by protocol 1
# which {#read_instruction} decodes. BINGET (104) and LONG_BINGET (106)
# are listed so that this set agrees with {#read_instruction}.
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = Protocol0::OPCODES + Set[
  41,  # EMPTY_TUPLE
  71,  # BINFLOAT
  75,  # BININT1
  84,  # BINSTRING
  85,  # SHORT_BINSTRING
  88,  # BINUNICODE
  93,  # EMPTY_LIST
  101, # APPENDS
  104, # BINGET
  106, # LONG_BINGET
  113, # BINPUT
  117, # SETITEMS
  125  # EMPTY_DICT
]
|
52
|
+
|
53
|
+
#
# Decodes the next instruction from the pickle stream.
#
# Handles every protocol 0 opcode as well as the opcodes added by
# protocol 1. Opcodes which carry an argument also consume their argument
# from the stream.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The byte read (or `nil` at the end of the stream) is not an opcode
#   handled by protocol 1, or the stream ended in the middle of a length
#   prefixed string.
#
def read_instruction
  case (opcode = @io.getbyte)
  #
  # Protocol 0 instructions
  #
  when 40 # MARK
    Instructions::MARK
  when 46 # STOP
    Instructions::STOP
  when 48 # POP
    Instructions::POP
  when 49 # POP_MARK
    Instructions::POP_MARK
  when 50 # DUP
    Instructions::DUP
  when 70 # FLOAT
    Instructions::Float.new(read_float)
  when 73 # INT
    Instructions::Int.new(read_int)
  when 76 # LONG
    Instructions::Long.new(read_long)
  when 78 # NONE
    Instructions::NONE
  when 82 # REDUCE
    Instructions::REDUCE
  when 83 # STRING
    Instructions::String.new(read_string)
  when 86 # UNICODE
    Instructions::String.new(read_unicode_string)
  when 97 # APPEND
    Instructions::APPEND
  when 98 # BUILD
    Instructions::BUILD
  when 99 # GLOBAL
    Instructions::Global.new(read_nl_string,read_nl_string)
  when 100 # DICT
    Instructions::DICT
  when 103 # GET
    Instructions::Get.new(read_int)
  when 108 # LIST
    Instructions::LIST
  when 112 # PUT
    Instructions::Put.new(read_int)
  when 115 # SETITEM
    Instructions::SETITEM
  when 116 # TUPLE
    Instructions::TUPLE
  #
  # Protocol 1 instructions
  #
  when 41 # EMPTY_TUPLE
    Instructions::EMPTY_TUPLE
  when 71 # BINFLOAT
    Instructions::BinFloat.new(read_float64_be)
  when 75 # BININT1
    Instructions::BinInt1.new(read_uint8)
  when 84 # BINSTRING
    length = read_uint32_le
    string = read_counted_string(length)

    Instructions::BinString.new(length,string)
  when 85 # SHORT_BINSTRING
    length = read_uint8
    string = read_counted_string(length)

    Instructions::ShortBinString.new(length,string)
  when 88 # BINUNICODE
    length = read_uint32_le
    string = read_counted_string(length).force_encoding(Encoding::UTF_8)

    Instructions::BinUnicode.new(length,string)
  when 93 # EMPTY_LIST
    Instructions::EMPTY_LIST
  when 101 # APPENDS
    Instructions::APPENDS
  when 104 # BINGET
    Instructions::BinGet.new(read_uint8)
  when 106 # LONG_BINGET
    Instructions::LongBinGet.new(read_uint32_le)
  when 113 # BINPUT
    Instructions::BinPut.new(read_uint8)
  when 117 # SETITEMS
    Instructions::SETITEMS
  when 125 # EMPTY_DICT
    Instructions::EMPTY_DICT
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 1")
  end
end

#
# Reads the payload of a length prefixed string, verifying that the stream
# actually contained as many bytes as the length prefix announced.
#
# @param [Integer] length
#   The number of bytes to read.
#
# @return [String]
#   Exactly `length` bytes read from the stream.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes could be read.
#
private def read_counted_string(length)
  string = @io.read(length)

  # IO#read returns nil (or a shorter String) when the stream runs out
  unless string && string.bytesize == length
    raise(InvalidFormat,"unexpected end of stream while reading a string of length #{length.inspect}")
  end

  return string
end
|
151
|
+
|
152
|
+
#
# Reads a double precision (64bit) floating point number, in network
# byte-order (big-endian).
#
# @return [Float]
#   The decoded float.
#
# @raise [InvalidFormat]
#   The stream ended before eight bytes could be read.
#
def read_float64_be
  bytes = @io.read(8)

  # IO#read returns nil (or a shorter String) when the stream runs out
  unless bytes && bytes.bytesize == 8
    raise(InvalidFormat,"unexpected end of stream while reading a 64bit float")
  end

  bytes.unpack1('G')
end
|
162
|
+
|
163
|
+
#
# Reads a single 8bit unsigned integer (byte).
#
# @return [Integer]
#   The byte value (0-255).
#
# @raise [InvalidFormat]
#   The stream ended before a byte could be read.
#
def read_uint8
  # getbyte returns nil at the end of the stream
  @io.getbyte || raise(InvalidFormat,"unexpected end of stream while reading an 8bit unsigned integer")
end
|
171
|
+
|
172
|
+
#
# Reads an unsigned 32bit integer, in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   The stream ended before four bytes could be read.
#
def read_uint32_le
  bytes = @io.read(4)

  # IO#read returns nil (or a shorter String) when the stream runs out
  unless bytes && bytes.bytesize == 4
    raise(InvalidFormat,"unexpected end of stream while reading a 32bit unsigned integer")
  end

  bytes.unpack1('L<')
end
|
180
|
+
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|