python-pickle 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +3 -0
- data/.github/workflows/ruby.yml +27 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +14 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +149 -0
- data/Rakefile +13 -0
- data/gemspec.yml +25 -0
- data/lib/python/pickle/byte_array.rb +40 -0
- data/lib/python/pickle/deserializer.rb +595 -0
- data/lib/python/pickle/exceptions.rb +12 -0
- data/lib/python/pickle/instruction.rb +52 -0
- data/lib/python/pickle/instructions/add_items.rb +26 -0
- data/lib/python/pickle/instructions/append.rb +24 -0
- data/lib/python/pickle/instructions/appends.rb +26 -0
- data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
- data/lib/python/pickle/instructions/bin_float.rb +29 -0
- data/lib/python/pickle/instructions/bin_get.rb +27 -0
- data/lib/python/pickle/instructions/bin_int1.rb +29 -0
- data/lib/python/pickle/instructions/bin_put.rb +29 -0
- data/lib/python/pickle/instructions/bin_string.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
- data/lib/python/pickle/instructions/build.rb +24 -0
- data/lib/python/pickle/instructions/byte_array8.rb +32 -0
- data/lib/python/pickle/instructions/dict.rb +17 -0
- data/lib/python/pickle/instructions/dup.rb +24 -0
- data/lib/python/pickle/instructions/empty_dict.rb +26 -0
- data/lib/python/pickle/instructions/empty_list.rb +26 -0
- data/lib/python/pickle/instructions/empty_set.rb +26 -0
- data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
- data/lib/python/pickle/instructions/ext1.rb +29 -0
- data/lib/python/pickle/instructions/ext2.rb +29 -0
- data/lib/python/pickle/instructions/ext4.rb +29 -0
- data/lib/python/pickle/instructions/float.rb +24 -0
- data/lib/python/pickle/instructions/frame.rb +29 -0
- data/lib/python/pickle/instructions/frozen_set.rb +26 -0
- data/lib/python/pickle/instructions/get.rb +27 -0
- data/lib/python/pickle/instructions/global.rb +62 -0
- data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
- data/lib/python/pickle/instructions/has_value.rb +50 -0
- data/lib/python/pickle/instructions/int.rb +24 -0
- data/lib/python/pickle/instructions/list.rb +24 -0
- data/lib/python/pickle/instructions/long.rb +24 -0
- data/lib/python/pickle/instructions/long1.rb +32 -0
- data/lib/python/pickle/instructions/long4.rb +32 -0
- data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
- data/lib/python/pickle/instructions/mark.rb +24 -0
- data/lib/python/pickle/instructions/memoize.rb +26 -0
- data/lib/python/pickle/instructions/new_false.rb +24 -0
- data/lib/python/pickle/instructions/new_obj.rb +26 -0
- data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
- data/lib/python/pickle/instructions/new_true.rb +24 -0
- data/lib/python/pickle/instructions/next_buffer.rb +26 -0
- data/lib/python/pickle/instructions/none.rb +24 -0
- data/lib/python/pickle/instructions/pop.rb +24 -0
- data/lib/python/pickle/instructions/pop_mark.rb +24 -0
- data/lib/python/pickle/instructions/proto.rb +29 -0
- data/lib/python/pickle/instructions/put.rb +24 -0
- data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
- data/lib/python/pickle/instructions/reduce.rb +24 -0
- data/lib/python/pickle/instructions/set_item.rb +24 -0
- data/lib/python/pickle/instructions/set_items.rb +26 -0
- data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/stack_global.rb +26 -0
- data/lib/python/pickle/instructions/stop.rb +24 -0
- data/lib/python/pickle/instructions/string.rb +24 -0
- data/lib/python/pickle/instructions/tuple.rb +24 -0
- data/lib/python/pickle/instructions/tuple1.rb +24 -0
- data/lib/python/pickle/instructions/tuple2.rb +24 -0
- data/lib/python/pickle/instructions/tuple3.rb +24 -0
- data/lib/python/pickle/protocol.rb +56 -0
- data/lib/python/pickle/protocol0.rb +399 -0
- data/lib/python/pickle/protocol1.rb +183 -0
- data/lib/python/pickle/protocol2.rb +229 -0
- data/lib/python/pickle/protocol3.rb +163 -0
- data/lib/python/pickle/protocol4.rb +285 -0
- data/lib/python/pickle/protocol5.rb +218 -0
- data/lib/python/pickle/py_class.rb +75 -0
- data/lib/python/pickle/py_object.rb +141 -0
- data/lib/python/pickle/tuple.rb +19 -0
- data/lib/python/pickle/version.rb +6 -0
- data/lib/python/pickle.rb +226 -0
- data/python-pickle.gemspec +62 -0
- data/spec/byte_array_spec.rb +54 -0
- data/spec/deserializer_spec.rb +1201 -0
- data/spec/fixtures/ascii_str_v3.pkl +0 -0
- data/spec/fixtures/ascii_str_v4.pkl +0 -0
- data/spec/fixtures/ascii_str_v5.pkl +0 -0
- data/spec/fixtures/bin_str_v0.pkl +3 -0
- data/spec/fixtures/bin_str_v1.pkl +0 -0
- data/spec/fixtures/bin_str_v2.pkl +0 -0
- data/spec/fixtures/bin_str_v3.pkl +0 -0
- data/spec/fixtures/bin_str_v4.pkl +0 -0
- data/spec/fixtures/bin_str_v5.pkl +0 -0
- data/spec/fixtures/bytearray_v0.pkl +10 -0
- data/spec/fixtures/bytearray_v1.pkl +0 -0
- data/spec/fixtures/bytearray_v2.pkl +0 -0
- data/spec/fixtures/bytearray_v3.pkl +0 -0
- data/spec/fixtures/bytearray_v4.pkl +0 -0
- data/spec/fixtures/bytearray_v5.pkl +0 -0
- data/spec/fixtures/class_v0.pkl +4 -0
- data/spec/fixtures/class_v1.pkl +0 -0
- data/spec/fixtures/class_v2.pkl +0 -0
- data/spec/fixtures/class_v3.pkl +0 -0
- data/spec/fixtures/class_v4.pkl +0 -0
- data/spec/fixtures/class_v5.pkl +0 -0
- data/spec/fixtures/dict_v0.pkl +6 -0
- data/spec/fixtures/dict_v1.pkl +0 -0
- data/spec/fixtures/dict_v2.pkl +0 -0
- data/spec/fixtures/dict_v3.pkl +0 -0
- data/spec/fixtures/dict_v4.pkl +0 -0
- data/spec/fixtures/dict_v5.pkl +0 -0
- data/spec/fixtures/escaped_str_v0.pkl +3 -0
- data/spec/fixtures/escaped_str_v1.pkl +0 -0
- data/spec/fixtures/escaped_str_v2.pkl +0 -0
- data/spec/fixtures/false_v0.pkl +2 -0
- data/spec/fixtures/false_v1.pkl +2 -0
- data/spec/fixtures/false_v2.pkl +1 -0
- data/spec/fixtures/false_v3.pkl +1 -0
- data/spec/fixtures/false_v4.pkl +1 -0
- data/spec/fixtures/false_v5.pkl +1 -0
- data/spec/fixtures/float_v0.pkl +2 -0
- data/spec/fixtures/float_v1.pkl +1 -0
- data/spec/fixtures/float_v2.pkl +1 -0
- data/spec/fixtures/float_v3.pkl +1 -0
- data/spec/fixtures/float_v4.pkl +0 -0
- data/spec/fixtures/float_v5.pkl +0 -0
- data/spec/fixtures/function_v0.pkl +4 -0
- data/spec/fixtures/function_v1.pkl +0 -0
- data/spec/fixtures/function_v2.pkl +0 -0
- data/spec/fixtures/function_v3.pkl +0 -0
- data/spec/fixtures/function_v4.pkl +0 -0
- data/spec/fixtures/function_v5.pkl +0 -0
- data/spec/fixtures/hex_str_v0.pkl +3 -0
- data/spec/fixtures/hex_str_v1.pkl +0 -0
- data/spec/fixtures/hex_str_v2.pkl +0 -0
- data/spec/fixtures/int_v0.pkl +2 -0
- data/spec/fixtures/int_v1.pkl +1 -0
- data/spec/fixtures/int_v2.pkl +1 -0
- data/spec/fixtures/int_v3.pkl +1 -0
- data/spec/fixtures/int_v4.pkl +1 -0
- data/spec/fixtures/int_v5.pkl +1 -0
- data/spec/fixtures/list_v0.pkl +7 -0
- data/spec/fixtures/list_v1.pkl +0 -0
- data/spec/fixtures/list_v2.pkl +0 -0
- data/spec/fixtures/list_v3.pkl +0 -0
- data/spec/fixtures/list_v4.pkl +0 -0
- data/spec/fixtures/list_v5.pkl +0 -0
- data/spec/fixtures/long_v0.pkl +2 -0
- data/spec/fixtures/long_v1.pkl +2 -0
- data/spec/fixtures/long_v2.pkl +0 -0
- data/spec/fixtures/long_v3.pkl +0 -0
- data/spec/fixtures/long_v4.pkl +0 -0
- data/spec/fixtures/long_v5.pkl +0 -0
- data/spec/fixtures/nested_dict_v0.pkl +12 -0
- data/spec/fixtures/nested_dict_v1.pkl +0 -0
- data/spec/fixtures/nested_dict_v2.pkl +0 -0
- data/spec/fixtures/nested_dict_v3.pkl +0 -0
- data/spec/fixtures/nested_dict_v4.pkl +0 -0
- data/spec/fixtures/nested_dict_v5.pkl +0 -0
- data/spec/fixtures/nested_list_v0.pkl +9 -0
- data/spec/fixtures/nested_list_v1.pkl +0 -0
- data/spec/fixtures/nested_list_v2.pkl +0 -0
- data/spec/fixtures/nested_list_v3.pkl +0 -0
- data/spec/fixtures/nested_list_v4.pkl +0 -0
- data/spec/fixtures/nested_list_v5.pkl +0 -0
- data/spec/fixtures/none_v0.pkl +1 -0
- data/spec/fixtures/none_v1.pkl +1 -0
- data/spec/fixtures/none_v2.pkl +1 -0
- data/spec/fixtures/none_v3.pkl +1 -0
- data/spec/fixtures/none_v4.pkl +1 -0
- data/spec/fixtures/none_v5.pkl +1 -0
- data/spec/fixtures/object_v0.pkl +19 -0
- data/spec/fixtures/object_v1.pkl +0 -0
- data/spec/fixtures/object_v2.pkl +0 -0
- data/spec/fixtures/object_v3.pkl +0 -0
- data/spec/fixtures/object_v4.pkl +0 -0
- data/spec/fixtures/object_v5.pkl +0 -0
- data/spec/fixtures/str_v0.pkl +3 -0
- data/spec/fixtures/str_v1.pkl +0 -0
- data/spec/fixtures/str_v2.pkl +0 -0
- data/spec/fixtures/str_v3.pkl +0 -0
- data/spec/fixtures/str_v4.pkl +0 -0
- data/spec/fixtures/str_v5.pkl +0 -0
- data/spec/fixtures/true_v0.pkl +2 -0
- data/spec/fixtures/true_v1.pkl +2 -0
- data/spec/fixtures/true_v2.pkl +1 -0
- data/spec/fixtures/true_v3.pkl +1 -0
- data/spec/fixtures/true_v4.pkl +1 -0
- data/spec/fixtures/true_v5.pkl +1 -0
- data/spec/fixtures/unicode_str_v0.pkl +3 -0
- data/spec/fixtures/unicode_str_v1.pkl +0 -0
- data/spec/fixtures/unicode_str_v2.pkl +0 -0
- data/spec/fixtures/unicode_str_v3.pkl +0 -0
- data/spec/fixtures/unicode_str_v4.pkl +0 -0
- data/spec/fixtures/unicode_str_v5.pkl +0 -0
- data/spec/generate_pickles2.py +41 -0
- data/spec/generate_pickles3.py +40 -0
- data/spec/integration/load/protocol0_spec.rb +258 -0
- data/spec/integration/load/protocol1_spec.rb +258 -0
- data/spec/integration/load/protocol2_spec.rb +258 -0
- data/spec/integration/load/protocol3_spec.rb +258 -0
- data/spec/integration/load/protocol4_spec.rb +258 -0
- data/spec/integration/load/protocol5_spec.rb +258 -0
- data/spec/integration/parse/protocol0_spec.rb +467 -0
- data/spec/integration/parse/protocol1_spec.rb +459 -0
- data/spec/integration/parse/protocol2_spec.rb +471 -0
- data/spec/integration/parse/protocol3_spec.rb +407 -0
- data/spec/integration/parse/protocol4_spec.rb +439 -0
- data/spec/integration/parse/protocol5_spec.rb +419 -0
- data/spec/pickle_spec.rb +163 -0
- data/spec/protocol0_read_instruction_examples.rb +211 -0
- data/spec/protocol0_spec.rb +445 -0
- data/spec/protocol1_read_instruction_examples.rb +156 -0
- data/spec/protocol1_spec.rb +59 -0
- data/spec/protocol2_read_instruction_examples.rb +135 -0
- data/spec/protocol2_spec.rb +128 -0
- data/spec/protocol3_read_instruction_examples.rb +29 -0
- data/spec/protocol3_spec.rb +32 -0
- data/spec/protocol4_read_instruction_examples.rb +142 -0
- data/spec/protocol4_spec.rb +58 -0
- data/spec/protocol5_spec.rb +68 -0
- data/spec/py_class_spec.rb +62 -0
- data/spec/py_object_spec.rb +149 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/tuple_spec.rb +18 -0
- metadata +325 -0
@@ -0,0 +1,399 @@
|
|
1
|
+
require 'python/pickle/protocol'
|
2
|
+
require 'python/pickle/instructions/mark'
|
3
|
+
require 'python/pickle/instructions/dict'
|
4
|
+
require 'python/pickle/instructions/string'
|
5
|
+
require 'python/pickle/instructions/put'
|
6
|
+
require 'python/pickle/instructions/get'
|
7
|
+
require 'python/pickle/instructions/float'
|
8
|
+
require 'python/pickle/instructions/int'
|
9
|
+
require 'python/pickle/instructions/long'
|
10
|
+
require 'python/pickle/instructions/set_item'
|
11
|
+
require 'python/pickle/instructions/tuple'
|
12
|
+
require 'python/pickle/instructions/list'
|
13
|
+
require 'python/pickle/instructions/none'
|
14
|
+
require 'python/pickle/instructions/append'
|
15
|
+
require 'python/pickle/instructions/global'
|
16
|
+
require 'python/pickle/instructions/reduce'
|
17
|
+
require 'python/pickle/instructions/build'
|
18
|
+
require 'python/pickle/instructions/pop'
|
19
|
+
require 'python/pickle/instructions/pop_mark'
|
20
|
+
require 'python/pickle/instructions/dup'
|
21
|
+
require 'python/pickle/instructions/stop'
|
22
|
+
require 'python/pickle/exceptions'
|
23
|
+
|
24
|
+
require 'set'
|
25
|
+
|
26
|
+
module Python
|
27
|
+
module Pickle
|
28
|
+
#
|
29
|
+
# Implements reading and writing of Python Pickle protocol 0.
|
30
|
+
#
|
31
|
+
# @api private
|
32
|
+
#
|
33
|
+
class Protocol0 < Protocol
|
34
|
+
|
35
|
+
# Opcodes for Pickle protocol version 0.
|
36
|
+
#
|
37
|
+
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
|
38
|
+
# The complete set of opcode byte values recognised by protocol 0.
OPCODES = Set.new([
  40,  # MARK
  46,  # STOP
  48,  # POP
  49,  # POP_MARK
  50,  # DUP
  70,  # FLOAT
  73,  # INT
  76,  # LONG
  78,  # NONE
  82,  # REDUCE
  83,  # STRING
  86,  # UNICODE
  97,  # APPEND
  98,  # BUILD
  99,  # GLOBAL
  100, # DICT
  103, # GET
  108, # LIST
  112, # PUT
  115, # SETITEM
  116  # TUPLE
])
|
61
|
+
|
62
|
+
#
|
63
|
+
# Reads an instruction from the pickle stream.
|
64
|
+
#
|
65
|
+
# @return [Instruction]
|
66
|
+
# The decoded instruction.
|
67
|
+
#
|
68
|
+
# @raise [InvalidFormat]
|
69
|
+
# The pickle stream could not be parsed.
|
70
|
+
#
|
71
|
+
def read_instruction
  # One opcode byte selects the instruction; opcodes that carry an argument
  # delegate to the matching read_* helper to consume it from the stream.
  opcode = @io.getbyte

  case opcode
  when 40  then Instructions::MARK     # MARK
  when 46  then Instructions::STOP     # STOP
  when 48  then Instructions::POP      # POP
  when 49  then Instructions::POP_MARK # POP_MARK
  when 50  then Instructions::DUP      # DUP
  when 70  then Instructions::Float.new(read_float)           # FLOAT
  when 73  then Instructions::Int.new(read_int)               # INT
  when 76  then Instructions::Long.new(read_long)             # LONG
  when 78  then Instructions::NONE     # NONE
  when 82  then Instructions::REDUCE   # REDUCE
  when 83  then Instructions::String.new(read_string)         # STRING
  when 86  then Instructions::String.new(read_unicode_string) # UNICODE
  when 97  then Instructions::APPEND   # APPEND
  when 98  then Instructions::BUILD    # BUILD
  when 99  then Instructions::Global.new(read_nl_string,read_nl_string) # GLOBAL
  when 100 then Instructions::DICT     # DICT
  when 103 then Instructions::Get.new(read_int)               # GET
  when 108 then Instructions::LIST     # LIST
  when 112 then Instructions::Put.new(read_int)               # PUT
  when 115 then Instructions::SETITEM  # SETITEM
  when 116 then Instructions::TUPLE    # TUPLE
  else
    # Also reached at end-of-stream, where getbyte returns nil.
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 0")
  end
end
|
119
|
+
|
120
|
+
#
|
121
|
+
# Reads a hex number from the pickle stream.
|
122
|
+
#
|
123
|
+
# Takes no arguments; always reads exactly two hexadecimal digits.
|
125
|
+
#
|
126
|
+
# @return [String]
|
127
|
+
# The decoded raw character.
|
128
|
+
#
|
129
|
+
def read_hex_escaped_char
  # Reads the two hex digits that follow a "\x" escape and returns the
  # single character with that byte value.
  #
  # IO#read returns nil at end-of-stream; normalise it to an empty String so
  # the error message reports what was actually read instead of a fragment
  # of the text "nil".
  string = @io.read(2).to_s

  unless string =~ /\A[0-9a-fA-F]{2}\z/
    # inspect[1..-2] strips the surrounding quotes but keeps escapes visible.
    bad_hex = string.inspect[1..-2]

    raise(InvalidFormat,"invalid hex escape character: \"\\x#{bad_hex}\"")
  end

  return string.to_i(16).chr
end
|
140
|
+
|
141
|
+
#
|
142
|
+
# Reads an escaped character from the pickle stream.
|
143
|
+
#
|
144
|
+
# @return [String]
|
145
|
+
# The unescaped raw character.
|
146
|
+
#
|
147
|
+
def read_escaped_char
  letter = @io.getc

  # "\x" is the only escape that consumes further input.
  return read_hex_escaped_char if letter == 'x'

  # Single-letter escapes and the raw character each one stands for.
  simple_escapes = {
    't'  => "\t",
    'n'  => "\n",
    'r'  => "\r",
    '\\' => '\\',
    "'"  => "'"
  }

  simple_escapes.fetch(letter) do
    bad_escape = letter.inspect[1..-2]

    raise(InvalidFormat,"invalid backslash escape character: \"\\#{bad_escape}\"")
  end
end
|
161
|
+
|
162
|
+
#
|
163
|
+
# Reads a newline terminated string from the pickle stream.
|
164
|
+
#
|
165
|
+
# @return [String]
|
166
|
+
# The read string.
|
167
|
+
#
|
168
|
+
# @raise [InvalidFormat]
|
169
|
+
# Encountered a premature end of the stream.
|
170
|
+
#
|
171
|
+
def read_nl_string
  buffer = String.new

  # getc returns nil once the stream is exhausted, which ends the loop.
  while (char = @io.getc)
    return buffer if char == "\n"

    buffer << char
  end

  # Falling out of the loop means no terminating newline was ever seen.
  raise(InvalidFormat,"unexpected end of stream after the end of a newline terminated string")
end
|
185
|
+
|
186
|
+
#
|
187
|
+
# Reads an ASCII string from the pickle stream.
|
188
|
+
#
|
189
|
+
# @return [String]
|
190
|
+
# The decoded raw string.
|
191
|
+
#
|
192
|
+
def read_string
  # Reads the quoted, newline-terminated argument of a STRING opcode and
  # returns its unescaped contents as a binary (ASCII-8BIT) String.
  new_string = String.new(encoding: Encoding::ASCII_8BIT)

  # Python's repr() switches to double quotes when the text contains an
  # apostrophe (e.g. S"it's"), so accept either quote style and require the
  # same character to close the string.
  quote = @io.getc

  unless quote == "'" || quote == '"'
    raise(InvalidFormat,"cannot find beginning single-quote of string")
  end

  until @io.eof?
    case (char = @io.getc)
    when "\\"
      new_string << read_escaped_char
    when quote # end-of-string
      break
    else
      new_string << char
    end
  end

  # The closing quote must be followed immediately by a newline.
  newline = @io.getc

  if newline == nil
    raise(InvalidFormat,"unexpected end of stream after the end of a single-quoted string")
  elsif newline != "\n"
    raise(InvalidFormat,"expected a '\\n' character following the string, but was #{newline.inspect}")
  end

  return new_string
end
|
220
|
+
|
221
|
+
#
|
222
|
+
# Reads a short unicode escaped character.
|
223
|
+
#
|
224
|
+
# @return [String]
|
225
|
+
# The decoded UTF-8 character.
|
226
|
+
#
|
227
|
+
# @raise [InvalidFormat]
|
228
|
+
# The unicode escaped character was invalid.
|
229
|
+
#
|
230
|
+
def read_unicode_escaped_char16
  # Reads the four hex digits that follow a "\u" escape and returns the
  # corresponding UTF-8 character.
  #
  # IO#read returns nil at end-of-stream; treat that as an empty String so
  # the validation below reports what was actually read.
  string = @io.read(4).to_s

  unless string =~ /\A[0-9a-fA-F]{4}\z/
    bad_unicode = string.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{bad_unicode}\"")
  end

  begin
    return string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Surrogate code points (D800-DFFF) are well-formed hex but cannot be
    # encoded as UTF-8; report them as a format error rather than leaking a
    # RangeError to the caller.
    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{string}\"")
  end
end
|
241
|
+
|
242
|
+
#
|
243
|
+
# Reads a long unicode escaped character.
|
244
|
+
#
|
245
|
+
# @return [String]
|
246
|
+
# The decoded UTF-8 character.
|
247
|
+
#
|
248
|
+
# @raise [InvalidFormat]
|
249
|
+
# The unicode escaped character was invalid.
|
250
|
+
#
|
251
|
+
def read_unicode_escaped_char32
  # Reads the eight hex digits that follow a "\U" escape and returns the
  # corresponding UTF-8 character.
  #
  # IO#read returns nil at end-of-stream; treat that as an empty String so
  # the validation below reports what was actually read.
  string = @io.read(8).to_s

  unless string =~ /\A[0-9a-fA-F]{8}\z/
    bad_unicode = string.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{bad_unicode}\"")
  end

  begin
    return string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Eight hex digits can express values above U+10FFFF (and surrogates),
    # none of which are encodable; surface them as a format error instead of
    # a RangeError.
    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{string}\"")
  end
end
|
262
|
+
|
263
|
+
#
|
264
|
+
# Reads a unicode escaped character from the pickle stream.
|
265
|
+
#
|
266
|
+
# @return [String]
|
267
|
+
# The unescaped raw unicode character.
|
268
|
+
#
|
269
|
+
def read_unicode_escaped_char
  # The character after the backslash selects how the escape is decoded.
  letter = @io.getc

  if letter == 'x'
    read_hex_escaped_char
  elsif letter == 'u'
    read_unicode_escaped_char16
  elsif letter == 'U'
    read_unicode_escaped_char32
  elsif letter == "\\"
    "\\"
  else
    bad_escape = letter.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\#{bad_escape}\"")
  end
end
|
281
|
+
|
282
|
+
#
|
283
|
+
# Reads a unicode String from the pickle stream.
|
284
|
+
#
|
285
|
+
# @return [String]
|
286
|
+
# The decoded raw unicode String.
|
287
|
+
#
|
288
|
+
def read_unicode_string
  decoded = String.new(encoding: Encoding::UTF_8)

  # Consume characters until the terminating newline; getc returns nil once
  # the stream is exhausted, which ends the loop.
  while (char = @io.getc)
    return decoded if char == "\n" # end-of-string

    # A backslash introduces an escape sequence; anything else is literal.
    decoded << (char == "\\" ? read_unicode_escaped_char : char)
  end

  raise(InvalidFormat,"unexpected end of stream while parsing unicode string: #{decoded.inspect}")
end
|
304
|
+
|
305
|
+
#
|
306
|
+
# Reads a floating-point decimal from the pickle stream.
|
307
|
+
#
|
308
|
+
# @return [Float]
|
309
|
+
# The decoded float.
|
310
|
+
#
|
311
|
+
# @raise [InvalidFormat]
|
312
|
+
# Encountered a non-numeric character or a premature end of the stream.
|
313
|
+
#
|
314
|
+
def read_float
  # Reads the newline-terminated decimal text of a FLOAT opcode and
  # converts it to a Float.
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9\.eE+\-]/
      # Python writes repr(float), which may carry a sign and an exponent
      # (e.g. "-1.5", "1e+20", "1e-05"), so those characters are numeric too.
      # NOTE(review): "inf" / "nan" are still rejected as non-numeric.
      new_string << char
    when "\n" # end-of-float
      return new_string.to_f
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a float: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a float: #{new_string.inspect}")
end
|
330
|
+
|
331
|
+
#
|
332
|
+
# Reads an integer from the pickle stream.
|
333
|
+
#
|
334
|
+
# @return [Integer, true, false]
|
335
|
+
# The decoded Integer.
|
336
|
+
# If the integer is `00`, then `false` will be returned.
|
337
|
+
# If the integer is `01`, then `true` will be returned.
|
338
|
+
#
|
339
|
+
# @raise [InvalidFormat]
|
340
|
+
# Encountered a non-numeric character or a premature end of the stream.
|
341
|
+
#
|
342
|
+
def read_int
  # Reads the newline-terminated decimal text of an integer argument.
  # The literal texts "00" and "01" decode to false and true respectively.
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-'
      # Negative integers are written with a leading minus sign; a minus
      # anywhere else is not part of a valid number.
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
      end

      new_string << char
    when "\n" # end-of-integer
      return case new_string
             when '00' then false
             when '01' then true
             else           new_string.to_i
             end
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing an integer: #{new_string.inspect}")
end
|
362
|
+
|
363
|
+
#
|
364
|
+
# Reads a long integer.
|
365
|
+
#
|
366
|
+
# @return [Integer]
|
367
|
+
# The decoded Integer.
|
368
|
+
#
|
369
|
+
# @raise [InvalidFormat]
|
370
|
+
# Encountered a non-numeric character or a premature end of the stream.
|
371
|
+
#
|
372
|
+
def read_long
  # Reads the decimal text of a LONG opcode, which is terminated by an 'L'
  # followed by a newline, and converts it to an Integer.
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-'
      # Negative longs are written with a leading minus sign; a minus
      # anywhere else is not part of a valid number.
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
      end

      new_string << char
    when 'L'
      # The 'L' suffix must be followed immediately by the newline.
      newline = @io.getc

      if newline == nil
        raise(InvalidFormat,"unexpected end of stream after the end of an integer")
      elsif newline != "\n"
        raise(InvalidFormat,"expected a '\\n' character following the integer, but was #{newline.inspect}")
      end

      return new_string.to_i
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a long integer: #{new_string.inspect}")
end
|
396
|
+
|
397
|
+
end
|
398
|
+
end
|
399
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'python/pickle/protocol0'
|
2
|
+
require 'python/pickle/instructions/mark'
|
3
|
+
require 'python/pickle/instructions/empty_tuple'
|
4
|
+
require 'python/pickle/instructions/stop'
|
5
|
+
require 'python/pickle/instructions/bin_float'
|
6
|
+
require 'python/pickle/instructions/bin_int1'
|
7
|
+
require 'python/pickle/instructions/int'
|
8
|
+
require 'python/pickle/instructions/long'
|
9
|
+
require 'python/pickle/instructions/none'
|
10
|
+
require 'python/pickle/instructions/reduce'
|
11
|
+
require 'python/pickle/instructions/bin_string'
|
12
|
+
require 'python/pickle/instructions/short_bin_string'
|
13
|
+
require 'python/pickle/instructions/bin_unicode'
|
14
|
+
require 'python/pickle/instructions/global'
|
15
|
+
require 'python/pickle/instructions/empty_list'
|
16
|
+
require 'python/pickle/instructions/append'
|
17
|
+
require 'python/pickle/instructions/bin_get'
|
18
|
+
require 'python/pickle/instructions/long_bin_get'
|
19
|
+
require 'python/pickle/instructions/bin_put'
|
20
|
+
require 'python/pickle/instructions/build'
|
21
|
+
require 'python/pickle/instructions/appends'
|
22
|
+
require 'python/pickle/instructions/set_item'
|
23
|
+
require 'python/pickle/instructions/set_items'
|
24
|
+
require 'python/pickle/instructions/tuple'
|
25
|
+
require 'python/pickle/instructions/empty_dict'
|
26
|
+
|
27
|
+
module Python
|
28
|
+
module Pickle
|
29
|
+
#
|
30
|
+
# Implements reading and writing of Python Pickle protocol 1.
|
31
|
+
#
|
32
|
+
# @api private
|
33
|
+
#
|
34
|
+
class Protocol1 < Protocol0
|
35
|
+
|
36
|
+
# Opcodes for Pickle protocol version 1.
|
37
|
+
#
|
38
|
+
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
|
39
|
+
# Protocol 0's opcodes plus those introduced by protocol 1. Every opcode
# that this class's read_instruction decodes is listed, including BINGET and
# LONG_BINGET.
OPCODES = Protocol0::OPCODES + Set[
  41,  # EMPTY_TUPLE
  71,  # BINFLOAT
  75,  # BININT1
  84,  # BINSTRING
  85,  # SHORT_BINSTRING
  88,  # BINUNICODE
  93,  # EMPTY_LIST
  101, # APPENDS
  104, # BINGET
  106, # LONG_BINGET
  113, # BINPUT
  117, # SETITEMS
  125  # EMPTY_DICT
]
|
52
|
+
|
53
|
+
#
|
54
|
+
# Reads an instruction from the pickle stream.
|
55
|
+
#
|
56
|
+
# @return [Instruction]
|
57
|
+
# The decoded instruction.
|
58
|
+
#
|
59
|
+
# @raise [InvalidFormat]
|
60
|
+
# The pickle stream could not be parsed.
|
61
|
+
#
|
62
|
+
def read_instruction
  # Reads one opcode byte and decodes the instruction it introduces,
  # consuming any argument bytes that follow it.
  case (opcode = @io.getbyte)
  #
  # Protocol 0 instructions
  #
  when 40 # MARK
    Instructions::MARK
  when 46 # STOP
    Instructions::STOP
  when 48 # POP
    Instructions::POP
  when 49 # POP_MARK
    Instructions::POP_MARK
  when 50 # DUP
    Instructions::DUP
  when 70 # FLOAT
    Instructions::Float.new(read_float)
  when 73 # INT
    Instructions::Int.new(read_int)
  when 76 # LONG
    Instructions::Long.new(read_long)
  when 78 # NONE
    Instructions::NONE
  when 82 # REDUCE
    Instructions::REDUCE
  when 83 # STRING
    Instructions::String.new(read_string)
  when 86 # UNICODE
    Instructions::String.new(read_unicode_string)
  when 97 # APPEND
    Instructions::APPEND
  when 98 # BUILD
    Instructions::BUILD
  when 99 # GLOBAL
    Instructions::Global.new(read_nl_string,read_nl_string)
  when 100 # DICT
    Instructions::DICT
  when 103 # GET
    Instructions::Get.new(read_int)
  when 108 # LIST
    Instructions::LIST
  when 112 # PUT
    Instructions::Put.new(read_int)
  when 115 # SETITEM
    Instructions::SETITEM
  when 116 # TUPLE
    Instructions::TUPLE
  #
  # Protocol 1 instructions
  #
  when 41 # EMPTY_TUPLE
    Instructions::EMPTY_TUPLE
  when 71 # BINFLOAT
    Instructions::BinFloat.new(read_float64_be)
  when 75 # BININT1
    Instructions::BinInt1.new(read_uint8)
  when 84 # BINSTRING
    length = read_uint32_le
    string = read_instruction_payload(length,'BINSTRING')

    Instructions::BinString.new(length,string)
  when 85 # SHORT_BINSTRING
    length = read_uint8
    string = read_instruction_payload(length,'SHORT_BINSTRING')

    Instructions::ShortBinString.new(length,string)
  when 88 # BINUNICODE
    length = read_uint32_le
    string = read_instruction_payload(length,'BINUNICODE').force_encoding(Encoding::UTF_8)

    Instructions::BinUnicode.new(length,string)
  when 93 # EMPTY_LIST
    Instructions::EMPTY_LIST
  when 101 # APPENDS
    Instructions::APPENDS
  when 104 # BINGET
    Instructions::BinGet.new(read_uint8)
  when 106 # LONG_BINGET
    Instructions::LongBinGet.new(read_uint32_le)
  when 113 # BINPUT
    Instructions::BinPut.new(read_uint8)
  when 117 # SETITEMS
    Instructions::SETITEMS
  when 125 # EMPTY_DICT
    Instructions::EMPTY_DICT
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 1")
  end
end

#
# Reads exactly `length` bytes of a length-prefixed instruction argument.
#
# @param [Integer, nil] length
#   The declared number of bytes (nil if the length itself could not be read).
#
# @param [String] opcode_name
#   The opcode name, used only in the error message.
#
# @return [String]
#   The bytes that were read.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes were available.
#
# @api private
#
def read_instruction_payload(length,opcode_name)
  # IO#read returns nil or a shorter String when the stream is truncated;
  # neither may be passed on as if it were the complete argument.
  data = @io.read(length) unless length.nil?

  unless data && data.bytesize == length
    raise(InvalidFormat,"unexpected end of stream while reading the #{opcode_name} payload")
  end

  return data
end
|
151
|
+
|
152
|
+
#
|
153
|
+
# Reads a double precision (64bit) floating point number, in network
|
154
|
+
# byte-order (big-endian).
|
155
|
+
#
|
156
|
+
# @return [Float]
|
157
|
+
# The decoded float.
|
158
|
+
#
|
159
|
+
def read_float64_be
  # Reads eight bytes and decodes them as a big-endian double ('G').
  bytes = @io.read(8)

  # IO#read returns nil or a short String on a truncated stream; unpacking
  # that would raise NoMethodError or silently yield nil.
  unless bytes && bytes.bytesize == 8
    raise(InvalidFormat,"unexpected end of stream while reading a 64bit float")
  end

  bytes.unpack1('G')
end
|
162
|
+
|
163
|
+
#
|
164
|
+
# Reads a single 8bit unsigned integer (byte).
|
165
|
+
#
|
166
|
+
# @return [Integer]
|
167
|
+
#
|
168
|
+
def read_uint8
  # Reads one byte and returns it as an Integer in 0..255.
  byte = @io.getbyte

  # getbyte returns nil at end-of-stream; report the truncation here rather
  # than handing nil to the instruction being built.
  if byte.nil?
    raise(InvalidFormat,"unexpected end of stream while reading an 8bit unsigned integer")
  end

  byte
end
|
171
|
+
|
172
|
+
#
|
173
|
+
# Reads an unsigned 32bit integer, in little-endian byte-order.
|
174
|
+
#
|
175
|
+
# @return [Integer]
|
176
|
+
#
|
177
|
+
def read_uint32_le
  # Reads four bytes and decodes them as a little-endian unsigned 32bit
  # integer ('L<').
  bytes = @io.read(4)

  # IO#read returns nil or a short String on a truncated stream; unpacking
  # that would raise NoMethodError or silently yield nil.
  unless bytes && bytes.bytesize == 4
    raise(InvalidFormat,"unexpected end of stream while reading a 32bit unsigned integer")
  end

  bytes.unpack1('L<')
end
|
180
|
+
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|