python-pickle 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +3 -0
- data/.github/workflows/ruby.yml +27 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +14 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +149 -0
- data/Rakefile +13 -0
- data/gemspec.yml +25 -0
- data/lib/python/pickle/byte_array.rb +40 -0
- data/lib/python/pickle/deserializer.rb +595 -0
- data/lib/python/pickle/exceptions.rb +12 -0
- data/lib/python/pickle/instruction.rb +52 -0
- data/lib/python/pickle/instructions/add_items.rb +26 -0
- data/lib/python/pickle/instructions/append.rb +24 -0
- data/lib/python/pickle/instructions/appends.rb +26 -0
- data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
- data/lib/python/pickle/instructions/bin_float.rb +29 -0
- data/lib/python/pickle/instructions/bin_get.rb +27 -0
- data/lib/python/pickle/instructions/bin_int1.rb +29 -0
- data/lib/python/pickle/instructions/bin_put.rb +29 -0
- data/lib/python/pickle/instructions/bin_string.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
- data/lib/python/pickle/instructions/build.rb +24 -0
- data/lib/python/pickle/instructions/byte_array8.rb +32 -0
- data/lib/python/pickle/instructions/dict.rb +17 -0
- data/lib/python/pickle/instructions/dup.rb +24 -0
- data/lib/python/pickle/instructions/empty_dict.rb +26 -0
- data/lib/python/pickle/instructions/empty_list.rb +26 -0
- data/lib/python/pickle/instructions/empty_set.rb +26 -0
- data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
- data/lib/python/pickle/instructions/ext1.rb +29 -0
- data/lib/python/pickle/instructions/ext2.rb +29 -0
- data/lib/python/pickle/instructions/ext4.rb +29 -0
- data/lib/python/pickle/instructions/float.rb +24 -0
- data/lib/python/pickle/instructions/frame.rb +29 -0
- data/lib/python/pickle/instructions/frozen_set.rb +26 -0
- data/lib/python/pickle/instructions/get.rb +27 -0
- data/lib/python/pickle/instructions/global.rb +62 -0
- data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
- data/lib/python/pickle/instructions/has_value.rb +50 -0
- data/lib/python/pickle/instructions/int.rb +24 -0
- data/lib/python/pickle/instructions/list.rb +24 -0
- data/lib/python/pickle/instructions/long.rb +24 -0
- data/lib/python/pickle/instructions/long1.rb +32 -0
- data/lib/python/pickle/instructions/long4.rb +32 -0
- data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
- data/lib/python/pickle/instructions/mark.rb +24 -0
- data/lib/python/pickle/instructions/memoize.rb +26 -0
- data/lib/python/pickle/instructions/new_false.rb +24 -0
- data/lib/python/pickle/instructions/new_obj.rb +26 -0
- data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
- data/lib/python/pickle/instructions/new_true.rb +24 -0
- data/lib/python/pickle/instructions/next_buffer.rb +26 -0
- data/lib/python/pickle/instructions/none.rb +24 -0
- data/lib/python/pickle/instructions/pop.rb +24 -0
- data/lib/python/pickle/instructions/pop_mark.rb +24 -0
- data/lib/python/pickle/instructions/proto.rb +29 -0
- data/lib/python/pickle/instructions/put.rb +24 -0
- data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
- data/lib/python/pickle/instructions/reduce.rb +24 -0
- data/lib/python/pickle/instructions/set_item.rb +24 -0
- data/lib/python/pickle/instructions/set_items.rb +26 -0
- data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/stack_global.rb +26 -0
- data/lib/python/pickle/instructions/stop.rb +24 -0
- data/lib/python/pickle/instructions/string.rb +24 -0
- data/lib/python/pickle/instructions/tuple.rb +24 -0
- data/lib/python/pickle/instructions/tuple1.rb +24 -0
- data/lib/python/pickle/instructions/tuple2.rb +24 -0
- data/lib/python/pickle/instructions/tuple3.rb +24 -0
- data/lib/python/pickle/protocol.rb +56 -0
- data/lib/python/pickle/protocol0.rb +399 -0
- data/lib/python/pickle/protocol1.rb +183 -0
- data/lib/python/pickle/protocol2.rb +229 -0
- data/lib/python/pickle/protocol3.rb +163 -0
- data/lib/python/pickle/protocol4.rb +285 -0
- data/lib/python/pickle/protocol5.rb +218 -0
- data/lib/python/pickle/py_class.rb +75 -0
- data/lib/python/pickle/py_object.rb +141 -0
- data/lib/python/pickle/tuple.rb +19 -0
- data/lib/python/pickle/version.rb +6 -0
- data/lib/python/pickle.rb +226 -0
- data/python-pickle.gemspec +62 -0
- data/spec/byte_array_spec.rb +54 -0
- data/spec/deserializer_spec.rb +1201 -0
- data/spec/fixtures/ascii_str_v3.pkl +0 -0
- data/spec/fixtures/ascii_str_v4.pkl +0 -0
- data/spec/fixtures/ascii_str_v5.pkl +0 -0
- data/spec/fixtures/bin_str_v0.pkl +3 -0
- data/spec/fixtures/bin_str_v1.pkl +0 -0
- data/spec/fixtures/bin_str_v2.pkl +0 -0
- data/spec/fixtures/bin_str_v3.pkl +0 -0
- data/spec/fixtures/bin_str_v4.pkl +0 -0
- data/spec/fixtures/bin_str_v5.pkl +0 -0
- data/spec/fixtures/bytearray_v0.pkl +10 -0
- data/spec/fixtures/bytearray_v1.pkl +0 -0
- data/spec/fixtures/bytearray_v2.pkl +0 -0
- data/spec/fixtures/bytearray_v3.pkl +0 -0
- data/spec/fixtures/bytearray_v4.pkl +0 -0
- data/spec/fixtures/bytearray_v5.pkl +0 -0
- data/spec/fixtures/class_v0.pkl +4 -0
- data/spec/fixtures/class_v1.pkl +0 -0
- data/spec/fixtures/class_v2.pkl +0 -0
- data/spec/fixtures/class_v3.pkl +0 -0
- data/spec/fixtures/class_v4.pkl +0 -0
- data/spec/fixtures/class_v5.pkl +0 -0
- data/spec/fixtures/dict_v0.pkl +6 -0
- data/spec/fixtures/dict_v1.pkl +0 -0
- data/spec/fixtures/dict_v2.pkl +0 -0
- data/spec/fixtures/dict_v3.pkl +0 -0
- data/spec/fixtures/dict_v4.pkl +0 -0
- data/spec/fixtures/dict_v5.pkl +0 -0
- data/spec/fixtures/escaped_str_v0.pkl +3 -0
- data/spec/fixtures/escaped_str_v1.pkl +0 -0
- data/spec/fixtures/escaped_str_v2.pkl +0 -0
- data/spec/fixtures/false_v0.pkl +2 -0
- data/spec/fixtures/false_v1.pkl +2 -0
- data/spec/fixtures/false_v2.pkl +1 -0
- data/spec/fixtures/false_v3.pkl +1 -0
- data/spec/fixtures/false_v4.pkl +1 -0
- data/spec/fixtures/false_v5.pkl +1 -0
- data/spec/fixtures/float_v0.pkl +2 -0
- data/spec/fixtures/float_v1.pkl +1 -0
- data/spec/fixtures/float_v2.pkl +1 -0
- data/spec/fixtures/float_v3.pkl +1 -0
- data/spec/fixtures/float_v4.pkl +0 -0
- data/spec/fixtures/float_v5.pkl +0 -0
- data/spec/fixtures/function_v0.pkl +4 -0
- data/spec/fixtures/function_v1.pkl +0 -0
- data/spec/fixtures/function_v2.pkl +0 -0
- data/spec/fixtures/function_v3.pkl +0 -0
- data/spec/fixtures/function_v4.pkl +0 -0
- data/spec/fixtures/function_v5.pkl +0 -0
- data/spec/fixtures/hex_str_v0.pkl +3 -0
- data/spec/fixtures/hex_str_v1.pkl +0 -0
- data/spec/fixtures/hex_str_v2.pkl +0 -0
- data/spec/fixtures/int_v0.pkl +2 -0
- data/spec/fixtures/int_v1.pkl +1 -0
- data/spec/fixtures/int_v2.pkl +1 -0
- data/spec/fixtures/int_v3.pkl +1 -0
- data/spec/fixtures/int_v4.pkl +1 -0
- data/spec/fixtures/int_v5.pkl +1 -0
- data/spec/fixtures/list_v0.pkl +7 -0
- data/spec/fixtures/list_v1.pkl +0 -0
- data/spec/fixtures/list_v2.pkl +0 -0
- data/spec/fixtures/list_v3.pkl +0 -0
- data/spec/fixtures/list_v4.pkl +0 -0
- data/spec/fixtures/list_v5.pkl +0 -0
- data/spec/fixtures/long_v0.pkl +2 -0
- data/spec/fixtures/long_v1.pkl +2 -0
- data/spec/fixtures/long_v2.pkl +0 -0
- data/spec/fixtures/long_v3.pkl +0 -0
- data/spec/fixtures/long_v4.pkl +0 -0
- data/spec/fixtures/long_v5.pkl +0 -0
- data/spec/fixtures/nested_dict_v0.pkl +12 -0
- data/spec/fixtures/nested_dict_v1.pkl +0 -0
- data/spec/fixtures/nested_dict_v2.pkl +0 -0
- data/spec/fixtures/nested_dict_v3.pkl +0 -0
- data/spec/fixtures/nested_dict_v4.pkl +0 -0
- data/spec/fixtures/nested_dict_v5.pkl +0 -0
- data/spec/fixtures/nested_list_v0.pkl +9 -0
- data/spec/fixtures/nested_list_v1.pkl +0 -0
- data/spec/fixtures/nested_list_v2.pkl +0 -0
- data/spec/fixtures/nested_list_v3.pkl +0 -0
- data/spec/fixtures/nested_list_v4.pkl +0 -0
- data/spec/fixtures/nested_list_v5.pkl +0 -0
- data/spec/fixtures/none_v0.pkl +1 -0
- data/spec/fixtures/none_v1.pkl +1 -0
- data/spec/fixtures/none_v2.pkl +1 -0
- data/spec/fixtures/none_v3.pkl +1 -0
- data/spec/fixtures/none_v4.pkl +1 -0
- data/spec/fixtures/none_v5.pkl +1 -0
- data/spec/fixtures/object_v0.pkl +19 -0
- data/spec/fixtures/object_v1.pkl +0 -0
- data/spec/fixtures/object_v2.pkl +0 -0
- data/spec/fixtures/object_v3.pkl +0 -0
- data/spec/fixtures/object_v4.pkl +0 -0
- data/spec/fixtures/object_v5.pkl +0 -0
- data/spec/fixtures/str_v0.pkl +3 -0
- data/spec/fixtures/str_v1.pkl +0 -0
- data/spec/fixtures/str_v2.pkl +0 -0
- data/spec/fixtures/str_v3.pkl +0 -0
- data/spec/fixtures/str_v4.pkl +0 -0
- data/spec/fixtures/str_v5.pkl +0 -0
- data/spec/fixtures/true_v0.pkl +2 -0
- data/spec/fixtures/true_v1.pkl +2 -0
- data/spec/fixtures/true_v2.pkl +1 -0
- data/spec/fixtures/true_v3.pkl +1 -0
- data/spec/fixtures/true_v4.pkl +1 -0
- data/spec/fixtures/true_v5.pkl +1 -0
- data/spec/fixtures/unicode_str_v0.pkl +3 -0
- data/spec/fixtures/unicode_str_v1.pkl +0 -0
- data/spec/fixtures/unicode_str_v2.pkl +0 -0
- data/spec/fixtures/unicode_str_v3.pkl +0 -0
- data/spec/fixtures/unicode_str_v4.pkl +0 -0
- data/spec/fixtures/unicode_str_v5.pkl +0 -0
- data/spec/generate_pickles2.py +41 -0
- data/spec/generate_pickles3.py +40 -0
- data/spec/integration/load/protocol0_spec.rb +258 -0
- data/spec/integration/load/protocol1_spec.rb +258 -0
- data/spec/integration/load/protocol2_spec.rb +258 -0
- data/spec/integration/load/protocol3_spec.rb +258 -0
- data/spec/integration/load/protocol4_spec.rb +258 -0
- data/spec/integration/load/protocol5_spec.rb +258 -0
- data/spec/integration/parse/protocol0_spec.rb +467 -0
- data/spec/integration/parse/protocol1_spec.rb +459 -0
- data/spec/integration/parse/protocol2_spec.rb +471 -0
- data/spec/integration/parse/protocol3_spec.rb +407 -0
- data/spec/integration/parse/protocol4_spec.rb +439 -0
- data/spec/integration/parse/protocol5_spec.rb +419 -0
- data/spec/pickle_spec.rb +163 -0
- data/spec/protocol0_read_instruction_examples.rb +211 -0
- data/spec/protocol0_spec.rb +445 -0
- data/spec/protocol1_read_instruction_examples.rb +156 -0
- data/spec/protocol1_spec.rb +59 -0
- data/spec/protocol2_read_instruction_examples.rb +135 -0
- data/spec/protocol2_spec.rb +128 -0
- data/spec/protocol3_read_instruction_examples.rb +29 -0
- data/spec/protocol3_spec.rb +32 -0
- data/spec/protocol4_read_instruction_examples.rb +142 -0
- data/spec/protocol4_spec.rb +58 -0
- data/spec/protocol5_spec.rb +68 -0
- data/spec/py_class_spec.rb +62 -0
- data/spec/py_object_spec.rb +149 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/tuple_spec.rb +18 -0
- metadata +325 -0
@@ -0,0 +1,229 @@
|
|
1
|
+
require 'python/pickle/protocol1'
|
2
|
+
require 'python/pickle/instructions/proto'
|
3
|
+
require 'python/pickle/instructions/new_obj'
|
4
|
+
require 'python/pickle/instructions/ext1'
|
5
|
+
require 'python/pickle/instructions/ext2'
|
6
|
+
require 'python/pickle/instructions/ext4'
|
7
|
+
require 'python/pickle/instructions/tuple1'
|
8
|
+
require 'python/pickle/instructions/tuple2'
|
9
|
+
require 'python/pickle/instructions/tuple3'
|
10
|
+
require 'python/pickle/instructions/new_true'
|
11
|
+
require 'python/pickle/instructions/new_false'
|
12
|
+
require 'python/pickle/instructions/long1'
|
13
|
+
require 'python/pickle/instructions/long4'
|
14
|
+
|
15
|
+
module Python
|
16
|
+
module Pickle
|
17
|
+
class Protocol2 < Protocol1
|
18
|
+
|
19
|
+
# Opcodes for Pickle protocol version 2.
|
20
|
+
#
|
21
|
+
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
|
22
|
+
OPCODES = Protocol1::OPCODES + Set[
|
23
|
+
128, # PROTO
|
24
|
+
129, # NEWOBJ
|
25
|
+
130, # EXT1
|
26
|
+
131, # EXT2
|
27
|
+
132, # EXT4
|
28
|
+
133, # TUPLE1
|
29
|
+
134, # TUPLE2
|
30
|
+
135, # TUPLE3
|
31
|
+
136, # NEWTRUE
|
32
|
+
137, # NEWFALSE
|
33
|
+
138, # LONG1
|
34
|
+
139 # LONG4
|
35
|
+
]
|
36
|
+
|
37
|
+
#
|
38
|
+
# Reads an instruction from the pickle stream.
|
39
|
+
#
|
40
|
+
# @return [Instruction]
|
41
|
+
# The decoded instruction.
|
42
|
+
#
|
43
|
+
# @raise [InvalidFormat]
|
44
|
+
# The pickle stream could not be parsed.
|
45
|
+
#
|
46
|
+
def read_instruction
|
47
|
+
case (opcode = @io.getbyte)
|
48
|
+
#
|
49
|
+
# Protocol 0 instructions
|
50
|
+
#
|
51
|
+
when 40 # MARK
|
52
|
+
Instructions::MARK
|
53
|
+
when 46 # STOP
|
54
|
+
Instructions::STOP
|
55
|
+
when 48 # POP
|
56
|
+
Instructions::POP
|
57
|
+
when 49 # POP_MARK
|
58
|
+
Instructions::POP_MARK
|
59
|
+
when 50 # DUP
|
60
|
+
Instructions::DUP
|
61
|
+
when 70 # FLOAT
|
62
|
+
Instructions::Float.new(read_float)
|
63
|
+
when 73 # INT
|
64
|
+
Instructions::Int.new(read_int)
|
65
|
+
when 76 # LONG
|
66
|
+
Instructions::Long.new(read_long)
|
67
|
+
when 78 # NONE
|
68
|
+
Instructions::NONE
|
69
|
+
when 82 # REDUCE
|
70
|
+
Instructions::REDUCE
|
71
|
+
when 83 # STRING
|
72
|
+
Instructions::String.new(read_string)
|
73
|
+
when 86 # UNICODE
|
74
|
+
Instructions::String.new(read_unicode_string)
|
75
|
+
when 97 # APPEND
|
76
|
+
Instructions::APPEND
|
77
|
+
when 98 # BUILD
|
78
|
+
Instructions::BUILD
|
79
|
+
when 99 # GLOBAL
|
80
|
+
Instructions::Global.new(read_nl_string,read_nl_string)
|
81
|
+
when 100 # DICT
|
82
|
+
Instructions::DICT
|
83
|
+
when 103 # GET
|
84
|
+
Instructions::Get.new(read_int)
|
85
|
+
when 108 # LIST
|
86
|
+
Instructions::LIST
|
87
|
+
when 112 # PUT
|
88
|
+
Instructions::Put.new(read_int)
|
89
|
+
when 115 # SETITEM
|
90
|
+
Instructions::SETITEM
|
91
|
+
when 116 # TUPLE
|
92
|
+
Instructions::TUPLE
|
93
|
+
#
|
94
|
+
# Protocol 1 instructions
|
95
|
+
#
|
96
|
+
when 41 # EMPTY_TUPLE
|
97
|
+
Instructions::EMPTY_TUPLE
|
98
|
+
when 71 # BINFLOAT
|
99
|
+
Instructions::BinFloat.new(read_float64_be)
|
100
|
+
when 75 # BININT1
|
101
|
+
Instructions::BinInt1.new(read_uint8)
|
102
|
+
when 84 # BINSTRING
|
103
|
+
length = read_uint32_le
|
104
|
+
string = @io.read(length)
|
105
|
+
|
106
|
+
Instructions::BinString.new(length,string)
|
107
|
+
when 85 # SHORT_BINSTRING
|
108
|
+
length = read_uint8
|
109
|
+
string = @io.read(length)
|
110
|
+
|
111
|
+
Instructions::ShortBinString.new(length,string)
|
112
|
+
when 88 # BINUNICODE
|
113
|
+
length = read_uint32_le
|
114
|
+
string = @io.read(length).force_encoding(Encoding::UTF_8)
|
115
|
+
|
116
|
+
Instructions::BinUnicode.new(length,string)
|
117
|
+
when 93 # EMPTY_LIST
|
118
|
+
Instructions::EMPTY_LIST
|
119
|
+
when 101 # APPENDS
|
120
|
+
Instructions::APPENDS
|
121
|
+
when 104 # BINGET
|
122
|
+
Instructions::BinGet.new(read_uint8)
|
123
|
+
when 106 # LONG_BINGET
|
124
|
+
Instructions::LongBinGet.new(read_uint32_le)
|
125
|
+
when 113 # BINPUT
|
126
|
+
Instructions::BinPut.new(read_uint8)
|
127
|
+
when 117 # SETITEMS
|
128
|
+
Instructions::SETITEMS
|
129
|
+
when 125 # EMPTY_DICT
|
130
|
+
Instructions::EMPTY_DICT
|
131
|
+
#
|
132
|
+
# Protocol 2 instructions
|
133
|
+
#
|
134
|
+
when 128 # PROTO
|
135
|
+
Instructions::Proto.new(read_uint8)
|
136
|
+
when 129 # NEWOBJ
|
137
|
+
Instructions::NEWOBJ
|
138
|
+
when 130 # EXT1
|
139
|
+
Instructions::Ext1.new(read_uint8)
|
140
|
+
when 131 # EXT2
|
141
|
+
Instructions::Ext2.new(read_uint16_le)
|
142
|
+
when 132 # EXT4
|
143
|
+
Instructions::Ext4.new(read_uint32_le)
|
144
|
+
when 133 # TUPLE1
|
145
|
+
Instructions::TUPLE1
|
146
|
+
when 134 # TUPLE2
|
147
|
+
Instructions::TUPLE2
|
148
|
+
when 135 # TUPLE3
|
149
|
+
Instructions::TUPLE3
|
150
|
+
when 136 # NEWTRUE
|
151
|
+
Instructions::NEWTRUE
|
152
|
+
when 137 # NEWFALSE
|
153
|
+
Instructions::NEWFALSE
|
154
|
+
when 138 # LONG1
|
155
|
+
length = read_uint8
|
156
|
+
long = read_int_le(length)
|
157
|
+
|
158
|
+
Instructions::Long1.new(length,long)
|
159
|
+
when 139 # LONG4
|
160
|
+
length = read_uint32_le
|
161
|
+
long = read_int_le(length)
|
162
|
+
|
163
|
+
Instructions::Long4.new(length,long)
|
164
|
+
else
|
165
|
+
raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 2")
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
#
# Reads an unsigned 16bit integer in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   The stream ended before two bytes could be read.
#
def read_uint16_le
  data = @io.read(2)

  # IO#read / StringIO#read return nil at EOF for a positive length, and
  # unpack1 returns nil for a one-byte string; treat both as truncation
  # instead of failing with NoMethodError or returning nil.
  if data.nil? || data.bytesize < 2
    raise(InvalidFormat,"premature end of string")
  end

  return data.unpack1('S<')
end
|
178
|
+
|
179
|
+
#
# Reads and unpacks a signed integer of arbitrary length.
#
# @param [Integer] length
#   The number of bytes to read.
#
# @return [Integer]
#   The decoded long integer.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes could be read.
#
def read_int_le(length)
  data = @io.read(length)

  # IO#read / StringIO#read return nil (not "") at EOF when length > 0, so
  # check for nil before measuring the read; otherwise a truncated stream
  # surfaces as NoMethodError instead of InvalidFormat.
  if data.nil? || data.bytesize < length
    raise(InvalidFormat,"premature end of string")
  end

  return unpack_int_le(data)
end
|
197
|
+
|
198
|
+
#
|
199
|
+
# Decodes a packed twos-complement long value of arbitrary length.
|
200
|
+
#
|
201
|
+
# @param [String] data
|
202
|
+
# The packed long to decode.
|
203
|
+
#
|
204
|
+
# @return [Integer]
|
205
|
+
# The unpacked long.
|
206
|
+
#
|
207
|
+
def unpack_int_le(data)
|
208
|
+
return 0 if data.empty?
|
209
|
+
|
210
|
+
long = 0
|
211
|
+
shift = 0
|
212
|
+
|
213
|
+
data.each_byte do |b|
|
214
|
+
long |= b << shift
|
215
|
+
shift += 8
|
216
|
+
end
|
217
|
+
|
218
|
+
max_signed = (1 << (shift-1))
|
219
|
+
|
220
|
+
if long >= max_signed
|
221
|
+
long -= (1 << shift)
|
222
|
+
end
|
223
|
+
|
224
|
+
return long
|
225
|
+
end
|
226
|
+
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'python/pickle/protocol2'
|
2
|
+
require 'python/pickle/instructions/bin_bytes'
|
3
|
+
require 'python/pickle/instructions/short_bin_bytes'
|
4
|
+
|
5
|
+
module Python
|
6
|
+
module Pickle
|
7
|
+
class Protocol3 < Protocol2
|
8
|
+
# Opcodes for Pickle protocol version 3.
|
9
|
+
#
|
10
|
+
# @see http://formats.kaitai.io/python_pickle/ruby.html
|
11
|
+
OPCODES = Protocol2::OPCODES + Set[
|
12
|
+
66, # BINBYTES
|
13
|
+
67 # SHORT_BINBYTES
|
14
|
+
]
|
15
|
+
|
16
|
+
#
|
17
|
+
# Reads an instruction from the pickle stream.
|
18
|
+
#
|
19
|
+
# @return [Instruction]
|
20
|
+
# The decoded instruction.
|
21
|
+
#
|
22
|
+
# @raise [InvalidFormat]
|
23
|
+
# The pickle stream could not be parsed.
|
24
|
+
#
|
25
|
+
def read_instruction
|
26
|
+
case (opcode = @io.getbyte)
|
27
|
+
#
|
28
|
+
# Protocol 0 instructions
|
29
|
+
#
|
30
|
+
when 40 # MARK
|
31
|
+
Instructions::MARK
|
32
|
+
when 46 # STOP
|
33
|
+
Instructions::STOP
|
34
|
+
when 48 # POP
|
35
|
+
Instructions::POP
|
36
|
+
when 49 # POP_MARK
|
37
|
+
Instructions::POP_MARK
|
38
|
+
when 50 # DUP
|
39
|
+
Instructions::DUP
|
40
|
+
when 70 # FLOAT
|
41
|
+
Instructions::Float.new(read_float)
|
42
|
+
when 73 # INT
|
43
|
+
Instructions::Int.new(read_int)
|
44
|
+
when 76 # LONG
|
45
|
+
Instructions::Long.new(read_long)
|
46
|
+
when 78 # NONE
|
47
|
+
Instructions::NONE
|
48
|
+
when 82 # REDUCE
|
49
|
+
Instructions::REDUCE
|
50
|
+
when 83 # STRING
|
51
|
+
Instructions::String.new(read_string)
|
52
|
+
when 86 # UNICODE
|
53
|
+
Instructions::String.new(read_unicode_string)
|
54
|
+
when 97 # APPEND
|
55
|
+
Instructions::APPEND
|
56
|
+
when 98 # BUILD
|
57
|
+
Instructions::BUILD
|
58
|
+
when 99 # GLOBAL
|
59
|
+
Instructions::Global.new(read_nl_string,read_nl_string)
|
60
|
+
when 100 # DICT
|
61
|
+
Instructions::DICT
|
62
|
+
when 103 # GET
|
63
|
+
Instructions::Get.new(read_int)
|
64
|
+
when 108 # LIST
|
65
|
+
Instructions::LIST
|
66
|
+
when 112 # PUT
|
67
|
+
Instructions::Put.new(read_int)
|
68
|
+
when 115 # SETITEM
|
69
|
+
Instructions::SETITEM
|
70
|
+
when 116 # TUPLE
|
71
|
+
Instructions::TUPLE
|
72
|
+
#
|
73
|
+
# Protocol 1 instructions
|
74
|
+
#
|
75
|
+
when 41 # EMPTY_TUPLE
|
76
|
+
Instructions::EMPTY_TUPLE
|
77
|
+
when 71 # BINFLOAT
|
78
|
+
Instructions::BinFloat.new(read_float64_be)
|
79
|
+
when 75 # BININT1
|
80
|
+
Instructions::BinInt1.new(read_uint8)
|
81
|
+
when 84 # BINSTRING
|
82
|
+
length = read_uint32_le
|
83
|
+
string = @io.read(length)
|
84
|
+
|
85
|
+
Instructions::BinString.new(length,string)
|
86
|
+
when 85 # SHORT_BINSTRING
|
87
|
+
length = read_uint8
|
88
|
+
string = @io.read(length)
|
89
|
+
|
90
|
+
Instructions::ShortBinString.new(length,string)
|
91
|
+
when 88 # BINUNICODE
|
92
|
+
length = read_uint32_le
|
93
|
+
string = @io.read(length).force_encoding(Encoding::UTF_8)
|
94
|
+
|
95
|
+
Instructions::BinUnicode.new(length,string)
|
96
|
+
when 93 # EMPTY_LIST
|
97
|
+
Instructions::EMPTY_LIST
|
98
|
+
when 101 # APPENDS
|
99
|
+
Instructions::APPENDS
|
100
|
+
when 104 # BINGET
|
101
|
+
Instructions::BinGet.new(read_uint8)
|
102
|
+
when 106 # LONG_BINGET
|
103
|
+
Instructions::LongBinGet.new(read_uint32_le)
|
104
|
+
when 113 # BINPUT
|
105
|
+
Instructions::BinPut.new(read_uint8)
|
106
|
+
when 117 # SETITEMS
|
107
|
+
Instructions::SETITEMS
|
108
|
+
when 125 # EMPTY_DICT
|
109
|
+
Instructions::EMPTY_DICT
|
110
|
+
#
|
111
|
+
# Protocol 2 instructions
|
112
|
+
#
|
113
|
+
when 128 # PROTO
|
114
|
+
Instructions::Proto.new(read_uint8)
|
115
|
+
when 129 # NEWOBJ
|
116
|
+
Instructions::NEWOBJ
|
117
|
+
when 130 # EXT1
|
118
|
+
Instructions::Ext1.new(read_uint8)
|
119
|
+
when 131 # EXT2
|
120
|
+
Instructions::Ext2.new(read_uint16_le)
|
121
|
+
when 132 # EXT4
|
122
|
+
Instructions::Ext4.new(read_uint32_le)
|
123
|
+
when 133 # TUPLE1
|
124
|
+
Instructions::TUPLE1
|
125
|
+
when 134 # TUPLE2
|
126
|
+
Instructions::TUPLE2
|
127
|
+
when 135 # TUPLE3
|
128
|
+
Instructions::TUPLE3
|
129
|
+
when 136 # NEWTRUE
|
130
|
+
Instructions::NEWTRUE
|
131
|
+
when 137 # NEWFALSE
|
132
|
+
Instructions::NEWFALSE
|
133
|
+
when 138 # LONG1
|
134
|
+
length = read_uint8
|
135
|
+
long = read_int_le(length)
|
136
|
+
|
137
|
+
Instructions::Long1.new(length,long)
|
138
|
+
when 139 # LONG4
|
139
|
+
length = read_uint32_le
|
140
|
+
long = read_int_le(length)
|
141
|
+
|
142
|
+
Instructions::Long4.new(length,long)
|
143
|
+
#
|
144
|
+
# Protocol 3 instructions
|
145
|
+
#
|
146
|
+
when 66 # BINBYTES
|
147
|
+
length = read_uint32_le
|
148
|
+
bytes = @io.read(length)
|
149
|
+
|
150
|
+
Instructions::BinBytes.new(length,bytes)
|
151
|
+
when 67 # SHORT_BINBYTES
|
152
|
+
length = read_uint8
|
153
|
+
bytes = @io.read(length)
|
154
|
+
|
155
|
+
Instructions::ShortBinBytes.new(length,bytes)
|
156
|
+
else
|
157
|
+
raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 3")
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'python/pickle/protocol3'
|
2
|
+
require 'python/pickle/instructions/short_bin_unicode'
|
3
|
+
require 'python/pickle/instructions/bin_unicode8'
|
4
|
+
require 'python/pickle/instructions/bin_bytes8'
|
5
|
+
require 'python/pickle/instructions/empty_set'
|
6
|
+
require 'python/pickle/instructions/add_items'
|
7
|
+
require 'python/pickle/instructions/frozen_set'
|
8
|
+
require 'python/pickle/instructions/new_obj_ex'
|
9
|
+
require 'python/pickle/instructions/stack_global'
|
10
|
+
require 'python/pickle/instructions/memoize'
|
11
|
+
require 'python/pickle/instructions/frame'
|
12
|
+
|
13
|
+
module Python
|
14
|
+
module Pickle
|
15
|
+
#
|
16
|
+
# Implements Python Pickle protocol 4.
|
17
|
+
#
|
18
|
+
class Protocol4 < Protocol3
|
19
|
+
|
20
|
+
#
|
21
|
+
# Initializes the protocol 4 reader/writer.
|
22
|
+
#
|
23
|
+
def initialize(io)
|
24
|
+
super(io)
|
25
|
+
|
26
|
+
@io_stack = []
|
27
|
+
end
|
28
|
+
|
29
|
+
# Opcodes for Pickle protocol 4.
|
30
|
+
#
|
31
|
+
# @see https://peps.python.org/pep-3154/
|
32
|
+
OPCODES = Protocol3::OPCODES + Set[
|
33
|
+
140, # SHORT_BINUNICODE
|
34
|
+
141, # BINUNICODE8
|
35
|
+
142, # BINBYTES8
|
36
|
+
143, # EMPTY_SET
|
37
|
+
144, # ADDITEMS
|
38
|
+
145, # FROZENSET
|
39
|
+
146, # NEWOBJ_EX
|
40
|
+
147, # STACK_GLOBAL
|
41
|
+
148, # MEMOIZE
|
42
|
+
149 # FRAME
|
43
|
+
]
|
44
|
+
|
45
|
+
#
|
46
|
+
# Reads an instruction from the pickle stream.
|
47
|
+
#
|
48
|
+
# @return [Instruction]
|
49
|
+
# The decoded instruction.
|
50
|
+
#
|
51
|
+
# @raise [InvalidFormat]
|
52
|
+
# The pickle stream could not be parsed.
|
53
|
+
#
|
54
|
+
def read_instruction
|
55
|
+
case (opcode = @io.getbyte)
|
56
|
+
#
|
57
|
+
# Protocol 0 instructions
|
58
|
+
#
|
59
|
+
when 40 # MARK
|
60
|
+
Instructions::MARK
|
61
|
+
when 46 # STOP
|
62
|
+
Instructions::STOP
|
63
|
+
when 48 # POP
|
64
|
+
Instructions::POP
|
65
|
+
when 49 # POP_MARK
|
66
|
+
Instructions::POP_MARK
|
67
|
+
when 50 # DUP
|
68
|
+
Instructions::DUP
|
69
|
+
when 70 # FLOAT
|
70
|
+
Instructions::Float.new(read_float)
|
71
|
+
when 73 # INT
|
72
|
+
Instructions::Int.new(read_int)
|
73
|
+
when 76 # LONG
|
74
|
+
Instructions::Long.new(read_long)
|
75
|
+
when 78 # NONE
|
76
|
+
Instructions::NONE
|
77
|
+
when 82 # REDUCE
|
78
|
+
Instructions::REDUCE
|
79
|
+
when 83 # STRING
|
80
|
+
Instructions::String.new(read_string)
|
81
|
+
when 86 # UNICODE
|
82
|
+
Instructions::String.new(read_unicode_string)
|
83
|
+
when 97 # APPEND
|
84
|
+
Instructions::APPEND
|
85
|
+
when 98 # BUILD
|
86
|
+
Instructions::BUILD
|
87
|
+
when 99 # GLOBAL
|
88
|
+
Instructions::Global.new(read_nl_string,read_nl_string)
|
89
|
+
when 100 # DICT
|
90
|
+
Instructions::DICT
|
91
|
+
when 103 # GET
|
92
|
+
Instructions::Get.new(read_int)
|
93
|
+
when 108 # LIST
|
94
|
+
Instructions::LIST
|
95
|
+
when 112 # PUT
|
96
|
+
Instructions::Put.new(read_int)
|
97
|
+
when 115 # SETITEM
|
98
|
+
Instructions::SETITEM
|
99
|
+
when 116 # TUPLE
|
100
|
+
Instructions::TUPLE
|
101
|
+
#
|
102
|
+
# Protocol 1 instructions
|
103
|
+
#
|
104
|
+
when 41 # EMPTY_TUPLE
|
105
|
+
Instructions::EMPTY_TUPLE
|
106
|
+
when 71 # BINFLOAT
|
107
|
+
Instructions::BinFloat.new(read_float64_be)
|
108
|
+
when 75 # BININT1
|
109
|
+
Instructions::BinInt1.new(read_uint8)
|
110
|
+
when 84 # BINSTRING
|
111
|
+
length = read_uint32_le
|
112
|
+
string = @io.read(length)
|
113
|
+
|
114
|
+
Instructions::BinString.new(length,string)
|
115
|
+
when 85 # SHORT_BINSTRING
|
116
|
+
length = read_uint8
|
117
|
+
string = @io.read(length)
|
118
|
+
|
119
|
+
Instructions::ShortBinString.new(length,string)
|
120
|
+
when 88 # BINUNICODE
|
121
|
+
length = read_uint32_le
|
122
|
+
string = @io.read(length).force_encoding(Encoding::UTF_8)
|
123
|
+
|
124
|
+
Instructions::BinUnicode.new(length,string)
|
125
|
+
when 93 # EMPTY_LIST
|
126
|
+
Instructions::EMPTY_LIST
|
127
|
+
when 101 # APPENDS
|
128
|
+
Instructions::APPENDS
|
129
|
+
when 104 # BINGET
|
130
|
+
Instructions::BinGet.new(read_uint8)
|
131
|
+
when 106 # LONG_BINGET
|
132
|
+
Instructions::LongBinGet.new(read_uint32_le)
|
133
|
+
when 113 # BINPUT
|
134
|
+
Instructions::BinPut.new(read_uint8)
|
135
|
+
when 117 # SETITEMS
|
136
|
+
Instructions::SETITEMS
|
137
|
+
when 125 # EMPTY_DICT
|
138
|
+
Instructions::EMPTY_DICT
|
139
|
+
#
|
140
|
+
# Protocol 2 instructions
|
141
|
+
#
|
142
|
+
when 128 # PROTO
|
143
|
+
Instructions::Proto.new(read_uint8)
|
144
|
+
when 129 # NEWOBJ
|
145
|
+
Instructions::NEWOBJ
|
146
|
+
when 130 # EXT1
|
147
|
+
Instructions::Ext1.new(read_uint8)
|
148
|
+
when 131 # EXT2
|
149
|
+
Instructions::Ext2.new(read_uint16_le)
|
150
|
+
when 132 # EXT4
|
151
|
+
Instructions::Ext4.new(read_uint32_le)
|
152
|
+
when 133 # TUPLE1
|
153
|
+
Instructions::TUPLE1
|
154
|
+
when 134 # TUPLE2
|
155
|
+
Instructions::TUPLE2
|
156
|
+
when 135 # TUPLE3
|
157
|
+
Instructions::TUPLE3
|
158
|
+
when 136 # NEWTRUE
|
159
|
+
Instructions::NEWTRUE
|
160
|
+
when 137 # NEWFALSE
|
161
|
+
Instructions::NEWFALSE
|
162
|
+
when 138 # LONG1
|
163
|
+
length = read_uint8
|
164
|
+
long = read_int_le(length)
|
165
|
+
|
166
|
+
Instructions::Long1.new(length,long)
|
167
|
+
when 139 # LONG4
|
168
|
+
length = read_uint32_le
|
169
|
+
long = read_int_le(length)
|
170
|
+
|
171
|
+
Instructions::Long4.new(length,long)
|
172
|
+
#
|
173
|
+
# Protocol 3 instructions
|
174
|
+
#
|
175
|
+
when 66 # BINBYTES
|
176
|
+
length = read_uint32_le
|
177
|
+
bytes = @io.read(length)
|
178
|
+
|
179
|
+
Instructions::BinBytes.new(length,bytes)
|
180
|
+
when 67 # SHORT_BINBYTES
|
181
|
+
length = read_uint8
|
182
|
+
bytes = @io.read(length)
|
183
|
+
|
184
|
+
Instructions::ShortBinBytes.new(length,bytes)
|
185
|
+
#
|
186
|
+
# Protocol 4 instructions
|
187
|
+
#
|
188
|
+
when 140 # SHORT_BINUNICODE
|
189
|
+
length = read_uint8
|
190
|
+
string = read_utf8_string(length)
|
191
|
+
|
192
|
+
Instructions::ShortBinUnicode.new(length,string)
|
193
|
+
when 141 # BINUNICODE8
|
194
|
+
length = read_uint64_le
|
195
|
+
string = read_utf8_string(length)
|
196
|
+
|
197
|
+
Instructions::BinUnicode8.new(length,string)
|
198
|
+
when 142 # BINBYTES8
|
199
|
+
length = read_uint64_le
|
200
|
+
bytes = @io.read(length)
|
201
|
+
|
202
|
+
Instructions::BinBytes8.new(length,bytes)
|
203
|
+
when 143 # EMPTY_SET
|
204
|
+
Instructions::EMPTY_SET
|
205
|
+
when 144 # ADDITEMS
|
206
|
+
Instructions::ADDITEMS
|
207
|
+
when 145 # FROZENSET
|
208
|
+
Instructions::FROZENSET
|
209
|
+
when 146 # NEWOBJ_EX
|
210
|
+
Instructions::NEWOBJ_EX
|
211
|
+
when 147 # STACK_GLOBAL
|
212
|
+
Instructions::STACK_GLOBAL
|
213
|
+
when 148 # MEMOIZE
|
214
|
+
Instructions::MEMOIZE
|
215
|
+
when 149 # FRAME
|
216
|
+
length = read_uint64_le
|
217
|
+
|
218
|
+
enter_frame(read_frame(length))
|
219
|
+
|
220
|
+
Instructions::Frame.new(length)
|
221
|
+
else
|
222
|
+
raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 4")
|
223
|
+
end
|
224
|
+
ensure
|
225
|
+
if @io.eof? && !@io_stack.empty?
|
226
|
+
leave_frame
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
#
# Reads an unsigned 64bit integer, in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   The stream ended before eight bytes could be read.
#
def read_uint64_le
  data = @io.read(8)

  # IO#read / StringIO#read return nil at EOF for a positive length, and
  # unpack1 returns nil for fewer than eight bytes; treat both as
  # truncation instead of failing with NoMethodError or returning nil.
  if data.nil? || data.bytesize < 8
    raise(InvalidFormat,"premature end of string")
  end

  return data.unpack1('Q<')
end
|
238
|
+
|
239
|
+
#
# Reads a UTF-8 string of the desired length.
#
# @param [Integer] length
#   The desired length to read, in bytes.
#
# @return [String]
#   The read UTF-8 string.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes could be read.
#
def read_utf8_string(length)
  data = @io.read(length)

  # IO#read / StringIO#read return nil at EOF when length > 0; guard
  # against that (and against a short read) so a truncated stream raises
  # InvalidFormat rather than NoMethodError or yielding a cut-off string.
  if data.nil? || data.bytesize < length
    raise(InvalidFormat,"premature end of string")
  end

  return data.force_encoding(Encoding::UTF_8)
end
|
251
|
+
|
252
|
+
#
# Reads a data frame of the given length.
#
# @param [Integer] length
#   The desired length of the frame, in bytes.
#
# @return [String]
#   The read data frame.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes could be read.
#
def read_frame(length)
  frame = @io.read(length)

  # IO#read / StringIO#read return nil at EOF when length > 0; a nil or
  # short frame means the pickle was truncated, so report it as
  # InvalidFormat instead of handing an incomplete frame to the caller.
  if frame.nil? || frame.bytesize < length
    raise(InvalidFormat,"premature end of string")
  end

  return frame
end
|
264
|
+
|
265
|
+
#
|
266
|
+
# Enters a new data frame.
|
267
|
+
#
|
268
|
+
# @param [String] frame
|
269
|
+
# The contents of the data frame.
|
270
|
+
#
|
271
|
+
def enter_frame(frame)
|
272
|
+
@io_stack.push(@io)
|
273
|
+
@io = StringIO.new(frame)
|
274
|
+
end
|
275
|
+
|
276
|
+
#
|
277
|
+
# Leaves a data frame and restores {#io}.
|
278
|
+
#
|
279
|
+
def leave_frame
|
280
|
+
@io = @io_stack.pop
|
281
|
+
end
|
282
|
+
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|