python-pickle 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +3 -0
- data/.github/workflows/ruby.yml +27 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +14 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +149 -0
- data/Rakefile +13 -0
- data/gemspec.yml +25 -0
- data/lib/python/pickle/byte_array.rb +40 -0
- data/lib/python/pickle/deserializer.rb +595 -0
- data/lib/python/pickle/exceptions.rb +12 -0
- data/lib/python/pickle/instruction.rb +52 -0
- data/lib/python/pickle/instructions/add_items.rb +26 -0
- data/lib/python/pickle/instructions/append.rb +24 -0
- data/lib/python/pickle/instructions/appends.rb +26 -0
- data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
- data/lib/python/pickle/instructions/bin_float.rb +29 -0
- data/lib/python/pickle/instructions/bin_get.rb +27 -0
- data/lib/python/pickle/instructions/bin_int1.rb +29 -0
- data/lib/python/pickle/instructions/bin_put.rb +29 -0
- data/lib/python/pickle/instructions/bin_string.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
- data/lib/python/pickle/instructions/build.rb +24 -0
- data/lib/python/pickle/instructions/byte_array8.rb +32 -0
- data/lib/python/pickle/instructions/dict.rb +17 -0
- data/lib/python/pickle/instructions/dup.rb +24 -0
- data/lib/python/pickle/instructions/empty_dict.rb +26 -0
- data/lib/python/pickle/instructions/empty_list.rb +26 -0
- data/lib/python/pickle/instructions/empty_set.rb +26 -0
- data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
- data/lib/python/pickle/instructions/ext1.rb +29 -0
- data/lib/python/pickle/instructions/ext2.rb +29 -0
- data/lib/python/pickle/instructions/ext4.rb +29 -0
- data/lib/python/pickle/instructions/float.rb +24 -0
- data/lib/python/pickle/instructions/frame.rb +29 -0
- data/lib/python/pickle/instructions/frozen_set.rb +26 -0
- data/lib/python/pickle/instructions/get.rb +27 -0
- data/lib/python/pickle/instructions/global.rb +62 -0
- data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
- data/lib/python/pickle/instructions/has_value.rb +50 -0
- data/lib/python/pickle/instructions/int.rb +24 -0
- data/lib/python/pickle/instructions/list.rb +24 -0
- data/lib/python/pickle/instructions/long.rb +24 -0
- data/lib/python/pickle/instructions/long1.rb +32 -0
- data/lib/python/pickle/instructions/long4.rb +32 -0
- data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
- data/lib/python/pickle/instructions/mark.rb +24 -0
- data/lib/python/pickle/instructions/memoize.rb +26 -0
- data/lib/python/pickle/instructions/new_false.rb +24 -0
- data/lib/python/pickle/instructions/new_obj.rb +26 -0
- data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
- data/lib/python/pickle/instructions/new_true.rb +24 -0
- data/lib/python/pickle/instructions/next_buffer.rb +26 -0
- data/lib/python/pickle/instructions/none.rb +24 -0
- data/lib/python/pickle/instructions/pop.rb +24 -0
- data/lib/python/pickle/instructions/pop_mark.rb +24 -0
- data/lib/python/pickle/instructions/proto.rb +29 -0
- data/lib/python/pickle/instructions/put.rb +24 -0
- data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
- data/lib/python/pickle/instructions/reduce.rb +24 -0
- data/lib/python/pickle/instructions/set_item.rb +24 -0
- data/lib/python/pickle/instructions/set_items.rb +26 -0
- data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
- data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
- data/lib/python/pickle/instructions/stack_global.rb +26 -0
- data/lib/python/pickle/instructions/stop.rb +24 -0
- data/lib/python/pickle/instructions/string.rb +24 -0
- data/lib/python/pickle/instructions/tuple.rb +24 -0
- data/lib/python/pickle/instructions/tuple1.rb +24 -0
- data/lib/python/pickle/instructions/tuple2.rb +24 -0
- data/lib/python/pickle/instructions/tuple3.rb +24 -0
- data/lib/python/pickle/protocol.rb +56 -0
- data/lib/python/pickle/protocol0.rb +399 -0
- data/lib/python/pickle/protocol1.rb +183 -0
- data/lib/python/pickle/protocol2.rb +229 -0
- data/lib/python/pickle/protocol3.rb +163 -0
- data/lib/python/pickle/protocol4.rb +285 -0
- data/lib/python/pickle/protocol5.rb +218 -0
- data/lib/python/pickle/py_class.rb +75 -0
- data/lib/python/pickle/py_object.rb +141 -0
- data/lib/python/pickle/tuple.rb +19 -0
- data/lib/python/pickle/version.rb +6 -0
- data/lib/python/pickle.rb +226 -0
- data/python-pickle.gemspec +62 -0
- data/spec/byte_array_spec.rb +54 -0
- data/spec/deserializer_spec.rb +1201 -0
- data/spec/fixtures/ascii_str_v3.pkl +0 -0
- data/spec/fixtures/ascii_str_v4.pkl +0 -0
- data/spec/fixtures/ascii_str_v5.pkl +0 -0
- data/spec/fixtures/bin_str_v0.pkl +3 -0
- data/spec/fixtures/bin_str_v1.pkl +0 -0
- data/spec/fixtures/bin_str_v2.pkl +0 -0
- data/spec/fixtures/bin_str_v3.pkl +0 -0
- data/spec/fixtures/bin_str_v4.pkl +0 -0
- data/spec/fixtures/bin_str_v5.pkl +0 -0
- data/spec/fixtures/bytearray_v0.pkl +10 -0
- data/spec/fixtures/bytearray_v1.pkl +0 -0
- data/spec/fixtures/bytearray_v2.pkl +0 -0
- data/spec/fixtures/bytearray_v3.pkl +0 -0
- data/spec/fixtures/bytearray_v4.pkl +0 -0
- data/spec/fixtures/bytearray_v5.pkl +0 -0
- data/spec/fixtures/class_v0.pkl +4 -0
- data/spec/fixtures/class_v1.pkl +0 -0
- data/spec/fixtures/class_v2.pkl +0 -0
- data/spec/fixtures/class_v3.pkl +0 -0
- data/spec/fixtures/class_v4.pkl +0 -0
- data/spec/fixtures/class_v5.pkl +0 -0
- data/spec/fixtures/dict_v0.pkl +6 -0
- data/spec/fixtures/dict_v1.pkl +0 -0
- data/spec/fixtures/dict_v2.pkl +0 -0
- data/spec/fixtures/dict_v3.pkl +0 -0
- data/spec/fixtures/dict_v4.pkl +0 -0
- data/spec/fixtures/dict_v5.pkl +0 -0
- data/spec/fixtures/escaped_str_v0.pkl +3 -0
- data/spec/fixtures/escaped_str_v1.pkl +0 -0
- data/spec/fixtures/escaped_str_v2.pkl +0 -0
- data/spec/fixtures/false_v0.pkl +2 -0
- data/spec/fixtures/false_v1.pkl +2 -0
- data/spec/fixtures/false_v2.pkl +1 -0
- data/spec/fixtures/false_v3.pkl +1 -0
- data/spec/fixtures/false_v4.pkl +1 -0
- data/spec/fixtures/false_v5.pkl +1 -0
- data/spec/fixtures/float_v0.pkl +2 -0
- data/spec/fixtures/float_v1.pkl +1 -0
- data/spec/fixtures/float_v2.pkl +1 -0
- data/spec/fixtures/float_v3.pkl +1 -0
- data/spec/fixtures/float_v4.pkl +0 -0
- data/spec/fixtures/float_v5.pkl +0 -0
- data/spec/fixtures/function_v0.pkl +4 -0
- data/spec/fixtures/function_v1.pkl +0 -0
- data/spec/fixtures/function_v2.pkl +0 -0
- data/spec/fixtures/function_v3.pkl +0 -0
- data/spec/fixtures/function_v4.pkl +0 -0
- data/spec/fixtures/function_v5.pkl +0 -0
- data/spec/fixtures/hex_str_v0.pkl +3 -0
- data/spec/fixtures/hex_str_v1.pkl +0 -0
- data/spec/fixtures/hex_str_v2.pkl +0 -0
- data/spec/fixtures/int_v0.pkl +2 -0
- data/spec/fixtures/int_v1.pkl +1 -0
- data/spec/fixtures/int_v2.pkl +1 -0
- data/spec/fixtures/int_v3.pkl +1 -0
- data/spec/fixtures/int_v4.pkl +1 -0
- data/spec/fixtures/int_v5.pkl +1 -0
- data/spec/fixtures/list_v0.pkl +7 -0
- data/spec/fixtures/list_v1.pkl +0 -0
- data/spec/fixtures/list_v2.pkl +0 -0
- data/spec/fixtures/list_v3.pkl +0 -0
- data/spec/fixtures/list_v4.pkl +0 -0
- data/spec/fixtures/list_v5.pkl +0 -0
- data/spec/fixtures/long_v0.pkl +2 -0
- data/spec/fixtures/long_v1.pkl +2 -0
- data/spec/fixtures/long_v2.pkl +0 -0
- data/spec/fixtures/long_v3.pkl +0 -0
- data/spec/fixtures/long_v4.pkl +0 -0
- data/spec/fixtures/long_v5.pkl +0 -0
- data/spec/fixtures/nested_dict_v0.pkl +12 -0
- data/spec/fixtures/nested_dict_v1.pkl +0 -0
- data/spec/fixtures/nested_dict_v2.pkl +0 -0
- data/spec/fixtures/nested_dict_v3.pkl +0 -0
- data/spec/fixtures/nested_dict_v4.pkl +0 -0
- data/spec/fixtures/nested_dict_v5.pkl +0 -0
- data/spec/fixtures/nested_list_v0.pkl +9 -0
- data/spec/fixtures/nested_list_v1.pkl +0 -0
- data/spec/fixtures/nested_list_v2.pkl +0 -0
- data/spec/fixtures/nested_list_v3.pkl +0 -0
- data/spec/fixtures/nested_list_v4.pkl +0 -0
- data/spec/fixtures/nested_list_v5.pkl +0 -0
- data/spec/fixtures/none_v0.pkl +1 -0
- data/spec/fixtures/none_v1.pkl +1 -0
- data/spec/fixtures/none_v2.pkl +1 -0
- data/spec/fixtures/none_v3.pkl +1 -0
- data/spec/fixtures/none_v4.pkl +1 -0
- data/spec/fixtures/none_v5.pkl +1 -0
- data/spec/fixtures/object_v0.pkl +19 -0
- data/spec/fixtures/object_v1.pkl +0 -0
- data/spec/fixtures/object_v2.pkl +0 -0
- data/spec/fixtures/object_v3.pkl +0 -0
- data/spec/fixtures/object_v4.pkl +0 -0
- data/spec/fixtures/object_v5.pkl +0 -0
- data/spec/fixtures/str_v0.pkl +3 -0
- data/spec/fixtures/str_v1.pkl +0 -0
- data/spec/fixtures/str_v2.pkl +0 -0
- data/spec/fixtures/str_v3.pkl +0 -0
- data/spec/fixtures/str_v4.pkl +0 -0
- data/spec/fixtures/str_v5.pkl +0 -0
- data/spec/fixtures/true_v0.pkl +2 -0
- data/spec/fixtures/true_v1.pkl +2 -0
- data/spec/fixtures/true_v2.pkl +1 -0
- data/spec/fixtures/true_v3.pkl +1 -0
- data/spec/fixtures/true_v4.pkl +1 -0
- data/spec/fixtures/true_v5.pkl +1 -0
- data/spec/fixtures/unicode_str_v0.pkl +3 -0
- data/spec/fixtures/unicode_str_v1.pkl +0 -0
- data/spec/fixtures/unicode_str_v2.pkl +0 -0
- data/spec/fixtures/unicode_str_v3.pkl +0 -0
- data/spec/fixtures/unicode_str_v4.pkl +0 -0
- data/spec/fixtures/unicode_str_v5.pkl +0 -0
- data/spec/generate_pickles2.py +41 -0
- data/spec/generate_pickles3.py +40 -0
- data/spec/integration/load/protocol0_spec.rb +258 -0
- data/spec/integration/load/protocol1_spec.rb +258 -0
- data/spec/integration/load/protocol2_spec.rb +258 -0
- data/spec/integration/load/protocol3_spec.rb +258 -0
- data/spec/integration/load/protocol4_spec.rb +258 -0
- data/spec/integration/load/protocol5_spec.rb +258 -0
- data/spec/integration/parse/protocol0_spec.rb +467 -0
- data/spec/integration/parse/protocol1_spec.rb +459 -0
- data/spec/integration/parse/protocol2_spec.rb +471 -0
- data/spec/integration/parse/protocol3_spec.rb +407 -0
- data/spec/integration/parse/protocol4_spec.rb +439 -0
- data/spec/integration/parse/protocol5_spec.rb +419 -0
- data/spec/pickle_spec.rb +163 -0
- data/spec/protocol0_read_instruction_examples.rb +211 -0
- data/spec/protocol0_spec.rb +445 -0
- data/spec/protocol1_read_instruction_examples.rb +156 -0
- data/spec/protocol1_spec.rb +59 -0
- data/spec/protocol2_read_instruction_examples.rb +135 -0
- data/spec/protocol2_spec.rb +128 -0
- data/spec/protocol3_read_instruction_examples.rb +29 -0
- data/spec/protocol3_spec.rb +32 -0
- data/spec/protocol4_read_instruction_examples.rb +142 -0
- data/spec/protocol4_spec.rb +58 -0
- data/spec/protocol5_spec.rb +68 -0
- data/spec/py_class_spec.rb +62 -0
- data/spec/py_object_spec.rb +149 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/tuple_spec.rb +18 -0
- metadata +325 -0
@@ -0,0 +1,229 @@
|
|
1
|
+
require 'python/pickle/protocol1'
|
2
|
+
require 'python/pickle/instructions/proto'
|
3
|
+
require 'python/pickle/instructions/new_obj'
|
4
|
+
require 'python/pickle/instructions/ext1'
|
5
|
+
require 'python/pickle/instructions/ext2'
|
6
|
+
require 'python/pickle/instructions/ext4'
|
7
|
+
require 'python/pickle/instructions/tuple1'
|
8
|
+
require 'python/pickle/instructions/tuple2'
|
9
|
+
require 'python/pickle/instructions/tuple3'
|
10
|
+
require 'python/pickle/instructions/new_true'
|
11
|
+
require 'python/pickle/instructions/new_false'
|
12
|
+
require 'python/pickle/instructions/long1'
|
13
|
+
require 'python/pickle/instructions/long4'
|
14
|
+
|
15
|
+
module Python
|
16
|
+
module Pickle
|
17
|
+
class Protocol2 < Protocol1
|
18
|
+
|
19
|
+
# The opcodes accepted by Pickle protocol version 2.
#
# Extends {Protocol1::OPCODES} with the opcodes numbered 128 through 139.
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = Protocol1::OPCODES | Set[
  128, 129,      # PROTO, NEWOBJ
  130, 131, 132, # EXT1, EXT2, EXT4
  133, 134, 135, # TUPLE1, TUPLE2, TUPLE3
  136, 137,      # NEWTRUE, NEWFALSE
  138, 139       # LONG1, LONG4
]
|
36
|
+
|
37
|
+
#
# Reads an instruction from the pickle stream.
#
# One opcode byte is read from `@io` and mapped to the matching
# instruction; any operands the opcode needs are read right after it.
# Length-prefixed payloads (BINSTRING, SHORT_BINSTRING, BINUNICODE) must
# be completely present in the stream.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The opcode is not a valid protocol 2 opcode, or a length-prefixed
#   payload ended before the announced number of bytes could be read.
#
def read_instruction
  # Reads exactly `length` bytes. `@io.read` may hand back nil or a short
  # String when the stream runs out, so both are rejected here, mirroring
  # the length check performed by #read_int_le.
  read_exactly = lambda do |length|
    data = @io.read(length)

    if data.nil? || data.bytesize < length
      raise(InvalidFormat,"premature end of string")
    end

    data
  end

  case (opcode = @io.getbyte)
  #
  # Protocol 0 instructions
  #
  when 40  then Instructions::MARK                                      # MARK
  when 46  then Instructions::STOP                                      # STOP
  when 48  then Instructions::POP                                       # POP
  when 49  then Instructions::POP_MARK                                  # POP_MARK
  when 50  then Instructions::DUP                                       # DUP
  when 70  then Instructions::Float.new(read_float)                     # FLOAT
  when 73  then Instructions::Int.new(read_int)                         # INT
  when 76  then Instructions::Long.new(read_long)                       # LONG
  when 78  then Instructions::NONE                                      # NONE
  when 82  then Instructions::REDUCE                                    # REDUCE
  when 83  then Instructions::String.new(read_string)                   # STRING
  when 86  then Instructions::String.new(read_unicode_string)           # UNICODE
  when 97  then Instructions::APPEND                                    # APPEND
  when 98  then Instructions::BUILD                                     # BUILD
  when 99  then Instructions::Global.new(read_nl_string,read_nl_string) # GLOBAL
  when 100 then Instructions::DICT                                      # DICT
  when 103 then Instructions::Get.new(read_int)                         # GET
  when 108 then Instructions::LIST                                      # LIST
  when 112 then Instructions::Put.new(read_int)                         # PUT
  when 115 then Instructions::SETITEM                                   # SETITEM
  when 116 then Instructions::TUPLE                                     # TUPLE
  #
  # Protocol 1 instructions
  #
  when 41  then Instructions::EMPTY_TUPLE                               # EMPTY_TUPLE
  when 71  then Instructions::BinFloat.new(read_float64_be)             # BINFLOAT
  when 75  then Instructions::BinInt1.new(read_uint8)                   # BININT1
  when 84 # BINSTRING
    length = read_uint32_le
    string = read_exactly.call(length)

    Instructions::BinString.new(length,string)
  when 85 # SHORT_BINSTRING
    length = read_uint8
    string = read_exactly.call(length)

    Instructions::ShortBinString.new(length,string)
  when 88 # BINUNICODE
    length = read_uint32_le
    string = read_exactly.call(length).force_encoding(Encoding::UTF_8)

    Instructions::BinUnicode.new(length,string)
  when 93  then Instructions::EMPTY_LIST                                # EMPTY_LIST
  when 101 then Instructions::APPENDS                                   # APPENDS
  when 104 then Instructions::BinGet.new(read_uint8)                    # BINGET
  when 106 then Instructions::LongBinGet.new(read_uint32_le)            # LONG_BINGET
  when 113 then Instructions::BinPut.new(read_uint8)                    # BINPUT
  when 117 then Instructions::SETITEMS                                  # SETITEMS
  when 125 then Instructions::EMPTY_DICT                                # EMPTY_DICT
  #
  # Protocol 2 instructions
  #
  when 128 then Instructions::Proto.new(read_uint8)                     # PROTO
  when 129 then Instructions::NEWOBJ                                    # NEWOBJ
  when 130 then Instructions::Ext1.new(read_uint8)                      # EXT1
  when 131 then Instructions::Ext2.new(read_uint16_le)                  # EXT2
  when 132 then Instructions::Ext4.new(read_uint32_le)                  # EXT4
  when 133 then Instructions::TUPLE1                                    # TUPLE1
  when 134 then Instructions::TUPLE2                                    # TUPLE2
  when 135 then Instructions::TUPLE3                                    # TUPLE3
  when 136 then Instructions::NEWTRUE                                   # NEWTRUE
  when 137 then Instructions::NEWFALSE                                  # NEWFALSE
  when 138 # LONG1
    length = read_uint8
    long   = read_int_le(length)

    Instructions::Long1.new(length,long)
  when 139 # LONG4
    length = read_uint32_le
    long   = read_int_le(length)

    Instructions::Long4.new(length,long)
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 2")
  end
end
|
168
|
+
|
169
|
+
#
# Reads an unsigned 16bit integer in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   Fewer than two bytes could be read from the stream.
#
def read_uint16_le
  data = @io.read(2)

  # At the end of an IO-like stream `read` yields nil or a short String;
  # neither can be unpacked into a 16bit value.
  if data.nil? || data.bytesize < 2
    raise(InvalidFormat,"premature end of string")
  end

  data.unpack1('S<')
end
|
178
|
+
|
179
|
+
#
# Reads and unpacks a signed integer of arbitrary length.
#
# @param [Integer] length
#   The number of bytes to read.
#
# @return [Integer]
#   The decoded long integer.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes could be read.
#
def read_int_le(length)
  data = @io.read(length)

  # `read` returns nil (not an empty String) when a non-zero length is
  # requested at the end of an IO-like stream, so nil is checked before
  # the size comparison.
  if data.nil? || data.bytesize < length
    raise(InvalidFormat,"premature end of string")
  end

  unpack_int_le(data)
end
|
197
|
+
|
198
|
+
#
# Decodes a little-endian, twos-complement integer of any byte length.
#
# @param [String] data
#   The packed long to decode.
#
# @return [Integer]
#   The unpacked long; `0` when `data` is empty.
#
def unpack_int_le(data)
  return 0 if data.empty?

  # Fold from the most significant (last) byte down to the first one.
  magnitude = data.bytes.reverse_each.reduce(0) { |acc,byte| (acc << 8) | byte }
  bit_width = data.bytesize * 8

  # A set top bit means the value is negative in twos-complement.
  if magnitude[bit_width - 1] == 1
    magnitude - (1 << bit_width)
  else
    magnitude
  end
end
|
226
|
+
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'python/pickle/protocol2'
|
2
|
+
require 'python/pickle/instructions/bin_bytes'
|
3
|
+
require 'python/pickle/instructions/short_bin_bytes'
|
4
|
+
|
5
|
+
module Python
|
6
|
+
module Pickle
|
7
|
+
class Protocol3 < Protocol2
|
8
|
+
# The opcodes accepted by Pickle protocol version 3.
#
# Extends {Protocol2::OPCODES} with the two bytes opcodes.
#
# @see http://formats.kaitai.io/python_pickle/ruby.html
OPCODES = Protocol2::OPCODES | Set[
  66, 67 # BINBYTES, SHORT_BINBYTES
]
|
15
|
+
|
16
|
+
#
# Reads an instruction from the pickle stream.
#
# One opcode byte is read from `@io` and mapped to the matching
# instruction; any operands the opcode needs are read right after it.
# Length-prefixed payloads (BINSTRING, SHORT_BINSTRING, BINUNICODE,
# BINBYTES, SHORT_BINBYTES) must be completely present in the stream.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The opcode is not a valid protocol 3 opcode, or a length-prefixed
#   payload ended before the announced number of bytes could be read.
#
def read_instruction
  # Reads exactly `length` bytes. `@io.read` may hand back nil or a short
  # String when the stream runs out, so both are rejected here, mirroring
  # the length check performed by #read_int_le.
  read_exactly = lambda do |length|
    data = @io.read(length)

    if data.nil? || data.bytesize < length
      raise(InvalidFormat,"premature end of string")
    end

    data
  end

  case (opcode = @io.getbyte)
  #
  # Protocol 0 instructions
  #
  when 40  then Instructions::MARK                                      # MARK
  when 46  then Instructions::STOP                                      # STOP
  when 48  then Instructions::POP                                       # POP
  when 49  then Instructions::POP_MARK                                  # POP_MARK
  when 50  then Instructions::DUP                                       # DUP
  when 70  then Instructions::Float.new(read_float)                     # FLOAT
  when 73  then Instructions::Int.new(read_int)                         # INT
  when 76  then Instructions::Long.new(read_long)                       # LONG
  when 78  then Instructions::NONE                                      # NONE
  when 82  then Instructions::REDUCE                                    # REDUCE
  when 83  then Instructions::String.new(read_string)                   # STRING
  when 86  then Instructions::String.new(read_unicode_string)           # UNICODE
  when 97  then Instructions::APPEND                                    # APPEND
  when 98  then Instructions::BUILD                                     # BUILD
  when 99  then Instructions::Global.new(read_nl_string,read_nl_string) # GLOBAL
  when 100 then Instructions::DICT                                      # DICT
  when 103 then Instructions::Get.new(read_int)                         # GET
  when 108 then Instructions::LIST                                      # LIST
  when 112 then Instructions::Put.new(read_int)                         # PUT
  when 115 then Instructions::SETITEM                                   # SETITEM
  when 116 then Instructions::TUPLE                                     # TUPLE
  #
  # Protocol 1 instructions
  #
  when 41  then Instructions::EMPTY_TUPLE                               # EMPTY_TUPLE
  when 71  then Instructions::BinFloat.new(read_float64_be)             # BINFLOAT
  when 75  then Instructions::BinInt1.new(read_uint8)                   # BININT1
  when 84 # BINSTRING
    length = read_uint32_le
    string = read_exactly.call(length)

    Instructions::BinString.new(length,string)
  when 85 # SHORT_BINSTRING
    length = read_uint8
    string = read_exactly.call(length)

    Instructions::ShortBinString.new(length,string)
  when 88 # BINUNICODE
    length = read_uint32_le
    string = read_exactly.call(length).force_encoding(Encoding::UTF_8)

    Instructions::BinUnicode.new(length,string)
  when 93  then Instructions::EMPTY_LIST                                # EMPTY_LIST
  when 101 then Instructions::APPENDS                                   # APPENDS
  when 104 then Instructions::BinGet.new(read_uint8)                    # BINGET
  when 106 then Instructions::LongBinGet.new(read_uint32_le)            # LONG_BINGET
  when 113 then Instructions::BinPut.new(read_uint8)                    # BINPUT
  when 117 then Instructions::SETITEMS                                  # SETITEMS
  when 125 then Instructions::EMPTY_DICT                                # EMPTY_DICT
  #
  # Protocol 2 instructions
  #
  when 128 then Instructions::Proto.new(read_uint8)                     # PROTO
  when 129 then Instructions::NEWOBJ                                    # NEWOBJ
  when 130 then Instructions::Ext1.new(read_uint8)                      # EXT1
  when 131 then Instructions::Ext2.new(read_uint16_le)                  # EXT2
  when 132 then Instructions::Ext4.new(read_uint32_le)                  # EXT4
  when 133 then Instructions::TUPLE1                                    # TUPLE1
  when 134 then Instructions::TUPLE2                                    # TUPLE2
  when 135 then Instructions::TUPLE3                                    # TUPLE3
  when 136 then Instructions::NEWTRUE                                   # NEWTRUE
  when 137 then Instructions::NEWFALSE                                  # NEWFALSE
  when 138 # LONG1
    length = read_uint8
    long   = read_int_le(length)

    Instructions::Long1.new(length,long)
  when 139 # LONG4
    length = read_uint32_le
    long   = read_int_le(length)

    Instructions::Long4.new(length,long)
  #
  # Protocol 3 instructions
  #
  when 66 # BINBYTES
    length = read_uint32_le
    bytes  = read_exactly.call(length)

    Instructions::BinBytes.new(length,bytes)
  when 67 # SHORT_BINBYTES
    length = read_uint8
    bytes  = read_exactly.call(length)

    Instructions::ShortBinBytes.new(length,bytes)
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 3")
  end
end
|
160
|
+
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'python/pickle/protocol3'
|
2
|
+
require 'python/pickle/instructions/short_bin_unicode'
|
3
|
+
require 'python/pickle/instructions/bin_unicode8'
|
4
|
+
require 'python/pickle/instructions/bin_bytes8'
|
5
|
+
require 'python/pickle/instructions/empty_set'
|
6
|
+
require 'python/pickle/instructions/add_items'
|
7
|
+
require 'python/pickle/instructions/frozen_set'
|
8
|
+
require 'python/pickle/instructions/new_obj_ex'
|
9
|
+
require 'python/pickle/instructions/stack_global'
|
10
|
+
require 'python/pickle/instructions/memoize'
|
11
|
+
require 'python/pickle/instructions/frame'
|
12
|
+
|
13
|
+
module Python
|
14
|
+
module Pickle
|
15
|
+
#
|
16
|
+
# Implements Python Pickle protocol 4.
|
17
|
+
#
|
18
|
+
class Protocol4 < Protocol3
|
19
|
+
|
20
|
+
#
# Initializes the protocol 4 reader/writer.
#
# @param [Object] io
#   Passed unchanged to the superclass initializer (presumably the
#   IO-like pickle stream; confirm against the parent class).
#
def initialize(io)
  super

  # Streams set aside by #enter_frame, most recent last; #leave_frame
  # pops them back into @io.
  @io_stack = []
end
|
28
|
+
|
29
|
+
# The opcodes accepted by Pickle protocol 4.
#
# Extends {Protocol3::OPCODES} with the opcodes numbered 140 through 149.
#
# @see https://peps.python.org/pep-3154/
OPCODES = Protocol3::OPCODES | Set[
  140, 141, 142, # SHORT_BINUNICODE, BINUNICODE8, BINBYTES8
  143, 144, 145, # EMPTY_SET, ADDITEMS, FROZENSET
  146, 147,      # NEWOBJ_EX, STACK_GLOBAL
  148, 149       # MEMOIZE, FRAME
]
|
44
|
+
|
45
|
+
#
# Reads an instruction from the pickle stream.
#
# One opcode byte is read from `@io` and mapped to the matching
# instruction; any operands the opcode needs are read right after it.
# A FRAME opcode additionally switches `@io` to the frame's contents.
# After every read, if the current stream is exhausted while an outer
# stream is still stacked in `@io_stack`, the outer stream is restored.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The opcode is not a valid protocol 4 opcode, or a length-prefixed
#   payload ended before the announced number of bytes could be read.
#
def read_instruction
  # Reads exactly `length` bytes. `@io.read` may hand back nil or a short
  # String when the stream runs out, so both are rejected here, mirroring
  # the length check performed by #read_int_le.
  read_exactly = lambda do |length|
    data = @io.read(length)

    if data.nil? || data.bytesize < length
      raise(InvalidFormat,"premature end of string")
    end

    data
  end

  case (opcode = @io.getbyte)
  #
  # Protocol 0 instructions
  #
  when 40  then Instructions::MARK                                      # MARK
  when 46  then Instructions::STOP                                      # STOP
  when 48  then Instructions::POP                                       # POP
  when 49  then Instructions::POP_MARK                                  # POP_MARK
  when 50  then Instructions::DUP                                       # DUP
  when 70  then Instructions::Float.new(read_float)                     # FLOAT
  when 73  then Instructions::Int.new(read_int)                         # INT
  when 76  then Instructions::Long.new(read_long)                       # LONG
  when 78  then Instructions::NONE                                      # NONE
  when 82  then Instructions::REDUCE                                    # REDUCE
  when 83  then Instructions::String.new(read_string)                   # STRING
  when 86  then Instructions::String.new(read_unicode_string)           # UNICODE
  when 97  then Instructions::APPEND                                    # APPEND
  when 98  then Instructions::BUILD                                     # BUILD
  when 99  then Instructions::Global.new(read_nl_string,read_nl_string) # GLOBAL
  when 100 then Instructions::DICT                                      # DICT
  when 103 then Instructions::Get.new(read_int)                         # GET
  when 108 then Instructions::LIST                                      # LIST
  when 112 then Instructions::Put.new(read_int)                         # PUT
  when 115 then Instructions::SETITEM                                   # SETITEM
  when 116 then Instructions::TUPLE                                     # TUPLE
  #
  # Protocol 1 instructions
  #
  when 41  then Instructions::EMPTY_TUPLE                               # EMPTY_TUPLE
  when 71  then Instructions::BinFloat.new(read_float64_be)             # BINFLOAT
  when 75  then Instructions::BinInt1.new(read_uint8)                   # BININT1
  when 84 # BINSTRING
    length = read_uint32_le
    string = read_exactly.call(length)

    Instructions::BinString.new(length,string)
  when 85 # SHORT_BINSTRING
    length = read_uint8
    string = read_exactly.call(length)

    Instructions::ShortBinString.new(length,string)
  when 88 # BINUNICODE
    length = read_uint32_le
    string = read_exactly.call(length).force_encoding(Encoding::UTF_8)

    Instructions::BinUnicode.new(length,string)
  when 93  then Instructions::EMPTY_LIST                                # EMPTY_LIST
  when 101 then Instructions::APPENDS                                   # APPENDS
  when 104 then Instructions::BinGet.new(read_uint8)                    # BINGET
  when 106 then Instructions::LongBinGet.new(read_uint32_le)            # LONG_BINGET
  when 113 then Instructions::BinPut.new(read_uint8)                    # BINPUT
  when 117 then Instructions::SETITEMS                                  # SETITEMS
  when 125 then Instructions::EMPTY_DICT                                # EMPTY_DICT
  #
  # Protocol 2 instructions
  #
  when 128 then Instructions::Proto.new(read_uint8)                     # PROTO
  when 129 then Instructions::NEWOBJ                                    # NEWOBJ
  when 130 then Instructions::Ext1.new(read_uint8)                      # EXT1
  when 131 then Instructions::Ext2.new(read_uint16_le)                  # EXT2
  when 132 then Instructions::Ext4.new(read_uint32_le)                  # EXT4
  when 133 then Instructions::TUPLE1                                    # TUPLE1
  when 134 then Instructions::TUPLE2                                    # TUPLE2
  when 135 then Instructions::TUPLE3                                    # TUPLE3
  when 136 then Instructions::NEWTRUE                                   # NEWTRUE
  when 137 then Instructions::NEWFALSE                                  # NEWFALSE
  when 138 # LONG1
    length = read_uint8
    long   = read_int_le(length)

    Instructions::Long1.new(length,long)
  when 139 # LONG4
    length = read_uint32_le
    long   = read_int_le(length)

    Instructions::Long4.new(length,long)
  #
  # Protocol 3 instructions
  #
  when 66 # BINBYTES
    length = read_uint32_le
    bytes  = read_exactly.call(length)

    Instructions::BinBytes.new(length,bytes)
  when 67 # SHORT_BINBYTES
    length = read_uint8
    bytes  = read_exactly.call(length)

    Instructions::ShortBinBytes.new(length,bytes)
  #
  # Protocol 4 instructions
  #
  when 140 # SHORT_BINUNICODE
    length = read_uint8
    string = read_utf8_string(length)

    Instructions::ShortBinUnicode.new(length,string)
  when 141 # BINUNICODE8
    length = read_uint64_le
    string = read_utf8_string(length)

    Instructions::BinUnicode8.new(length,string)
  when 142 # BINBYTES8
    length = read_uint64_le
    bytes  = read_exactly.call(length)

    Instructions::BinBytes8.new(length,bytes)
  when 143 then Instructions::EMPTY_SET                                 # EMPTY_SET
  when 144 then Instructions::ADDITEMS                                  # ADDITEMS
  when 145 then Instructions::FROZENSET                                 # FROZENSET
  when 146 then Instructions::NEWOBJ_EX                                 # NEWOBJ_EX
  when 147 then Instructions::STACK_GLOBAL                              # STACK_GLOBAL
  when 148 then Instructions::MEMOIZE                                   # MEMOIZE
  when 149 # FRAME
    length = read_uint64_le

    # Subsequent instructions are read from the frame's own buffer.
    enter_frame(read_frame(length))

    Instructions::Frame.new(length)
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 4")
  end
ensure
  # Runs on success and on error: once the current stream is used up and
  # an outer stream is waiting, switch back to it.
  if @io.eof? && !@io_stack.empty?
    leave_frame
  end
end
|
229
|
+
|
230
|
+
#
# Reads an unsigned 64bit integer, in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   Fewer than eight bytes could be read from the stream.
#
def read_uint64_le
  data = @io.read(8)

  # At the end of an IO-like stream `read` yields nil or a short String;
  # neither can be unpacked into a 64bit value.
  if data.nil? || data.bytesize < 8
    raise(InvalidFormat,"premature end of string")
  end

  data.unpack1('Q<')
end
|
238
|
+
|
239
|
+
#
# Reads a UTF-8 string of the desired length.
#
# @param [Integer] length
#   The desired length to read, in bytes.
#
# @return [String]
#   The read bytes, tagged as UTF-8.
#
# @raise [InvalidFormat]
#   The stream ended before `length` bytes could be read.
#
def read_utf8_string(length)
  data = @io.read(length)

  # `read` returns nil or a short String when an IO-like stream runs out;
  # reject both rather than calling force_encoding on nil or returning a
  # silently shortened string.
  if data.nil? || data.bytesize < length
    raise(InvalidFormat,"premature end of string")
  end

  data.force_encoding(Encoding::UTF_8)
end
|
251
|
+
|
252
|
+
#
# Reads a data frame of the given length.
#
# @param [Integer] length
#   The desired length of the frame, in bytes.
#
# @return [String]
#   The read data frame.
#
# @raise [InvalidFormat]
#   The stream ended before the whole frame could be read.
#
def read_frame(length)
  frame = @io.read(length)

  # A nil or short result means the stream ended inside the frame; fail
  # here instead of passing an incomplete frame on to the caller.
  if frame.nil? || frame.bytesize < length
    raise(InvalidFormat,"premature end of frame")
  end

  frame
end
|
264
|
+
|
265
|
+
#
# Enters a new data frame.
#
# The current stream is saved on `@io_stack` and `@io` is replaced by a
# StringIO wrapping the frame.
#
# @param [String] frame
#   The contents of the data frame.
#
def enter_frame(frame)
  @io_stack << @io
  @io = StringIO.new(frame)
end
|
275
|
+
|
276
|
+
#
# Leaves a data frame and restores {#io}.
#
# The most recently saved stream is popped off `@io_stack` and becomes
# `@io` again.
#
def leave_frame
  suspended_io = @io_stack.pop
  @io = suspended_io
end
|
282
|
+
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|