python-pickle 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog.md +12 -0
- data/README.md +2 -1
- data/lib/python/pickle/deserializer.rb +142 -80
- data/lib/python/pickle/instructions/bin_persid.rb +31 -0
- data/lib/python/pickle/instructions/global.rb +11 -41
- data/lib/python/pickle/instructions/has_namespace_and_name.rb +61 -0
- data/lib/python/pickle/instructions/inst.rb +34 -0
- data/lib/python/pickle/instructions/next_buffer.rb +5 -1
- data/lib/python/pickle/instructions/obj.rb +30 -0
- data/lib/python/pickle/instructions/persid.rb +31 -0
- data/lib/python/pickle/instructions/readonly_buffer.rb +4 -0
- data/lib/python/pickle/instructions.rb +64 -0
- data/lib/python/pickle/protocol0.rb +313 -68
- data/lib/python/pickle/protocol1.rb +225 -93
- data/lib/python/pickle/protocol2.rb +205 -124
- data/lib/python/pickle/protocol3.rb +92 -123
- data/lib/python/pickle/protocol4.rb +188 -165
- data/lib/python/pickle/protocol5.rb +98 -166
- data/lib/python/pickle/version.rb +1 -1
- data/lib/python/pickle.rb +38 -32
- data/spec/deserializer_spec.rb +308 -0
- data/spec/fixtures/set_v0.pkl +11 -0
- data/spec/fixtures/set_v1.pkl +0 -0
- data/spec/fixtures/set_v2.pkl +0 -0
- data/spec/fixtures/set_v3.pkl +0 -0
- data/spec/fixtures/set_v4.pkl +0 -0
- data/spec/fixtures/set_v5.pkl +0 -0
- data/spec/generate_pickles2.py +1 -0
- data/spec/generate_pickles3.py +1 -0
- data/spec/integration/load/protocol0_spec.rb +10 -0
- data/spec/integration/load/protocol1_spec.rb +10 -0
- data/spec/integration/load/protocol2_spec.rb +10 -0
- data/spec/integration/load/protocol3_spec.rb +10 -0
- data/spec/integration/load/protocol4_spec.rb +10 -0
- data/spec/integration/load/protocol5_spec.rb +10 -0
- data/spec/protocol0_read_instruction_examples.rb +44 -0
- metadata +14 -2
@@ -7,14 +7,26 @@ module Python
|
|
7
7
|
module Pickle
|
8
8
|
class Protocol5 < Protocol4
|
9
9
|
|
10
|
-
#
|
10
|
+
# The `BYTEARRAY8` opcode.
|
11
11
|
#
|
12
|
-
# @
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
# @since 0.2.0
|
13
|
+
#
|
14
|
+
# @see https://github.com/python/cpython/blob/v3.10.0/Lib/pickle.py#L193
|
15
|
+
BYTEARRAY8 = 150
|
16
|
+
|
17
|
+
# The `NEXT_BUFFER` opcode.
|
18
|
+
#
|
19
|
+
# @since 0.2.0
|
20
|
+
#
|
21
|
+
# @see https://github.com/python/cpython/blob/v3.10.0/Lib/pickle.py#L194
|
22
|
+
NEXT_BUFFER = 151
|
23
|
+
|
24
|
+
# The `READONLY_BUFFER` opcode.
|
25
|
+
#
|
26
|
+
# @since 0.2.0
|
27
|
+
#
|
28
|
+
# @see https://github.com/python/cpython/blob/v3.10.0/Lib/pickle.py#L195
|
29
|
+
READONLY_BUFFER = 152
|
18
30
|
|
19
31
|
#
|
20
32
|
# Reads an instruction from the pickle stream.
|
@@ -30,180 +42,86 @@ module Python
|
|
30
42
|
#
|
31
43
|
# Protocol 0 instructions
|
32
44
|
#
|
33
|
-
when
|
34
|
-
|
35
|
-
when
|
36
|
-
|
37
|
-
when
|
38
|
-
|
39
|
-
when
|
40
|
-
|
41
|
-
when
|
42
|
-
|
43
|
-
when
|
44
|
-
|
45
|
-
when
|
46
|
-
|
47
|
-
when
|
48
|
-
|
49
|
-
when
|
50
|
-
|
51
|
-
when
|
52
|
-
|
53
|
-
when
|
54
|
-
|
55
|
-
when
|
56
|
-
|
57
|
-
when
|
58
|
-
Instructions::APPEND
|
59
|
-
when 98 # BUILD
|
60
|
-
Instructions::BUILD
|
61
|
-
when 99 # GLOBAL
|
62
|
-
Instructions::Global.new(read_nl_string,read_nl_string)
|
63
|
-
when 100 # DICT
|
64
|
-
Instructions::DICT
|
65
|
-
when 103 # GET
|
66
|
-
Instructions::Get.new(read_int)
|
67
|
-
when 108 # LIST
|
68
|
-
Instructions::LIST
|
69
|
-
when 112 # PUT
|
70
|
-
Instructions::Put.new(read_int)
|
71
|
-
when 115 # SETITEM
|
72
|
-
Instructions::SETITEM
|
73
|
-
when 116 # TUPLE
|
74
|
-
Instructions::TUPLE
|
45
|
+
when MARK then Instructions::MARK
|
46
|
+
when STOP then Instructions::STOP
|
47
|
+
when POP then Instructions::POP
|
48
|
+
when POP_MARK then Instructions::POP_MARK
|
49
|
+
when DUP then Instructions::DUP
|
50
|
+
when FLOAT then read_float_instruction
|
51
|
+
when INT then read_int_instruction
|
52
|
+
when LONG then read_long_instruction
|
53
|
+
when NONE then Instructions::NONE
|
54
|
+
when REDUCE then Instructions::REDUCE
|
55
|
+
when STRING then read_string_instruction
|
56
|
+
when UNICODE then read_unicode_instruction
|
57
|
+
when APPEND then Instructions::APPEND
|
58
|
+
when BUILD then Instructions::BUILD
|
59
|
+
when GLOBAL then read_global_instruction
|
60
|
+
when DICT then Instructions::DICT
|
61
|
+
when GET then read_get_instruction
|
62
|
+
when LIST then Instructions::LIST
|
63
|
+
when PUT then read_put_instruction
|
64
|
+
when SETITEM then Instructions::SETITEM
|
65
|
+
when TUPLE then Instructions::TUPLE
|
66
|
+
when INST then read_inst_instruction
|
67
|
+
when OBJ then Instructions::OBJ
|
68
|
+
when PERSID then read_persid_instruction
|
69
|
+
when BINPERSID then Instructions::BINPERSID
|
75
70
|
#
|
76
71
|
# Protocol 1 instructions
|
77
72
|
#
|
78
|
-
when
|
79
|
-
|
80
|
-
when
|
81
|
-
|
82
|
-
when
|
83
|
-
|
84
|
-
when
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
when
|
90
|
-
|
91
|
-
string = @io.read(length)
|
92
|
-
|
93
|
-
Instructions::ShortBinString.new(length,string)
|
94
|
-
when 88 # BINUNICODE
|
95
|
-
length = read_uint32_le
|
96
|
-
string = @io.read(length).force_encoding(Encoding::UTF_8)
|
97
|
-
|
98
|
-
Instructions::BinUnicode.new(length,string)
|
99
|
-
when 93 # EMPTY_LIST
|
100
|
-
Instructions::EMPTY_LIST
|
101
|
-
when 101 # APPENDS
|
102
|
-
Instructions::APPENDS
|
103
|
-
when 104 # BINGET
|
104
|
-
Instructions::BinGet.new(read_uint8)
|
105
|
-
when 106 # LONG_BINGET
|
106
|
-
Instructions::LongBinGet.new(read_uint32_le)
|
107
|
-
when 113 # BINPUT
|
108
|
-
Instructions::BinPut.new(read_uint8)
|
109
|
-
when 117 # SETITEMS
|
110
|
-
Instructions::SETITEMS
|
111
|
-
when 125 # EMPTY_DICT
|
112
|
-
Instructions::EMPTY_DICT
|
73
|
+
when EMPTY_TUPLE then Instructions::EMPTY_TUPLE
|
74
|
+
when BINFLOAT then read_binfloat_instruction
|
75
|
+
when BININT1 then read_binint1_instruction
|
76
|
+
when BINSTRING then read_binstring_instruction
|
77
|
+
when SHORT_BINSTRING then read_short_binstring_instruction
|
78
|
+
when BINUNICODE then read_binunicode_instruction
|
79
|
+
when EMPTY_LIST then Instructions::EMPTY_LIST
|
80
|
+
when APPENDS then Instructions::APPENDS
|
81
|
+
when BINGET then read_binget_instruction
|
82
|
+
when LONG_BINGET then read_long_binget_instruction
|
83
|
+
when BINPUT then read_binput_instruction
|
84
|
+
when SETITEMS then Instructions::SETITEMS
|
85
|
+
when EMPTY_DICT then Instructions::EMPTY_DICT
|
113
86
|
#
|
114
87
|
# Protocol 2 instructions
|
115
88
|
#
|
116
|
-
when
|
117
|
-
|
118
|
-
when
|
119
|
-
|
120
|
-
when
|
121
|
-
|
122
|
-
when
|
123
|
-
|
124
|
-
when
|
125
|
-
|
126
|
-
when
|
127
|
-
|
128
|
-
when 134 # TUPLE2
|
129
|
-
Instructions::TUPLE2
|
130
|
-
when 135 # TUPLE3
|
131
|
-
Instructions::TUPLE3
|
132
|
-
when 136 # NEWTRUE
|
133
|
-
Instructions::NEWTRUE
|
134
|
-
when 137 # NEWFALSE
|
135
|
-
Instructions::NEWFALSE
|
136
|
-
when 138 # LONG1
|
137
|
-
length = read_uint8
|
138
|
-
long = read_int_le(length)
|
139
|
-
|
140
|
-
Instructions::Long1.new(length,long)
|
141
|
-
when 139 # LONG4
|
142
|
-
length = read_uint32_le
|
143
|
-
long = read_int_le(length)
|
144
|
-
|
145
|
-
Instructions::Long4.new(length,long)
|
89
|
+
when PROTO then read_proto_instruction
|
90
|
+
when NEWOBJ then Instructions::NEWOBJ
|
91
|
+
when EXT1 then read_ext1_instruction
|
92
|
+
when EXT2 then read_ext2_instruction
|
93
|
+
when EXT4 then read_ext4_instruction
|
94
|
+
when TUPLE1 then Instructions::TUPLE1
|
95
|
+
when TUPLE2 then Instructions::TUPLE2
|
96
|
+
when TUPLE3 then Instructions::TUPLE3
|
97
|
+
when NEWTRUE then Instructions::NEWTRUE
|
98
|
+
when NEWFALSE then Instructions::NEWFALSE
|
99
|
+
when LONG1 then read_long1_instruction
|
100
|
+
when LONG4 then read_long4_instruction
|
146
101
|
#
|
147
102
|
# Protocol 3 instructions
|
148
103
|
#
|
149
|
-
when
|
150
|
-
|
151
|
-
bytes = @io.read(length)
|
152
|
-
|
153
|
-
Instructions::BinBytes.new(length,bytes)
|
154
|
-
when 67 # SHORT_BINBYTES
|
155
|
-
length = read_uint8
|
156
|
-
bytes = @io.read(length)
|
157
|
-
|
158
|
-
Instructions::ShortBinBytes.new(length,bytes)
|
104
|
+
when BINBYTES then read_binbytes_instruction
|
105
|
+
when SHORT_BINBYTES then read_short_binbytes_instruction
|
159
106
|
#
|
160
107
|
# Protocol 4 instructions
|
161
108
|
#
|
162
|
-
when
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
when
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
when 142 # BINBYTES8
|
173
|
-
length = read_uint64_le
|
174
|
-
bytes = @io.read(length)
|
175
|
-
|
176
|
-
Instructions::BinBytes8.new(length,bytes)
|
177
|
-
when 143 # EMPTY_SET
|
178
|
-
Instructions::EMPTY_SET
|
179
|
-
when 144 # ADDITEMS
|
180
|
-
Instructions::ADDITEMS
|
181
|
-
when 145 # FROZENSET
|
182
|
-
Instructions::FROZENSET
|
183
|
-
when 146 # NEWOBJ_EX
|
184
|
-
Instructions::NEWOBJ_EX
|
185
|
-
when 147 # STACK_GLOBAL
|
186
|
-
Instructions::STACK_GLOBAL
|
187
|
-
when 148 # MEMOIZE
|
188
|
-
Instructions::MEMOIZE
|
189
|
-
when 149 # FRAME
|
190
|
-
length = read_uint64_le
|
191
|
-
|
192
|
-
enter_frame(read_frame(length))
|
193
|
-
|
194
|
-
Instructions::Frame.new(length)
|
109
|
+
when SHORT_BINUNICODE then read_short_binunicode_instruction
|
110
|
+
when BINUNICODE8 then read_binunicode8_instruction
|
111
|
+
when BINBYTES8 then read_binbytes8_instruction
|
112
|
+
when EMPTY_SET then Instructions::EMPTY_SET
|
113
|
+
when ADDITEMS then Instructions::ADDITEMS
|
114
|
+
when FROZENSET then Instructions::FROZENSET
|
115
|
+
when NEWOBJ_EX then Instructions::NEWOBJ_EX
|
116
|
+
when STACK_GLOBAL then Instructions::STACK_GLOBAL
|
117
|
+
when MEMOIZE then Instructions::MEMOIZE
|
118
|
+
when FRAME then read_frame_instruction
|
195
119
|
#
|
196
120
|
# Protocol 5 instructions.
|
197
121
|
#
|
198
|
-
when
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
Instructions::ByteArray8.new(length,bytes)
|
203
|
-
when 151 # NEXT_BUFFER
|
204
|
-
Instructions::NEXT_BUFFER
|
205
|
-
when 152 # READONLY_BUFFER
|
206
|
-
Instructions::READONLY_BUFFER
|
122
|
+
when BYTEARRAY8 then read_bytearray8_instruction
|
123
|
+
when NEXT_BUFFER then Instructions::NEXT_BUFFER
|
124
|
+
when READONLY_BUFFER then Instructions::READONLY_BUFFER
|
207
125
|
else
|
208
126
|
raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 5")
|
209
127
|
end
|
@@ -213,6 +131,20 @@ module Python
|
|
213
131
|
end
|
214
132
|
end
|
215
133
|
|
134
|
+
#
|
135
|
+
# Reads a `BYTEARRAY8` instruction.
|
136
|
+
#
|
137
|
+
# @return [Instructions::ByteArray8]
|
138
|
+
#
|
139
|
+
# @since 0.2.0
|
140
|
+
#
|
141
|
+
def read_bytearray8_instruction
|
142
|
+
length = read_uint64_le
|
143
|
+
bytes = @io.read(length)
|
144
|
+
|
145
|
+
Instructions::ByteArray8.new(length,bytes)
|
146
|
+
end
|
147
|
+
|
216
148
|
end
|
217
149
|
end
|
218
150
|
end
|
data/lib/python/pickle.rb
CHANGED
@@ -98,6 +98,9 @@ module Python
|
|
98
98
|
# An optional mapping of custom Python constant names to Ruby classes
|
99
99
|
# or methods.
|
100
100
|
#
|
101
|
+
# @option kwargs [Enumerable, nil] :buffers
|
102
|
+
# An enumerable list of out-of-band buffers.
|
103
|
+
#
|
101
104
|
# @api public
|
102
105
|
#
|
103
106
|
def self.load(data, protocol: nil, **kwargs)
|
@@ -130,6 +133,9 @@ module Python
|
|
130
133
|
# An optional mapping of custom Python constant names to Ruby classes
|
131
134
|
# or methods.
|
132
135
|
#
|
136
|
+
# @option kwargs [Enumerable, nil] :buffers
|
137
|
+
# An enumerable list of out-of-band buffers.
|
138
|
+
#
|
133
139
|
# @return [Object]
|
134
140
|
# The deserialized object.
|
135
141
|
#
|
@@ -183,38 +189,38 @@ module Python
|
|
183
189
|
|
184
190
|
begin
|
185
191
|
case opcode
|
186
|
-
when
|
192
|
+
when Protocol2::PROTO
|
187
193
|
version = io.getbyte
|
188
194
|
io.ungetbyte(version)
|
189
195
|
return version
|
190
|
-
when
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
196
|
+
when Protocol0::POP,
|
197
|
+
Protocol0::DUP,
|
198
|
+
Protocol0::FLOAT,
|
199
|
+
Protocol0::STRING,
|
200
|
+
Protocol0::UNICODE,
|
201
|
+
Protocol0::DICT,
|
202
|
+
Protocol0::GET,
|
203
|
+
Protocol0::LIST,
|
204
|
+
Protocol0::PUT
|
199
205
|
0
|
200
|
-
when
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
206
|
+
when Protocol1::EMPTY_TUPLE,
|
207
|
+
Protocol1::BINFLOAT,
|
208
|
+
Protocol1::BININT1,
|
209
|
+
Protocol1::BINSTRING,
|
210
|
+
Protocol1::SHORT_BINSTRING,
|
211
|
+
Protocol1::BINUNICODE,
|
212
|
+
Protocol1::EMPTY_LIST,
|
213
|
+
Protocol1::APPENDS,
|
214
|
+
Protocol1::BINPUT,
|
215
|
+
Protocol1::SETITEMS,
|
216
|
+
Protocol1::EMPTY_DICT
|
211
217
|
1
|
212
|
-
when
|
218
|
+
when Protocol0::STOP
|
213
219
|
# if we've read all the way to the end of the stream and still cannot
|
214
220
|
# find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
|
215
221
|
0
|
216
|
-
when
|
217
|
-
|
222
|
+
when Protocol0::INT, # identical in both protocol 0 and 1
|
223
|
+
Protocol0::LONG # identical in both protocol 0 and 1
|
218
224
|
chars = io.gets
|
219
225
|
|
220
226
|
begin
|
@@ -222,15 +228,15 @@ module Python
|
|
222
228
|
ensure
|
223
229
|
chars.each_byte.reverse_each { |b| io.ungetbyte(b) }
|
224
230
|
end
|
225
|
-
when
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
231
|
+
when Protocol0::MARK, # identical in both protocol 0 and 1
|
232
|
+
Protocol0::NONE, # identical in both protocol 0 and 1
|
233
|
+
Protocol0::REDUCE, # identical in both protocol 0 and 1
|
234
|
+
Protocol0::APPEND, # identical in both protocol 0 and 1
|
235
|
+
Protocol0::BUILD, # identical in both protocol 0 and 1
|
236
|
+
Protocol0::SETITEM, # identical in both protocol 0 and 1
|
237
|
+
Protocol0::TUPLE # identical in both protocol 0 and 1
|
232
238
|
infer_protocol_version(io)
|
233
|
-
when
|
239
|
+
when Protocol0::GLOBAL
|
234
240
|
first_nl_string = io.gets
|
235
241
|
second_nl_string = io.gets
|
236
242
|
|