python-pickle 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog.md +17 -0
- data/README.md +4 -1
- data/lib/python/pickle/deserializer.rb +142 -80
- data/lib/python/pickle/instructions/bin_persid.rb +31 -0
- data/lib/python/pickle/instructions/global.rb +11 -41
- data/lib/python/pickle/instructions/has_namespace_and_name.rb +61 -0
- data/lib/python/pickle/instructions/inst.rb +34 -0
- data/lib/python/pickle/instructions/next_buffer.rb +5 -1
- data/lib/python/pickle/instructions/obj.rb +30 -0
- data/lib/python/pickle/instructions/persid.rb +31 -0
- data/lib/python/pickle/instructions/readonly_buffer.rb +4 -0
- data/lib/python/pickle/instructions.rb +64 -0
- data/lib/python/pickle/protocol0.rb +313 -68
- data/lib/python/pickle/protocol1.rb +225 -93
- data/lib/python/pickle/protocol2.rb +205 -124
- data/lib/python/pickle/protocol3.rb +92 -123
- data/lib/python/pickle/protocol4.rb +188 -165
- data/lib/python/pickle/protocol5.rb +98 -166
- data/lib/python/pickle/version.rb +1 -1
- data/lib/python/pickle.rb +71 -39
- data/spec/deserializer_spec.rb +359 -0
- data/spec/fixtures/set_v0.pkl +11 -0
- data/spec/fixtures/set_v1.pkl +0 -0
- data/spec/fixtures/set_v2.pkl +0 -0
- data/spec/fixtures/set_v3.pkl +0 -0
- data/spec/fixtures/set_v4.pkl +0 -0
- data/spec/fixtures/set_v5.pkl +0 -0
- data/spec/generate_pickles2.py +1 -0
- data/spec/generate_pickles3.py +1 -0
- data/spec/integration/load/protocol0_spec.rb +10 -0
- data/spec/integration/load/protocol1_spec.rb +10 -0
- data/spec/integration/load/protocol2_spec.rb +10 -0
- data/spec/integration/load/protocol3_spec.rb +10 -0
- data/spec/integration/load/protocol4_spec.rb +10 -0
- data/spec/integration/load/protocol5_spec.rb +10 -0
- data/spec/pickle_spec.rb +61 -0
- data/spec/protocol0_read_instruction_examples.rb +44 -0
- metadata +14 -2
@@ -7,14 +7,26 @@ module Python
|
|
7
7
|
module Pickle
|
8
8
|
class Protocol5 < Protocol4
|
9
9
|
|
10
|
-
#
|
10
|
+
# The `BYTEARRAY8` opcode.
|
11
11
|
#
|
12
|
-
# @
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
# @since 0.2.0
|
13
|
+
#
|
14
|
+
# @see https://github.com/python/cpython/blob/v3.8.0/Lib/pickle.py#L193
|
15
|
+
BYTEARRAY8 = 150
|
16
|
+
|
17
|
+
# The `NEXT_BUFFER` opcode.
|
18
|
+
#
|
19
|
+
# @since 0.2.0
|
20
|
+
#
|
21
|
+
# @see https://github.com/python/cpython/blob/v3.8.0/Lib/pickle.py#L194
|
22
|
+
NEXT_BUFFER = 151
|
23
|
+
|
24
|
+
# The `READONLY_BUFFER` opcode.
|
25
|
+
#
|
26
|
+
# @since 0.2.0
|
27
|
+
#
|
28
|
+
# @see https://github.com/python/cpython/blob/v3.8.0/Lib/pickle.py#L195
|
29
|
+
READONLY_BUFFER = 152
|
18
30
|
|
19
31
|
#
|
20
32
|
# Reads an instruction from the pickle stream.
|
@@ -30,180 +42,86 @@ module Python
|
|
30
42
|
#
|
31
43
|
# Protocol 0 instructions
|
32
44
|
#
|
33
|
-
when
|
34
|
-
|
35
|
-
when
|
36
|
-
|
37
|
-
when
|
38
|
-
|
39
|
-
when
|
40
|
-
|
41
|
-
when
|
42
|
-
|
43
|
-
when
|
44
|
-
|
45
|
-
when
|
46
|
-
|
47
|
-
when
|
48
|
-
|
49
|
-
when
|
50
|
-
|
51
|
-
when
|
52
|
-
|
53
|
-
when
|
54
|
-
|
55
|
-
when
|
56
|
-
|
57
|
-
when
|
58
|
-
Instructions::APPEND
|
59
|
-
when 98 # BUILD
|
60
|
-
Instructions::BUILD
|
61
|
-
when 99 # GLOBAL
|
62
|
-
Instructions::Global.new(read_nl_string,read_nl_string)
|
63
|
-
when 100 # DICT
|
64
|
-
Instructions::DICT
|
65
|
-
when 103 # GET
|
66
|
-
Instructions::Get.new(read_int)
|
67
|
-
when 108 # LIST
|
68
|
-
Instructions::LIST
|
69
|
-
when 112 # PUT
|
70
|
-
Instructions::Put.new(read_int)
|
71
|
-
when 115 # SETITEM
|
72
|
-
Instructions::SETITEM
|
73
|
-
when 116 # TUPLE
|
74
|
-
Instructions::TUPLE
|
45
|
+
when MARK then Instructions::MARK
|
46
|
+
when STOP then Instructions::STOP
|
47
|
+
when POP then Instructions::POP
|
48
|
+
when POP_MARK then Instructions::POP_MARK
|
49
|
+
when DUP then Instructions::DUP
|
50
|
+
when FLOAT then read_float_instruction
|
51
|
+
when INT then read_int_instruction
|
52
|
+
when LONG then read_long_instruction
|
53
|
+
when NONE then Instructions::NONE
|
54
|
+
when REDUCE then Instructions::REDUCE
|
55
|
+
when STRING then read_string_instruction
|
56
|
+
when UNICODE then read_unicode_instruction
|
57
|
+
when APPEND then Instructions::APPEND
|
58
|
+
when BUILD then Instructions::BUILD
|
59
|
+
when GLOBAL then read_global_instruction
|
60
|
+
when DICT then Instructions::DICT
|
61
|
+
when GET then read_get_instruction
|
62
|
+
when LIST then Instructions::LIST
|
63
|
+
when PUT then read_put_instruction
|
64
|
+
when SETITEM then Instructions::SETITEM
|
65
|
+
when TUPLE then Instructions::TUPLE
|
66
|
+
when INST then read_inst_instruction
|
67
|
+
when OBJ then Instructions::OBJ
|
68
|
+
when PERSID then read_persid_instruction
|
69
|
+
when BINPERSID then Instructions::BINPERSID
|
75
70
|
#
|
76
71
|
# Protocol 1 instructions
|
77
72
|
#
|
78
|
-
when
|
79
|
-
|
80
|
-
when
|
81
|
-
|
82
|
-
when
|
83
|
-
|
84
|
-
when
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
when
|
90
|
-
|
91
|
-
string = @io.read(length)
|
92
|
-
|
93
|
-
Instructions::ShortBinString.new(length,string)
|
94
|
-
when 88 # BINUNICODE
|
95
|
-
length = read_uint32_le
|
96
|
-
string = @io.read(length).force_encoding(Encoding::UTF_8)
|
97
|
-
|
98
|
-
Instructions::BinUnicode.new(length,string)
|
99
|
-
when 93 # EMPTY_LIST
|
100
|
-
Instructions::EMPTY_LIST
|
101
|
-
when 101 # APPENDS
|
102
|
-
Instructions::APPENDS
|
103
|
-
when 104 # BINGET
|
104
|
-
Instructions::BinGet.new(read_uint8)
|
105
|
-
when 106 # LONG_BINGET
|
106
|
-
Instructions::LongBinGet.new(read_uint32_le)
|
107
|
-
when 113 # BINPUT
|
108
|
-
Instructions::BinPut.new(read_uint8)
|
109
|
-
when 117 # SETITEMS
|
110
|
-
Instructions::SETITEMS
|
111
|
-
when 125 # EMPTY_DICT
|
112
|
-
Instructions::EMPTY_DICT
|
73
|
+
when EMPTY_TUPLE then Instructions::EMPTY_TUPLE
|
74
|
+
when BINFLOAT then read_binfloat_instruction
|
75
|
+
when BININT1 then read_binint1_instruction
|
76
|
+
when BINSTRING then read_binstring_instruction
|
77
|
+
when SHORT_BINSTRING then read_short_binstring_instruction
|
78
|
+
when BINUNICODE then read_binunicode_instruction
|
79
|
+
when EMPTY_LIST then Instructions::EMPTY_LIST
|
80
|
+
when APPENDS then Instructions::APPENDS
|
81
|
+
when BINGET then read_binget_instruction
|
82
|
+
when LONG_BINGET then read_long_binget_instruction
|
83
|
+
when BINPUT then read_binput_instruction
|
84
|
+
when SETITEMS then Instructions::SETITEMS
|
85
|
+
when EMPTY_DICT then Instructions::EMPTY_DICT
|
113
86
|
#
|
114
87
|
# Protocol 2 instructions
|
115
88
|
#
|
116
|
-
when
|
117
|
-
|
118
|
-
when
|
119
|
-
|
120
|
-
when
|
121
|
-
|
122
|
-
when
|
123
|
-
|
124
|
-
when
|
125
|
-
|
126
|
-
when
|
127
|
-
|
128
|
-
when 134 # TUPLE2
|
129
|
-
Instructions::TUPLE2
|
130
|
-
when 135 # TUPLE3
|
131
|
-
Instructions::TUPLE3
|
132
|
-
when 136 # NEWTRUE
|
133
|
-
Instructions::NEWTRUE
|
134
|
-
when 137 # NEWFALSE
|
135
|
-
Instructions::NEWFALSE
|
136
|
-
when 138 # LONG1
|
137
|
-
length = read_uint8
|
138
|
-
long = read_int_le(length)
|
139
|
-
|
140
|
-
Instructions::Long1.new(length,long)
|
141
|
-
when 139 # LONG4
|
142
|
-
length = read_uint32_le
|
143
|
-
long = read_int_le(length)
|
144
|
-
|
145
|
-
Instructions::Long4.new(length,long)
|
89
|
+
when PROTO then read_proto_instruction
|
90
|
+
when NEWOBJ then Instructions::NEWOBJ
|
91
|
+
when EXT1 then read_ext1_instruction
|
92
|
+
when EXT2 then read_ext2_instruction
|
93
|
+
when EXT4 then read_ext4_instruction
|
94
|
+
when TUPLE1 then Instructions::TUPLE1
|
95
|
+
when TUPLE2 then Instructions::TUPLE2
|
96
|
+
when TUPLE3 then Instructions::TUPLE3
|
97
|
+
when NEWTRUE then Instructions::NEWTRUE
|
98
|
+
when NEWFALSE then Instructions::NEWFALSE
|
99
|
+
when LONG1 then read_long1_instruction
|
100
|
+
when LONG4 then read_long4_instruction
|
146
101
|
#
|
147
102
|
# Protocol 3 instructions
|
148
103
|
#
|
149
|
-
when
|
150
|
-
|
151
|
-
bytes = @io.read(length)
|
152
|
-
|
153
|
-
Instructions::BinBytes.new(length,bytes)
|
154
|
-
when 67 # SHORT_BINBYTES
|
155
|
-
length = read_uint8
|
156
|
-
bytes = @io.read(length)
|
157
|
-
|
158
|
-
Instructions::ShortBinBytes.new(length,bytes)
|
104
|
+
when BINBYTES then read_binbytes_instruction
|
105
|
+
when SHORT_BINBYTES then read_short_binbytes_instruction
|
159
106
|
#
|
160
107
|
# Protocol 4 instructions
|
161
108
|
#
|
162
|
-
when
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
when
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
when 142 # BINBYTES8
|
173
|
-
length = read_uint64_le
|
174
|
-
bytes = @io.read(length)
|
175
|
-
|
176
|
-
Instructions::BinBytes8.new(length,bytes)
|
177
|
-
when 143 # EMPTY_SET
|
178
|
-
Instructions::EMPTY_SET
|
179
|
-
when 144 # ADDITEMS
|
180
|
-
Instructions::ADDITEMS
|
181
|
-
when 145 # FROZENSET
|
182
|
-
Instructions::FROZENSET
|
183
|
-
when 146 # NEWOBJ_EX
|
184
|
-
Instructions::NEWOBJ_EX
|
185
|
-
when 147 # STACK_GLOBAL
|
186
|
-
Instructions::STACK_GLOBAL
|
187
|
-
when 148 # MEMOIZE
|
188
|
-
Instructions::MEMOIZE
|
189
|
-
when 149 # FRAME
|
190
|
-
length = read_uint64_le
|
191
|
-
|
192
|
-
enter_frame(read_frame(length))
|
193
|
-
|
194
|
-
Instructions::Frame.new(length)
|
109
|
+
when SHORT_BINUNICODE then read_short_binunicode_instruction
|
110
|
+
when BINUNICODE8 then read_binunicode8_instruction
|
111
|
+
when BINBYTES8 then read_binbytes8_instruction
|
112
|
+
when EMPTY_SET then Instructions::EMPTY_SET
|
113
|
+
when ADDITEMS then Instructions::ADDITEMS
|
114
|
+
when FROZENSET then Instructions::FROZENSET
|
115
|
+
when NEWOBJ_EX then Instructions::NEWOBJ_EX
|
116
|
+
when STACK_GLOBAL then Instructions::STACK_GLOBAL
|
117
|
+
when MEMOIZE then Instructions::MEMOIZE
|
118
|
+
when FRAME then read_frame_instruction
|
195
119
|
#
|
196
120
|
# Protocol 5 instructions.
|
197
121
|
#
|
198
|
-
when
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
Instructions::ByteArray8.new(length,bytes)
|
203
|
-
when 151 # NEXT_BUFFER
|
204
|
-
Instructions::NEXT_BUFFER
|
205
|
-
when 152 # READONLY_BUFFER
|
206
|
-
Instructions::READONLY_BUFFER
|
122
|
+
when BYTEARRAY8 then read_bytearray8_instruction
|
123
|
+
when NEXT_BUFFER then Instructions::NEXT_BUFFER
|
124
|
+
when READONLY_BUFFER then Instructions::READONLY_BUFFER
|
207
125
|
else
|
208
126
|
raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 5")
|
209
127
|
end
|
@@ -213,6 +131,20 @@ module Python
|
|
213
131
|
end
|
214
132
|
end
|
215
133
|
|
134
|
+
#
|
135
|
+
# Reads a `BYTEARRAY8` instruction.
|
136
|
+
#
|
137
|
+
# @return [Instructions::ByteArray8]
|
138
|
+
#
|
139
|
+
# @since 0.2.0
|
140
|
+
#
|
141
|
+
def read_bytearray8_instruction
|
142
|
+
length = read_uint64_le
|
143
|
+
bytes = @io.read(length)
|
144
|
+
|
145
|
+
Instructions::ByteArray8.new(length,bytes)
|
146
|
+
end
|
147
|
+
|
216
148
|
end
|
217
149
|
end
|
218
150
|
end
|
data/lib/python/pickle.rb
CHANGED
@@ -28,7 +28,7 @@ module Python
|
|
28
28
|
# The default protocol version to use.
|
29
29
|
#
|
30
30
|
# @api public
|
31
|
-
|
31
|
+
DEFAULT_PROTOCOL = 4
|
32
32
|
|
33
33
|
# The highest protocol version supported.
|
34
34
|
#
|
@@ -88,12 +88,25 @@ module Python
|
|
88
88
|
# The explicit protocol version to use. If `nil` the protocol version will
|
89
89
|
# be inferred by inspecting the first two bytes of the stream.
|
90
90
|
#
|
91
|
+
# @param [Hash{Symbol => Object}] kwargs
|
92
|
+
# Additional keyword arguments.
|
93
|
+
#
|
94
|
+
# @option kwargs [Hash{Integer => Object}] :extensions
|
95
|
+
# A Hash of registered extension IDs and their Objects.
|
96
|
+
#
|
97
|
+
# @option kwargs [Hash{String => Hash{String => Class,Method}}] :constants
|
98
|
+
# An optional mapping of custom Python constant names to Ruby classes
|
99
|
+
# or methods.
|
100
|
+
#
|
101
|
+
# @option kwargs [Enumerable, nil] :buffers
|
102
|
+
# An enumerable list of out-of-band buffers.
|
103
|
+
#
|
91
104
|
# @api public
|
92
105
|
#
|
93
|
-
def self.load(data
|
106
|
+
def self.load(data, protocol: nil, **kwargs)
|
94
107
|
deserializer = Deserializer.new(**kwargs)
|
95
108
|
|
96
|
-
parse(data) do |instruction|
|
109
|
+
parse(data, protocol: protocol) do |instruction|
|
97
110
|
status, object = deserializer.execute(instruction)
|
98
111
|
|
99
112
|
if status == :halt
|
@@ -110,11 +123,30 @@ module Python
|
|
110
123
|
# @param [String] path
|
111
124
|
# The path of the file.
|
112
125
|
#
|
126
|
+
# @param [Hash{Symbol => Object}] kwargs
|
127
|
+
# Additional keyword arguments.
|
128
|
+
#
|
129
|
+
# @option kwargs [Hash{Integer => Object}] :extensions
|
130
|
+
# A Hash of registered extension IDs and their Objects.
|
131
|
+
#
|
132
|
+
# @option kwargs [Hash{String => Hash{String => Class,Method}}] :constants
|
133
|
+
# An optional mapping of custom Python constant names to Ruby classes
|
134
|
+
# or methods.
|
135
|
+
#
|
136
|
+
# @option kwargs [Enumerable, nil] :buffers
|
137
|
+
# An enumerable list of out-of-band buffers.
|
138
|
+
#
|
113
139
|
# @return [Object]
|
114
140
|
# The deserialized object.
|
115
141
|
#
|
116
142
|
def self.load_file(path,**kwargs)
|
117
|
-
|
143
|
+
result = nil
|
144
|
+
|
145
|
+
File.open(path,'rb') do |file|
|
146
|
+
result = load(file,**kwargs)
|
147
|
+
end
|
148
|
+
|
149
|
+
return result
|
118
150
|
end
|
119
151
|
|
120
152
|
#
|
@@ -129,12 +161,12 @@ module Python
|
|
129
161
|
# @param [Integer] protocol
|
130
162
|
# The desired Python Pickle protocol to use.
|
131
163
|
#
|
164
|
+
# @note serializing is currently not supported.
|
165
|
+
#
|
132
166
|
# @api public
|
133
167
|
#
|
134
168
|
def self.dump(object,output=nil, protocol: DEFAULT_PROTOCOL)
|
135
|
-
|
136
|
-
raise(ArgumentError,"protocol must be between 0 or #{HIGHEST_PROTOCOL}, but was #{protocol.inspect}")
|
137
|
-
end
|
169
|
+
raise(NotImplementedError,"pickle serializing is currently not supported")
|
138
170
|
end
|
139
171
|
|
140
172
|
#
|
@@ -157,38 +189,38 @@ module Python
|
|
157
189
|
|
158
190
|
begin
|
159
191
|
case opcode
|
160
|
-
when
|
192
|
+
when Protocol2::PROTO
|
161
193
|
version = io.getbyte
|
162
194
|
io.ungetbyte(version)
|
163
195
|
return version
|
164
|
-
when
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
196
|
+
when Protocol0::POP,
|
197
|
+
Protocol0::DUP,
|
198
|
+
Protocol0::FLOAT,
|
199
|
+
Protocol0::STRING,
|
200
|
+
Protocol0::UNICODE,
|
201
|
+
Protocol0::DICT,
|
202
|
+
Protocol0::GET,
|
203
|
+
Protocol0::LIST,
|
204
|
+
Protocol0::PUT
|
173
205
|
0
|
174
|
-
when
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
206
|
+
when Protocol1::EMPTY_TUPLE,
|
207
|
+
Protocol1::BINFLOAT,
|
208
|
+
Protocol1::BININT1,
|
209
|
+
Protocol1::BINSTRING,
|
210
|
+
Protocol1::SHORT_BINSTRING,
|
211
|
+
Protocol1::BINUNICODE,
|
212
|
+
Protocol1::EMPTY_LIST,
|
213
|
+
Protocol1::APPENDS,
|
214
|
+
Protocol1::BINPUT,
|
215
|
+
Protocol1::SETITEMS,
|
216
|
+
Protocol1::EMPTY_DICT
|
185
217
|
1
|
186
|
-
when
|
218
|
+
when Protocol0::STOP
|
187
219
|
# if we've read all the way to the end of the stream and still cannot
|
188
220
|
# find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
|
189
221
|
0
|
190
|
-
when
|
191
|
-
|
222
|
+
when Protocol0::INT, # identical in both protocol 0 and 1
|
223
|
+
Protocol0::LONG # identical in both protocol 0 and 1
|
192
224
|
chars = io.gets
|
193
225
|
|
194
226
|
begin
|
@@ -196,15 +228,15 @@ module Python
|
|
196
228
|
ensure
|
197
229
|
chars.each_byte.reverse_each { |b| io.ungetbyte(b) }
|
198
230
|
end
|
199
|
-
when
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
231
|
+
when Protocol0::MARK, # identical in both protocol 0 and 1
|
232
|
+
Protocol0::NONE, # identical in both protocol 0 and 1
|
233
|
+
Protocol0::REDUCE, # identical in both protocol 0 and 1
|
234
|
+
Protocol0::APPEND, # identical in both protocol 0 and 1
|
235
|
+
Protocol0::BUILD, # identical in both protocol 0 and 1
|
236
|
+
Protocol0::SETITEM, # identical in both protocol 0 and 1
|
237
|
+
Protocol0::TUPLE # identical in both protocol 0 and 1
|
206
238
|
infer_protocol_version(io)
|
207
|
-
when
|
239
|
+
when Protocol0::GLOBAL
|
208
240
|
first_nl_string = io.gets
|
209
241
|
second_nl_string = io.gets
|
210
242
|
|