python-pickle 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/ChangeLog.md +12 -0
  3. data/README.md +2 -1
  4. data/lib/python/pickle/deserializer.rb +142 -80
  5. data/lib/python/pickle/instructions/bin_persid.rb +31 -0
  6. data/lib/python/pickle/instructions/global.rb +11 -41
  7. data/lib/python/pickle/instructions/has_namespace_and_name.rb +61 -0
  8. data/lib/python/pickle/instructions/inst.rb +34 -0
  9. data/lib/python/pickle/instructions/next_buffer.rb +5 -1
  10. data/lib/python/pickle/instructions/obj.rb +30 -0
  11. data/lib/python/pickle/instructions/persid.rb +31 -0
  12. data/lib/python/pickle/instructions/readonly_buffer.rb +4 -0
  13. data/lib/python/pickle/instructions.rb +64 -0
  14. data/lib/python/pickle/protocol0.rb +313 -68
  15. data/lib/python/pickle/protocol1.rb +225 -93
  16. data/lib/python/pickle/protocol2.rb +205 -124
  17. data/lib/python/pickle/protocol3.rb +92 -123
  18. data/lib/python/pickle/protocol4.rb +188 -165
  19. data/lib/python/pickle/protocol5.rb +98 -166
  20. data/lib/python/pickle/version.rb +1 -1
  21. data/lib/python/pickle.rb +38 -32
  22. data/spec/deserializer_spec.rb +308 -0
  23. data/spec/fixtures/set_v0.pkl +11 -0
  24. data/spec/fixtures/set_v1.pkl +0 -0
  25. data/spec/fixtures/set_v2.pkl +0 -0
  26. data/spec/fixtures/set_v3.pkl +0 -0
  27. data/spec/fixtures/set_v4.pkl +0 -0
  28. data/spec/fixtures/set_v5.pkl +0 -0
  29. data/spec/generate_pickles2.py +1 -0
  30. data/spec/generate_pickles3.py +1 -0
  31. data/spec/integration/load/protocol0_spec.rb +10 -0
  32. data/spec/integration/load/protocol1_spec.rb +10 -0
  33. data/spec/integration/load/protocol2_spec.rb +10 -0
  34. data/spec/integration/load/protocol3_spec.rb +10 -0
  35. data/spec/integration/load/protocol4_spec.rb +10 -0
  36. data/spec/integration/load/protocol5_spec.rb +10 -0
  37. data/spec/protocol0_read_instruction_examples.rb +44 -0
  38. metadata +14 -2
@@ -7,14 +7,26 @@ module Python
7
7
  module Pickle
8
8
  class Protocol5 < Protocol4
9
9
 
10
- # Opcodes for Pickle protocol 5.
10
+ # The `BYTEARRAY8` opcode.
11
11
  #
12
- # @see https://peps.python.org/pep-0574/
13
- OPCODES = Protocol4::OPCODES + Set[
14
- 150, # BYTEARRAY8
15
- 151, # NEXT_BUFFER
16
- 152, # READONLY_BUFFER
17
- ]
12
+ # @since 0.2.0
13
+ #
14
+ # @see https://github.com/python/cpython/blob/v2.7/Lib/pickle.py#L193
15
+ BYTEARRAY8 = 150
16
+
17
+ # The `NEXT_BUFFER` opcode.
18
+ #
19
+ # @since 0.2.0
20
+ #
21
+ # @see https://github.com/python/cpython/blob/v2.7/Lib/pickle.py#L194
22
+ NEXT_BUFFER = 151
23
+
24
+ # The `READONLY_BUFFER` opcode.
25
+ #
26
+ # @since 0.2.0
27
+ #
28
+ # @see https://github.com/python/cpython/blob/v2.7/Lib/pickle.py#L195
29
+ READONLY_BUFFER = 152
18
30
 
19
31
  #
20
32
  # Reads an instruction from the pickle stream.
@@ -30,180 +42,86 @@ module Python
30
42
  #
31
43
  # Protocol 0 instructions
32
44
  #
33
- when 40 # MARK
34
- Instructions::MARK
35
- when 46 # STOP
36
- Instructions::STOP
37
- when 48 # POP
38
- Instructions::POP
39
- when 49 # POP_MARK
40
- Instructions::POP_MARK
41
- when 50 # DUP
42
- Instructions::DUP
43
- when 70 # FLOAT
44
- Instructions::Float.new(read_float)
45
- when 73 # INT
46
- Instructions::Int.new(read_int)
47
- when 76 # LONG
48
- Instructions::Long.new(read_long)
49
- when 78 # NONE
50
- Instructions::NONE
51
- when 82 # REDUCE
52
- Instructions::REDUCE
53
- when 83 # STRING
54
- Instructions::String.new(read_string)
55
- when 86 # UNICODE
56
- Instructions::String.new(read_unicode_string)
57
- when 97 # APPEND
58
- Instructions::APPEND
59
- when 98 # BUILD
60
- Instructions::BUILD
61
- when 99 # GLOBAL
62
- Instructions::Global.new(read_nl_string,read_nl_string)
63
- when 100 # DICT
64
- Instructions::DICT
65
- when 103 # GET
66
- Instructions::Get.new(read_int)
67
- when 108 # LIST
68
- Instructions::LIST
69
- when 112 # PUT
70
- Instructions::Put.new(read_int)
71
- when 115 # SETITEM
72
- Instructions::SETITEM
73
- when 116 # TUPLE
74
- Instructions::TUPLE
45
+ when MARK then Instructions::MARK
46
+ when STOP then Instructions::STOP
47
+ when POP then Instructions::POP
48
+ when POP_MARK then Instructions::POP_MARK
49
+ when DUP then Instructions::DUP
50
+ when FLOAT then read_float_instruction
51
+ when INT then read_int_instruction
52
+ when LONG then read_long_instruction
53
+ when NONE then Instructions::NONE
54
+ when REDUCE then Instructions::REDUCE
55
+ when STRING then read_string_instruction
56
+ when UNICODE then read_unicode_instruction
57
+ when APPEND then Instructions::APPEND
58
+ when BUILD then Instructions::BUILD
59
+ when GLOBAL then read_global_instruction
60
+ when DICT then Instructions::DICT
61
+ when GET then read_get_instruction
62
+ when LIST then Instructions::LIST
63
+ when PUT then read_put_instruction
64
+ when SETITEM then Instructions::SETITEM
65
+ when TUPLE then Instructions::TUPLE
66
+ when INST then read_inst_instruction
67
+ when OBJ then Instructions::OBJ
68
+ when PERSID then read_persid_instruction
69
+ when BINPERSID then Instructions::BINPERSID
75
70
  #
76
71
  # Protocol 1 instructions
77
72
  #
78
- when 41 # EMPTY_TUPLE
79
- Instructions::EMPTY_TUPLE
80
- when 71 # BINFLOAT
81
- Instructions::BinFloat.new(read_float64_be)
82
- when 75 # BININT1
83
- Instructions::BinInt1.new(read_uint8)
84
- when 84 # BINSTRING
85
- length = read_uint32_le
86
- string = @io.read(length)
87
-
88
- Instructions::BinString.new(length,string)
89
- when 85 # SHORT_BINSTRING
90
- length = read_uint8
91
- string = @io.read(length)
92
-
93
- Instructions::ShortBinString.new(length,string)
94
- when 88 # BINUNICODE
95
- length = read_uint32_le
96
- string = @io.read(length).force_encoding(Encoding::UTF_8)
97
-
98
- Instructions::BinUnicode.new(length,string)
99
- when 93 # EMPTY_LIST
100
- Instructions::EMPTY_LIST
101
- when 101 # APPENDS
102
- Instructions::APPENDS
103
- when 104 # BINGET
104
- Instructions::BinGet.new(read_uint8)
105
- when 106 # LONG_BINGET
106
- Instructions::LongBinGet.new(read_uint32_le)
107
- when 113 # BINPUT
108
- Instructions::BinPut.new(read_uint8)
109
- when 117 # SETITEMS
110
- Instructions::SETITEMS
111
- when 125 # EMPTY_DICT
112
- Instructions::EMPTY_DICT
73
+ when EMPTY_TUPLE then Instructions::EMPTY_TUPLE
74
+ when BINFLOAT then read_binfloat_instruction
75
+ when BININT1 then read_binint1_instruction
76
+ when BINSTRING then read_binstring_instruction
77
+ when SHORT_BINSTRING then read_short_binstring_instruction
78
+ when BINUNICODE then read_binunicode_instruction
79
+ when EMPTY_LIST then Instructions::EMPTY_LIST
80
+ when APPENDS then Instructions::APPENDS
81
+ when BINGET then read_binget_instruction
82
+ when LONG_BINGET then read_long_binget_instruction
83
+ when BINPUT then read_binput_instruction
84
+ when SETITEMS then Instructions::SETITEMS
85
+ when EMPTY_DICT then Instructions::EMPTY_DICT
113
86
  #
114
87
  # Protocol 2 instructions
115
88
  #
116
- when 128 # PROTO
117
- Instructions::Proto.new(read_uint8)
118
- when 129 # NEWOBJ
119
- Instructions::NEWOBJ
120
- when 130 # EXT1
121
- Instructions::Ext1.new(read_uint8)
122
- when 131 # EXT2
123
- Instructions::Ext2.new(read_uint16_le)
124
- when 132 # EXT4
125
- Instructions::Ext4.new(read_uint32_le)
126
- when 133 # TUPLE1
127
- Instructions::TUPLE1
128
- when 134 # TUPLE2
129
- Instructions::TUPLE2
130
- when 135 # TUPLE3
131
- Instructions::TUPLE3
132
- when 136 # NEWTRUE
133
- Instructions::NEWTRUE
134
- when 137 # NEWFALSE
135
- Instructions::NEWFALSE
136
- when 138 # LONG1
137
- length = read_uint8
138
- long = read_int_le(length)
139
-
140
- Instructions::Long1.new(length,long)
141
- when 139 # LONG4
142
- length = read_uint32_le
143
- long = read_int_le(length)
144
-
145
- Instructions::Long4.new(length,long)
89
+ when PROTO then read_proto_instruction
90
+ when NEWOBJ then Instructions::NEWOBJ
91
+ when EXT1 then read_ext1_instruction
92
+ when EXT2 then read_ext2_instruction
93
+ when EXT4 then read_ext4_instruction
94
+ when TUPLE1 then Instructions::TUPLE1
95
+ when TUPLE2 then Instructions::TUPLE2
96
+ when TUPLE3 then Instructions::TUPLE3
97
+ when NEWTRUE then Instructions::NEWTRUE
98
+ when NEWFALSE then Instructions::NEWFALSE
99
+ when LONG1 then read_long1_instruction
100
+ when LONG4 then read_long4_instruction
146
101
  #
147
102
  # Protocol 3 instructions
148
103
  #
149
- when 66 # BINBYTES
150
- length = read_uint32_le
151
- bytes = @io.read(length)
152
-
153
- Instructions::BinBytes.new(length,bytes)
154
- when 67 # SHORT_BINBYTES
155
- length = read_uint8
156
- bytes = @io.read(length)
157
-
158
- Instructions::ShortBinBytes.new(length,bytes)
104
+ when BINBYTES then read_binbytes_instruction
105
+ when SHORT_BINBYTES then read_short_binbytes_instruction
159
106
  #
160
107
  # Protocol 4 instructions
161
108
  #
162
- when 140 # SHORT_BINUNICODE
163
- length = read_uint8
164
- string = read_utf8_string(length)
165
-
166
- Instructions::ShortBinUnicode.new(length,string)
167
- when 141 # BINUNICODE8
168
- length = read_uint64_le
169
- string = read_utf8_string(length)
170
-
171
- Instructions::BinUnicode8.new(length,string)
172
- when 142 # BINBYTES8
173
- length = read_uint64_le
174
- bytes = @io.read(length)
175
-
176
- Instructions::BinBytes8.new(length,bytes)
177
- when 143 # EMPTY_SET
178
- Instructions::EMPTY_SET
179
- when 144 # ADDITEMS
180
- Instructions::ADDITEMS
181
- when 145 # FROZENSET
182
- Instructions::FROZENSET
183
- when 146 # NEWOBJ_EX
184
- Instructions::NEWOBJ_EX
185
- when 147 # STACK_GLOBAL
186
- Instructions::STACK_GLOBAL
187
- when 148 # MEMOIZE
188
- Instructions::MEMOIZE
189
- when 149 # FRAME
190
- length = read_uint64_le
191
-
192
- enter_frame(read_frame(length))
193
-
194
- Instructions::Frame.new(length)
109
+ when SHORT_BINUNICODE then read_short_binunicode_instruction
110
+ when BINUNICODE8 then read_binunicode8_instruction
111
+ when BINBYTES8 then read_binbytes8_instruction
112
+ when EMPTY_SET then Instructions::EMPTY_SET
113
+ when ADDITEMS then Instructions::ADDITEMS
114
+ when FROZENSET then Instructions::FROZENSET
115
+ when NEWOBJ_EX then Instructions::NEWOBJ_EX
116
+ when STACK_GLOBAL then Instructions::STACK_GLOBAL
117
+ when MEMOIZE then Instructions::MEMOIZE
118
+ when FRAME then read_frame_instruction
195
119
  #
196
120
  # Protocol 5 instructions.
197
121
  #
198
- when 150 # BYTEARRAY8
199
- length = read_uint64_le
200
- bytes = @io.read(length)
201
-
202
- Instructions::ByteArray8.new(length,bytes)
203
- when 151 # NEXT_BUFFER
204
- Instructions::NEXT_BUFFER
205
- when 152 # READONLY_BUFFER
206
- Instructions::READONLY_BUFFER
122
+ when BYTEARRAY8 then read_bytearray8_instruction
123
+ when NEXT_BUFFER then Instructions::NEXT_BUFFER
124
+ when READONLY_BUFFER then Instructions::READONLY_BUFFER
207
125
  else
208
126
  raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 5")
209
127
  end
@@ -213,6 +131,20 @@ module Python
213
131
  end
214
132
  end
215
133
 
134
+ #
135
+ # Reads a `BYTEARRAY8` instruction.
136
+ #
137
+ # @return [Instructions::ByteArray8]
138
+ #
139
+ # @since 0.2.0
140
+ #
141
+ def read_bytearray8_instruction
142
+ length = read_uint64_le
143
+ bytes = @io.read(length)
144
+
145
+ Instructions::ByteArray8.new(length,bytes)
146
+ end
147
+
216
148
  end
217
149
  end
218
150
  end
@@ -1,6 +1,6 @@
1
1
  module Python
2
2
  module Pickle
3
3
  # python-pickle version
4
- VERSION = '0.1.1'
4
+ VERSION = '0.2.0'
5
5
  end
6
6
  end
data/lib/python/pickle.rb CHANGED
@@ -98,6 +98,9 @@ module Python
98
98
  # An optional mapping of custom Python constant names to Ruby classes
99
99
  # or methods.
100
100
  #
101
+ # @option kwargs [Enumerable, nil] :buffers
102
+ # An enumerable list of out-of-band buffers.
103
+ #
101
104
  # @api public
102
105
  #
103
106
  def self.load(data, protocol: nil, **kwargs)
@@ -130,6 +133,9 @@ module Python
130
133
  # An optional mapping of custom Python constant names to Ruby classes
131
134
  # or methods.
132
135
  #
136
+ # @option kwargs [Enumerable, nil] :buffers
137
+ # An enumerable list of out-of-band buffers.
138
+ #
133
139
  # @return [Object]
134
140
  # The deserialized object.
135
141
  #
@@ -183,38 +189,38 @@ module Python
183
189
 
184
190
  begin
185
191
  case opcode
186
- when 0x80 # PROTO (added in protocol 2)
192
+ when Protocol2::PROTO
187
193
  version = io.getbyte
188
194
  io.ungetbyte(version)
189
195
  return version
190
- when 48, # POP (protocol 0)
191
- 50, # DUP (protocol 0)
192
- 70, # FLOAT (protocol 0)
193
- 83, # STRING (protocol 0)
194
- 86, # UNICODE (protocol 0)
195
- 100, # DICT (protocol 0)
196
- 103, # GET (protocol 0)
197
- 108, # LIST (protocol 0)
198
- 112 # PUT (protocol 0)
196
+ when Protocol0::POP,
197
+ Protocol0::DUP,
198
+ Protocol0::FLOAT,
199
+ Protocol0::STRING,
200
+ Protocol0::UNICODE,
201
+ Protocol0::DICT,
202
+ Protocol0::GET,
203
+ Protocol0::LIST,
204
+ Protocol0::PUT
199
205
  0
200
- when 41, # EMPTY_TUPLE (protocol 1)
201
- 71, # BINFLOAT (protocol 1)
202
- 75, # BININT1 (protocol 1)
203
- 84, # BINSTRING (protocol 1)
204
- 85, # SHORT_BINSTRING (protocol 1)
205
- 88, # BINUNICODE (protocol 1)
206
- 93, # EMPTY_LIST (protocol 1)
207
- 101, # APPENDS (protocol 1)
208
- 113, # BINPUT (protocol 1)
209
- 117, # SETITEMS (protocol 1)
210
- 125 # EMPTY_DICT (protocol 1)
206
+ when Protocol1::EMPTY_TUPLE,
207
+ Protocol1::BINFLOAT,
208
+ Protocol1::BININT1,
209
+ Protocol1::BINSTRING,
210
+ Protocol1::SHORT_BINSTRING,
211
+ Protocol1::BINUNICODE,
212
+ Protocol1::EMPTY_LIST,
213
+ Protocol1::APPENDS,
214
+ Protocol1::BINPUT,
215
+ Protocol1::SETITEMS,
216
+ Protocol1::EMPTY_DICT
211
217
  1
212
- when 46 # STOP
218
+ when Protocol0::STOP
213
219
  # if we've read all the way to the end of the stream and still cannot
214
220
  # find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
215
221
  0
216
- when 73, # INT (identical in both protocol 0 and 1)
217
- 76 # LONG (identical in both protocol 0 and 1)
222
+ when Protocol0::INT, # identical in both protocol 0 and 1
223
+ Protocol0::LONG # identical in both protocol 0 and 1
218
224
  chars = io.gets
219
225
 
220
226
  begin
@@ -222,15 +228,15 @@ module Python
222
228
  ensure
223
229
  chars.each_byte.reverse_each { |b| io.ungetbyte(b) }
224
230
  end
225
- when 40, # MARK (identical in both protocol 0 and 1)
226
- 78, # NONE (identical in both protocol 0 and 1)
227
- 82, # REDUCE (identical in both protocol 0 and 1)
228
- 97, # APPEND (identical in both protocol 0 and 1)
229
- 98, # BUILD (identical in both protocol 0 and 1)
230
- 115, # SETITEM (identical in both protocol 0 and 1)
231
- 116 # TUPLE (identical in both protocol 0 and 1)
231
+ when Protocol0::MARK, # identical in both protocol 0 and 1
232
+ Protocol0::NONE, # identical in both protocol 0 and 1
233
+ Protocol0::REDUCE, # identical in both protocol 0 and 1
234
+ Protocol0::APPEND, # identical in both protocol 0 and 1
235
+ Protocol0::BUILD, # identical in both protocol 0 and 1
236
+ Protocol0::SETITEM, # identical in both protocol 0 and 1
237
+ Protocol0::TUPLE # identical in both protocol 0 and 1
232
238
  infer_protocol_version(io)
233
- when 99 # GLOBAL
239
+ when Protocol0::GLOBAL
234
240
  first_nl_string = io.gets
235
241
  second_nl_string = io.gets
236
242