python-pickle 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog.md +12 -0
  3. data/README.md +2 -1
  4. data/lib/python/pickle/deserializer.rb +142 -80
  5. data/lib/python/pickle/instructions/bin_persid.rb +31 -0
  6. data/lib/python/pickle/instructions/global.rb +11 -41
  7. data/lib/python/pickle/instructions/has_namespace_and_name.rb +61 -0
  8. data/lib/python/pickle/instructions/inst.rb +34 -0
  9. data/lib/python/pickle/instructions/next_buffer.rb +5 -1
  10. data/lib/python/pickle/instructions/obj.rb +30 -0
  11. data/lib/python/pickle/instructions/persid.rb +31 -0
  12. data/lib/python/pickle/instructions/readonly_buffer.rb +4 -0
  13. data/lib/python/pickle/instructions.rb +64 -0
  14. data/lib/python/pickle/protocol0.rb +313 -68
  15. data/lib/python/pickle/protocol1.rb +225 -93
  16. data/lib/python/pickle/protocol2.rb +205 -124
  17. data/lib/python/pickle/protocol3.rb +92 -123
  18. data/lib/python/pickle/protocol4.rb +188 -165
  19. data/lib/python/pickle/protocol5.rb +98 -166
  20. data/lib/python/pickle/version.rb +1 -1
  21. data/lib/python/pickle.rb +38 -32
  22. data/spec/deserializer_spec.rb +308 -0
  23. data/spec/fixtures/set_v0.pkl +11 -0
  24. data/spec/fixtures/set_v1.pkl +0 -0
  25. data/spec/fixtures/set_v2.pkl +0 -0
  26. data/spec/fixtures/set_v3.pkl +0 -0
  27. data/spec/fixtures/set_v4.pkl +0 -0
  28. data/spec/fixtures/set_v5.pkl +0 -0
  29. data/spec/generate_pickles2.py +1 -0
  30. data/spec/generate_pickles3.py +1 -0
  31. data/spec/integration/load/protocol0_spec.rb +10 -0
  32. data/spec/integration/load/protocol1_spec.rb +10 -0
  33. data/spec/integration/load/protocol2_spec.rb +10 -0
  34. data/spec/integration/load/protocol3_spec.rb +10 -0
  35. data/spec/integration/load/protocol4_spec.rb +10 -0
  36. data/spec/integration/load/protocol5_spec.rb +10 -0
  37. data/spec/protocol0_read_instruction_examples.rb +44 -0
  38. metadata +14 -2
@@ -7,14 +7,26 @@ module Python
7
7
  module Pickle
8
8
  class Protocol5 < Protocol4
9
9
 
10
- # Opcodes for Pickle protocol 5.
10
+ # The `BYTEARRAY8` opcode.
11
11
  #
12
- # @see https://peps.python.org/pep-0574/
13
- OPCODES = Protocol4::OPCODES + Set[
14
- 150, # BYTEARRAY8
15
- 151, # NEXT_BUFFER
16
- 152, # READONLY_BUFFER
17
- ]
12
+ # @since 0.2.0
13
+ #
14
+ # @see https://peps.python.org/pep-0574/
15
+ BYTEARRAY8 = 150
16
+
17
+ # The `NEXT_BUFFER` opcode.
18
+ #
19
+ # @since 0.2.0
20
+ #
21
+ # @see https://peps.python.org/pep-0574/
22
+ NEXT_BUFFER = 151
23
+
24
+ # The `READONLY_BUFFER` opcode.
25
+ #
26
+ # @since 0.2.0
27
+ #
28
+ # @see https://peps.python.org/pep-0574/
29
+ READONLY_BUFFER = 152
18
30
 
19
31
  #
20
32
  # Reads an instruction from the pickle stream.
@@ -30,180 +42,86 @@ module Python
30
42
  #
31
43
  # Protocol 0 instructions
32
44
  #
33
- when 40 # MARK
34
- Instructions::MARK
35
- when 46 # STOP
36
- Instructions::STOP
37
- when 48 # POP
38
- Instructions::POP
39
- when 49 # POP_MARK
40
- Instructions::POP_MARK
41
- when 50 # DUP
42
- Instructions::DUP
43
- when 70 # FLOAT
44
- Instructions::Float.new(read_float)
45
- when 73 # INT
46
- Instructions::Int.new(read_int)
47
- when 76 # LONG
48
- Instructions::Long.new(read_long)
49
- when 78 # NONE
50
- Instructions::NONE
51
- when 82 # REDUCE
52
- Instructions::REDUCE
53
- when 83 # STRING
54
- Instructions::String.new(read_string)
55
- when 86 # UNICODE
56
- Instructions::String.new(read_unicode_string)
57
- when 97 # APPEND
58
- Instructions::APPEND
59
- when 98 # BUILD
60
- Instructions::BUILD
61
- when 99 # GLOBAL
62
- Instructions::Global.new(read_nl_string,read_nl_string)
63
- when 100 # DICT
64
- Instructions::DICT
65
- when 103 # GET
66
- Instructions::Get.new(read_int)
67
- when 108 # LIST
68
- Instructions::LIST
69
- when 112 # PUT
70
- Instructions::Put.new(read_int)
71
- when 115 # SETITEM
72
- Instructions::SETITEM
73
- when 116 # TUPLE
74
- Instructions::TUPLE
45
+ when MARK then Instructions::MARK
46
+ when STOP then Instructions::STOP
47
+ when POP then Instructions::POP
48
+ when POP_MARK then Instructions::POP_MARK
49
+ when DUP then Instructions::DUP
50
+ when FLOAT then read_float_instruction
51
+ when INT then read_int_instruction
52
+ when LONG then read_long_instruction
53
+ when NONE then Instructions::NONE
54
+ when REDUCE then Instructions::REDUCE
55
+ when STRING then read_string_instruction
56
+ when UNICODE then read_unicode_instruction
57
+ when APPEND then Instructions::APPEND
58
+ when BUILD then Instructions::BUILD
59
+ when GLOBAL then read_global_instruction
60
+ when DICT then Instructions::DICT
61
+ when GET then read_get_instruction
62
+ when LIST then Instructions::LIST
63
+ when PUT then read_put_instruction
64
+ when SETITEM then Instructions::SETITEM
65
+ when TUPLE then Instructions::TUPLE
66
+ when INST then read_inst_instruction
67
+ when OBJ then Instructions::OBJ
68
+ when PERSID then read_persid_instruction
69
+ when BINPERSID then Instructions::BINPERSID
75
70
  #
76
71
  # Protocol 1 instructions
77
72
  #
78
- when 41 # EMPTY_TUPLE
79
- Instructions::EMPTY_TUPLE
80
- when 71 # BINFLOAT
81
- Instructions::BinFloat.new(read_float64_be)
82
- when 75 # BININT1
83
- Instructions::BinInt1.new(read_uint8)
84
- when 84 # BINSTRING
85
- length = read_uint32_le
86
- string = @io.read(length)
87
-
88
- Instructions::BinString.new(length,string)
89
- when 85 # SHORT_BINSTRING
90
- length = read_uint8
91
- string = @io.read(length)
92
-
93
- Instructions::ShortBinString.new(length,string)
94
- when 88 # BINUNICODE
95
- length = read_uint32_le
96
- string = @io.read(length).force_encoding(Encoding::UTF_8)
97
-
98
- Instructions::BinUnicode.new(length,string)
99
- when 93 # EMPTY_LIST
100
- Instructions::EMPTY_LIST
101
- when 101 # APPENDS
102
- Instructions::APPENDS
103
- when 104 # BINGET
104
- Instructions::BinGet.new(read_uint8)
105
- when 106 # LONG_BINGET
106
- Instructions::LongBinGet.new(read_uint32_le)
107
- when 113 # BINPUT
108
- Instructions::BinPut.new(read_uint8)
109
- when 117 # SETITEMS
110
- Instructions::SETITEMS
111
- when 125 # EMPTY_DICT
112
- Instructions::EMPTY_DICT
73
+ when EMPTY_TUPLE then Instructions::EMPTY_TUPLE
74
+ when BINFLOAT then read_binfloat_instruction
75
+ when BININT1 then read_binint1_instruction
76
+ when BINSTRING then read_binstring_instruction
77
+ when SHORT_BINSTRING then read_short_binstring_instruction
78
+ when BINUNICODE then read_binunicode_instruction
79
+ when EMPTY_LIST then Instructions::EMPTY_LIST
80
+ when APPENDS then Instructions::APPENDS
81
+ when BINGET then read_binget_instruction
82
+ when LONG_BINGET then read_long_binget_instruction
83
+ when BINPUT then read_binput_instruction
84
+ when SETITEMS then Instructions::SETITEMS
85
+ when EMPTY_DICT then Instructions::EMPTY_DICT
113
86
  #
114
87
  # Protocol 2 instructions
115
88
  #
116
- when 128 # PROT
117
- Instructions::Proto.new(read_uint8)
118
- when 129 # NEWOBJ
119
- Instructions::NEWOBJ
120
- when 130 # EXT1
121
- Instructions::Ext1.new(read_uint8)
122
- when 131 # EXT2
123
- Instructions::Ext2.new(read_uint16_le)
124
- when 132 # EXT4
125
- Instructions::Ext4.new(read_uint32_le)
126
- when 133 # TUPLE1
127
- Instructions::TUPLE1
128
- when 134 # TUPLE2
129
- Instructions::TUPLE2
130
- when 135 # TUPLE3
131
- Instructions::TUPLE3
132
- when 136 # NEWTRUE
133
- Instructions::NEWTRUE
134
- when 137 # NEWFALSE
135
- Instructions::NEWFALSE
136
- when 138 # LONG1
137
- length = read_uint8
138
- long = read_int_le(length)
139
-
140
- Instructions::Long1.new(length,long)
141
- when 139 # LONG4
142
- length = read_uint32_le
143
- long = read_int_le(length)
144
-
145
- Instructions::Long4.new(length,long)
89
+ when PROTO then read_proto_instruction
90
+ when NEWOBJ then Instructions::NEWOBJ
91
+ when EXT1 then read_ext1_instruction
92
+ when EXT2 then read_ext2_instruction
93
+ when EXT4 then read_ext4_instruction
94
+ when TUPLE1 then Instructions::TUPLE1
95
+ when TUPLE2 then Instructions::TUPLE2
96
+ when TUPLE3 then Instructions::TUPLE3
97
+ when NEWTRUE then Instructions::NEWTRUE
98
+ when NEWFALSE then Instructions::NEWFALSE
99
+ when LONG1 then read_long1_instruction
100
+ when LONG4 then read_long4_instruction
146
101
  #
147
102
  # Protocol 3 instructions
148
103
  #
149
- when 66 # BINBYTES
150
- length = read_uint32_le
151
- bytes = @io.read(length)
152
-
153
- Instructions::BinBytes.new(length,bytes)
154
- when 67 # SHORT_BINBYTES
155
- length = read_uint8
156
- bytes = @io.read(length)
157
-
158
- Instructions::ShortBinBytes.new(length,bytes)
104
+ when BINBYTES then read_binbytes_instruction
105
+ when SHORT_BINBYTES then read_short_binbytes_instruction
159
106
  #
160
107
  # Protocol 4 instructions
161
108
  #
162
- when 140 # SHORT_BINUNICODE
163
- length = read_uint8
164
- string = read_utf8_string(length)
165
-
166
- Instructions::ShortBinUnicode.new(length,string)
167
- when 141 # BINUNICODE8
168
- length = read_uint64_le
169
- string = read_utf8_string(length)
170
-
171
- Instructions::BinUnicode8.new(length,string)
172
- when 142 # BINBYTES8
173
- length = read_uint64_le
174
- bytes = @io.read(length)
175
-
176
- Instructions::BinBytes8.new(length,bytes)
177
- when 143 # EMPTY_SET
178
- Instructions::EMPTY_SET
179
- when 144 # ADDITEMS
180
- Instructions::ADDITEMS
181
- when 145 # FROZENSET
182
- Instructions::FROZENSET
183
- when 146 # NEWOBJ_EX
184
- Instructions::NEWOBJ_EX
185
- when 147 # STACK_GLOBAL
186
- Instructions::STACK_GLOBAL
187
- when 148 # MEMOIZE
188
- Instructions::MEMOIZE
189
- when 149 # FRAME
190
- length = read_uint64_le
191
-
192
- enter_frame(read_frame(length))
193
-
194
- Instructions::Frame.new(length)
109
+ when SHORT_BINUNICODE then read_short_binunicode_instruction
110
+ when BINUNICODE8 then read_binunicode8_instruction
111
+ when BINBYTES8 then read_binbytes8_instruction
112
+ when EMPTY_SET then Instructions::EMPTY_SET
113
+ when ADDITEMS then Instructions::ADDITEMS
114
+ when FROZENSET then Instructions::FROZENSET
115
+ when NEWOBJ_EX then Instructions::NEWOBJ_EX
116
+ when STACK_GLOBAL then Instructions::STACK_GLOBAL
117
+ when MEMOIZE then Instructions::MEMOIZE
118
+ when FRAME then read_frame_instruction
195
119
  #
196
120
  # Protocol 5 instructions.
197
121
  #
198
- when 150 # BYTEARRAY8
199
- length = read_uint64_le
200
- bytes = @io.read(length)
201
-
202
- Instructions::ByteArray8.new(length,bytes)
203
- when 151 # NEXT_BUFFER
204
- Instructions::NEXT_BUFFER
205
- when 152 # READONLY_BUFFER
206
- Instructions::READONLY_BUFFER
122
+ when BYTEARRAY8 then read_bytearray8_instruction
123
+ when NEXT_BUFFER then Instructions::NEXT_BUFFER
124
+ when READONLY_BUFFER then Instructions::READONLY_BUFFER
207
125
  else
208
126
  raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 5")
209
127
  end
@@ -213,6 +131,20 @@ module Python
213
131
  end
214
132
  end
215
133
 
134
+ #
135
+ # Reads a `BYTEARRAY8` instruction.
136
+ #
137
+ # @return [Instructions::ByteArray8]
138
+ #
139
+ # @since 0.2.0
140
+ #
141
+ def read_bytearray8_instruction
142
+ length = read_uint64_le
143
+ bytes = @io.read(length)
144
+
145
+ Instructions::ByteArray8.new(length,bytes)
146
+ end
147
+
216
148
  end
217
149
  end
218
150
  end
@@ -1,6 +1,6 @@
1
1
  module Python
2
2
  module Pickle
3
3
  # python-pickle version
4
- VERSION = '0.1.1'
4
+ VERSION = '0.2.0'
5
5
  end
6
6
  end
data/lib/python/pickle.rb CHANGED
@@ -98,6 +98,9 @@ module Python
98
98
  # An optional mapping of custom Python constant names to Ruby classes
99
99
  # or methods.
100
100
  #
101
+ # @option kwargs [Enumerable, nil] :buffers
102
+ # An enumerable list of out-of-band buffers.
103
+ #
101
104
  # @api public
102
105
  #
103
106
  def self.load(data, protocol: nil, **kwargs)
@@ -130,6 +133,9 @@ module Python
130
133
  # An optional mapping of custom Python constant names to Ruby classes
131
134
  # or methods.
132
135
  #
136
+ # @option kwargs [Enumerable, nil] :buffers
137
+ # An enumerable list of out-of-band buffers.
138
+ #
133
139
  # @return [Object]
134
140
  # The deserialized object.
135
141
  #
@@ -183,38 +189,38 @@ module Python
183
189
 
184
190
  begin
185
191
  case opcode
186
- when 0x80 # PROTO (added in protocol 2)
192
+ when Protocol2::PROTO
187
193
  version = io.getbyte
188
194
  io.ungetbyte(version)
189
195
  return version
190
- when 48, # POP (protocol 0)
191
- 50, # DUP (protocol 0)
192
- 70, # FLOAT (protocol 0)
193
- 83, # STRING (protocol 0)
194
- 86, # UNICODE (protocol 0)
195
- 100, # DICT (protocol 0)
196
- 103, # GET (protocol 0)
197
- 108, # LIST (protocol 0)
198
- 112 # PUT (protocol 0)
196
+ when Protocol0::POP,
197
+ Protocol0::DUP,
198
+ Protocol0::FLOAT,
199
+ Protocol0::STRING,
200
+ Protocol0::UNICODE,
201
+ Protocol0::DICT,
202
+ Protocol0::GET,
203
+ Protocol0::LIST,
204
+ Protocol0::PUT
199
205
  0
200
- when 41, # EMPTY_TUPLE (protocol 1)
201
- 71, # BINFLOAT (protocol 1)
202
- 75, # BININT1 (protocol 1)
203
- 84, # BINSTRING (protocol 1)
204
- 85, # SHORT_BINSTRING (protocol 1)
205
- 88, # BINUNICODE (protocol 1)
206
- 93, # EMPTY_LIST (protocol 1)
207
- 101, # APPENDS (protocol 1)
208
- 113, # BINPUT (protocol 1)
209
- 117, # SETITEMS (protocol 1)
210
- 125 # EMPTY_DICT (protocol 1)
206
+ when Protocol1::EMPTY_TUPLE,
207
+ Protocol1::BINFLOAT,
208
+ Protocol1::BININT1,
209
+ Protocol1::BINSTRING,
210
+ Protocol1::SHORT_BINSTRING,
211
+ Protocol1::BINUNICODE,
212
+ Protocol1::EMPTY_LIST,
213
+ Protocol1::APPENDS,
214
+ Protocol1::BINPUT,
215
+ Protocol1::SETITEMS,
216
+ Protocol1::EMPTY_DICT
211
217
  1
212
- when 46 # STOP
218
+ when Protocol0::STOP
213
219
  # if we've read all the way to the end of the stream and still cannot
214
220
  # find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
215
221
  0
216
- when 73, # INT (identical in both protocol 0 and 1)
217
- 76 # LONG (identical in both protocol 0 and 1)
222
+ when Protocol0::INT, # identical in both protocol 0 and 1
223
+ Protocol0::LONG # identical in both protocol 0 and 1
218
224
  chars = io.gets
219
225
 
220
226
  begin
@@ -222,15 +228,15 @@ module Python
222
228
  ensure
223
229
  chars.each_byte.reverse_each { |b| io.ungetbyte(b) }
224
230
  end
225
- when 40, # MARK (identical in both protocol 0 and 1)
226
- 78, # NONE (identical in both protocol 0 and 1)
227
- 82, # REDUCE (identical in both protocol 0 and 1)
228
- 97, # APPEND (identical in both protocol 0 and 1)
229
- 98, # BUILD (identical in both protocol 0 and 1)
230
- 115, # SETITEM (identical in both protocol 0 and 1)
231
- 116 # TUPLE (identical in both protocol 0 and 1)
231
+ when Protocol0::MARK, # identical in both protocol 0 and 1
232
+ Protocol0::NONE, # identical in both protocol 0 and 1
233
+ Protocol0::REDUCE, # identical in both protocol 0 and 1
234
+ Protocol0::APPEND, # identical in both protocol 0 and 1
235
+ Protocol0::BUILD, # identical in both protocol 0 and 1
236
+ Protocol0::SETITEM, # identical in both protocol 0 and 1
237
+ Protocol0::TUPLE # identical in both protocol 0 and 1
232
238
  infer_protocol_version(io)
233
- when 99 # GLOBAL
239
+ when Protocol0::GLOBAL
234
240
  first_nl_string = io.gets
235
241
  second_nl_string = io.gets
236
242