python-pickle 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/ChangeLog.md +17 -0
  3. data/README.md +4 -1
  4. data/lib/python/pickle/deserializer.rb +142 -80
  5. data/lib/python/pickle/instructions/bin_persid.rb +31 -0
  6. data/lib/python/pickle/instructions/global.rb +11 -41
  7. data/lib/python/pickle/instructions/has_namespace_and_name.rb +61 -0
  8. data/lib/python/pickle/instructions/inst.rb +34 -0
  9. data/lib/python/pickle/instructions/next_buffer.rb +5 -1
  10. data/lib/python/pickle/instructions/obj.rb +30 -0
  11. data/lib/python/pickle/instructions/persid.rb +31 -0
  12. data/lib/python/pickle/instructions/readonly_buffer.rb +4 -0
  13. data/lib/python/pickle/instructions.rb +64 -0
  14. data/lib/python/pickle/protocol0.rb +313 -68
  15. data/lib/python/pickle/protocol1.rb +225 -93
  16. data/lib/python/pickle/protocol2.rb +205 -124
  17. data/lib/python/pickle/protocol3.rb +92 -123
  18. data/lib/python/pickle/protocol4.rb +188 -165
  19. data/lib/python/pickle/protocol5.rb +98 -166
  20. data/lib/python/pickle/version.rb +1 -1
  21. data/lib/python/pickle.rb +71 -39
  22. data/spec/deserializer_spec.rb +359 -0
  23. data/spec/fixtures/set_v0.pkl +11 -0
  24. data/spec/fixtures/set_v1.pkl +0 -0
  25. data/spec/fixtures/set_v2.pkl +0 -0
  26. data/spec/fixtures/set_v3.pkl +0 -0
  27. data/spec/fixtures/set_v4.pkl +0 -0
  28. data/spec/fixtures/set_v5.pkl +0 -0
  29. data/spec/generate_pickles2.py +1 -0
  30. data/spec/generate_pickles3.py +1 -0
  31. data/spec/integration/load/protocol0_spec.rb +10 -0
  32. data/spec/integration/load/protocol1_spec.rb +10 -0
  33. data/spec/integration/load/protocol2_spec.rb +10 -0
  34. data/spec/integration/load/protocol3_spec.rb +10 -0
  35. data/spec/integration/load/protocol4_spec.rb +10 -0
  36. data/spec/integration/load/protocol5_spec.rb +10 -0
  37. data/spec/pickle_spec.rb +61 -0
  38. data/spec/protocol0_read_instruction_examples.rb +44 -0
  39. metadata +14 -2
@@ -7,14 +7,26 @@ module Python
7
7
  module Pickle
8
8
  class Protocol5 < Protocol4
9
9
 
10
- # Opcodes for Pickle protocol 5.
10
+ # The `BYTEARRAY8` opcode.
11
11
  #
12
- # @see https://peps.python.org/pep-0574/
13
- OPCODES = Protocol4::OPCODES + Set[
14
- 150, # BYTEARRAY8
15
- 151, # NEXT_BUFFER
16
- 152, # READONLY_BUFFER
17
- ]
12
+ # @since 0.2.0
13
+ #
14
+ # @see https://github.com/python/cpython/blob/v2.7/Lib/pickle.py#L193
15
+ BYTEARRAY8 = 150
16
+
17
+ # The `NEXT_BUFFER` opcode.
18
+ #
19
+ # @since 0.2.0
20
+ #
21
+ # @see https://github.com/python/cpython/blob/v2.7/Lib/pickle.py#L194
22
+ NEXT_BUFFER = 151
23
+
24
+ # The `READONLY_BUFFER` opcode.
25
+ #
26
+ # @since 0.2.0
27
+ #
28
+ # @see https://github.com/python/cpython/blob/v2.7/Lib/pickle.py#L195
29
+ READONLY_BUFFER = 152
18
30
 
19
31
  #
20
32
  # Reads an instruction from the pickle stream.
@@ -30,180 +42,86 @@ module Python
30
42
  #
31
43
  # Protocol 0 instructions
32
44
  #
33
- when 40 # MARK
34
- Instructions::MARK
35
- when 46 # STOP
36
- Instructions::STOP
37
- when 48 # POP
38
- Instructions::POP
39
- when 49 # POP_MARK
40
- Instructions::POP_MARK
41
- when 50 # DUP
42
- Instructions::DUP
43
- when 70 # FLOAT
44
- Instructions::Float.new(read_float)
45
- when 73 # INT
46
- Instructions::Int.new(read_int)
47
- when 76 # LONG
48
- Instructions::Long.new(read_long)
49
- when 78 # NONE
50
- Instructions::NONE
51
- when 82 # REDUCE
52
- Instructions::REDUCE
53
- when 83 # STRING
54
- Instructions::String.new(read_string)
55
- when 86 # UNICODE
56
- Instructions::String.new(read_unicode_string)
57
- when 97 # APPEND
58
- Instructions::APPEND
59
- when 98 # BUILD
60
- Instructions::BUILD
61
- when 99 # GLOBAL
62
- Instructions::Global.new(read_nl_string,read_nl_string)
63
- when 100 # DICT
64
- Instructions::DICT
65
- when 103 # GET
66
- Instructions::Get.new(read_int)
67
- when 108 # LIST
68
- Instructions::LIST
69
- when 112 # PUT
70
- Instructions::Put.new(read_int)
71
- when 115 # SETITEM
72
- Instructions::SETITEM
73
- when 116 # TUPLE
74
- Instructions::TUPLE
45
+ when MARK then Instructions::MARK
46
+ when STOP then Instructions::STOP
47
+ when POP then Instructions::POP
48
+ when POP_MARK then Instructions::POP_MARK
49
+ when DUP then Instructions::DUP
50
+ when FLOAT then read_float_instruction
51
+ when INT then read_int_instruction
52
+ when LONG then read_long_instruction
53
+ when NONE then Instructions::NONE
54
+ when REDUCE then Instructions::REDUCE
55
+ when STRING then read_string_instruction
56
+ when UNICODE then read_unicode_instruction
57
+ when APPEND then Instructions::APPEND
58
+ when BUILD then Instructions::BUILD
59
+ when GLOBAL then read_global_instruction
60
+ when DICT then Instructions::DICT
61
+ when GET then read_get_instruction
62
+ when LIST then Instructions::LIST
63
+ when PUT then read_put_instruction
64
+ when SETITEM then Instructions::SETITEM
65
+ when TUPLE then Instructions::TUPLE
66
+ when INST then read_inst_instruction
67
+ when OBJ then Instructions::OBJ
68
+ when PERSID then read_persid_instruction
69
+ when BINPERSID then Instructions::BINPERSID
75
70
  #
76
71
  # Protocol 1 instructions
77
72
  #
78
- when 41 # EMPTY_TUPLE
79
- Instructions::EMPTY_TUPLE
80
- when 71 # BINFLOAT
81
- Instructions::BinFloat.new(read_float64_be)
82
- when 75 # BININT1
83
- Instructions::BinInt1.new(read_uint8)
84
- when 84 # BINSTRING
85
- length = read_uint32_le
86
- string = @io.read(length)
87
-
88
- Instructions::BinString.new(length,string)
89
- when 85 # SHORT_BINSTRING
90
- length = read_uint8
91
- string = @io.read(length)
92
-
93
- Instructions::ShortBinString.new(length,string)
94
- when 88 # BINUNICODE
95
- length = read_uint32_le
96
- string = @io.read(length).force_encoding(Encoding::UTF_8)
97
-
98
- Instructions::BinUnicode.new(length,string)
99
- when 93 # EMPTY_LIST
100
- Instructions::EMPTY_LIST
101
- when 101 # APPENDS
102
- Instructions::APPENDS
103
- when 104 # BINGET
104
- Instructions::BinGet.new(read_uint8)
105
- when 106 # LONG_BINGET
106
- Instructions::LongBinGet.new(read_uint32_le)
107
- when 113 # BINPUT
108
- Instructions::BinPut.new(read_uint8)
109
- when 117 # SETITEMS
110
- Instructions::SETITEMS
111
- when 125 # EMPTY_DICT
112
- Instructions::EMPTY_DICT
73
+ when EMPTY_TUPLE then Instructions::EMPTY_TUPLE
74
+ when BINFLOAT then read_binfloat_instruction
75
+ when BININT1 then read_binint1_instruction
76
+ when BINSTRING then read_binstring_instruction
77
+ when SHORT_BINSTRING then read_short_binstring_instruction
78
+ when BINUNICODE then read_binunicode_instruction
79
+ when EMPTY_LIST then Instructions::EMPTY_LIST
80
+ when APPENDS then Instructions::APPENDS
81
+ when BINGET then read_binget_instruction
82
+ when LONG_BINGET then read_long_binget_instruction
83
+ when BINPUT then read_binput_instruction
84
+ when SETITEMS then Instructions::SETITEMS
85
+ when EMPTY_DICT then Instructions::EMPTY_DICT
113
86
  #
114
87
  # Protocol 2 instructions
115
88
  #
116
- when 128 # PROT
117
- Instructions::Proto.new(read_uint8)
118
- when 129 # NEWOBJ
119
- Instructions::NEWOBJ
120
- when 130 # EXT1
121
- Instructions::Ext1.new(read_uint8)
122
- when 131 # EXT2
123
- Instructions::Ext2.new(read_uint16_le)
124
- when 132 # EXT4
125
- Instructions::Ext4.new(read_uint32_le)
126
- when 133 # TUPLE1
127
- Instructions::TUPLE1
128
- when 134 # TUPLE2
129
- Instructions::TUPLE2
130
- when 135 # TUPLE3
131
- Instructions::TUPLE3
132
- when 136 # NEWTRUE
133
- Instructions::NEWTRUE
134
- when 137 # NEWFALSE
135
- Instructions::NEWFALSE
136
- when 138 # LONG1
137
- length = read_uint8
138
- long = read_int_le(length)
139
-
140
- Instructions::Long1.new(length,long)
141
- when 139 # LONG4
142
- length = read_uint32_le
143
- long = read_int_le(length)
144
-
145
- Instructions::Long4.new(length,long)
89
+ when PROTO then read_proto_instruction
90
+ when NEWOBJ then Instructions::NEWOBJ
91
+ when EXT1 then read_ext1_instruction
92
+ when EXT2 then read_ext2_instruction
93
+ when EXT4 then read_ext4_instruction
94
+ when TUPLE1 then Instructions::TUPLE1
95
+ when TUPLE2 then Instructions::TUPLE2
96
+ when TUPLE3 then Instructions::TUPLE3
97
+ when NEWTRUE then Instructions::NEWTRUE
98
+ when NEWFALSE then Instructions::NEWFALSE
99
+ when LONG1 then read_long1_instruction
100
+ when LONG4 then read_long4_instruction
146
101
  #
147
102
  # Protocol 3 instructions
148
103
  #
149
- when 66 # BINBYTES
150
- length = read_uint32_le
151
- bytes = @io.read(length)
152
-
153
- Instructions::BinBytes.new(length,bytes)
154
- when 67 # SHORT_BINBYTES
155
- length = read_uint8
156
- bytes = @io.read(length)
157
-
158
- Instructions::ShortBinBytes.new(length,bytes)
104
+ when BINBYTES then read_binbytes_instruction
105
+ when SHORT_BINBYTES then read_short_binbytes_instruction
159
106
  #
160
107
  # Protocol 4 instructions
161
108
  #
162
- when 140 # SHORT_BINUNICODE
163
- length = read_uint8
164
- string = read_utf8_string(length)
165
-
166
- Instructions::ShortBinUnicode.new(length,string)
167
- when 141 # BINUNICODE8
168
- length = read_uint64_le
169
- string = read_utf8_string(length)
170
-
171
- Instructions::BinUnicode8.new(length,string)
172
- when 142 # BINBYTES8
173
- length = read_uint64_le
174
- bytes = @io.read(length)
175
-
176
- Instructions::BinBytes8.new(length,bytes)
177
- when 143 # EMPTY_SET
178
- Instructions::EMPTY_SET
179
- when 144 # ADDITEMS
180
- Instructions::ADDITEMS
181
- when 145 # FROZENSET
182
- Instructions::FROZENSET
183
- when 146 # NEWOBJ_EX
184
- Instructions::NEWOBJ_EX
185
- when 147 # STACK_GLOBAL
186
- Instructions::STACK_GLOBAL
187
- when 148 # MEMOIZE
188
- Instructions::MEMOIZE
189
- when 149 # FRAME
190
- length = read_uint64_le
191
-
192
- enter_frame(read_frame(length))
193
-
194
- Instructions::Frame.new(length)
109
+ when SHORT_BINUNICODE then read_short_binunicode_instruction
110
+ when BINUNICODE8 then read_binunicode8_instruction
111
+ when BINBYTES8 then read_binbytes8_instruction
112
+ when EMPTY_SET then Instructions::EMPTY_SET
113
+ when ADDITEMS then Instructions::ADDITEMS
114
+ when FROZENSET then Instructions::FROZENSET
115
+ when NEWOBJ_EX then Instructions::NEWOBJ_EX
116
+ when STACK_GLOBAL then Instructions::STACK_GLOBAL
117
+ when MEMOIZE then Instructions::MEMOIZE
118
+ when FRAME then read_frame_instruction
195
119
  #
196
120
  # Protocol 5 instructions.
197
121
  #
198
- when 150 # BYTEARRAY8
199
- length = read_uint64_le
200
- bytes = @io.read(length)
201
-
202
- Instructions::ByteArray8.new(length,bytes)
203
- when 151 # NEXT_BUFFER
204
- Instructions::NEXT_BUFFER
205
- when 152 # READONLY_BUFFER
206
- Instructions::READONLY_BUFFER
122
+ when BYTEARRAY8 then read_bytearray8_instruction
123
+ when NEXT_BUFFER then Instructions::NEXT_BUFFER
124
+ when READONLY_BUFFER then Instructions::READONLY_BUFFER
207
125
  else
208
126
  raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 5")
209
127
  end
@@ -213,6 +131,20 @@ module Python
213
131
  end
214
132
  end
215
133
 
134
+ #
135
+ # Reads a `BYTEARRAY8` instruction.
136
+ #
137
+ # @return [Instructions::ByteArray8]
138
+ #
139
+ # @since 0.2.0
140
+ #
141
+ def read_bytearray8_instruction
142
+ length = read_uint64_le
143
+ bytes = @io.read(length)
144
+
145
+ Instructions::ByteArray8.new(length,bytes)
146
+ end
147
+
216
148
  end
217
149
  end
218
150
  end
@@ -1,6 +1,6 @@
1
1
  module Python
2
2
  module Pickle
3
3
  # python-pickle version
4
- VERSION = '0.1.0'
4
+ VERSION = '0.2.0'
5
5
  end
6
6
  end
data/lib/python/pickle.rb CHANGED
@@ -28,7 +28,7 @@ module Python
28
28
  # The default protocol version to use.
29
29
  #
30
30
  # @api public
31
- DEFAULT_PROTCOL = 4
31
+ DEFAULT_PROTOCOL = 4
32
32
 
33
33
  # The highest protocol version supported.
34
34
  #
@@ -88,12 +88,25 @@ module Python
88
88
  # The explicit protocol version to use. If `nil` the protocol version will
89
89
  # be inferred by inspecting the first two bytes of the stream.
90
90
  #
91
+ # @param [Hash{Symbol => Object}] kwargs
92
+ # Additional keyword arguments.
93
+ #
94
+ # @option kwargs [Hash{Integer => Object}] :extensions
95
+ # A Hash of registered extension IDs and their Objects.
96
+ #
97
+ # @option kwargs [Hash{String => Hash{String => Class,Method}}] :constants
98
+ # An optional mapping of custom Python constant names to Ruby classes
99
+ # or methods.
100
+ #
101
+ # @option kwargs [Enumerable, nil] :buffers
102
+ # An enumerable list of out-of-band buffers.
103
+ #
91
104
  # @api public
92
105
  #
93
- def self.load(data,**kwargs)
106
+ def self.load(data, protocol: nil, **kwargs)
94
107
  deserializer = Deserializer.new(**kwargs)
95
108
 
96
- parse(data) do |instruction|
109
+ parse(data, protocol: protocol) do |instruction|
97
110
  status, object = deserializer.execute(instruction)
98
111
 
99
112
  if status == :halt
@@ -110,11 +123,30 @@ module Python
110
123
  # @param [String] path
111
124
  # The path of the file.
112
125
  #
126
+ # @param [Hash{Symbol => Object}] kwargs
127
+ # Additional keyword arguments.
128
+ #
129
+ # @option kwargs [Hash{Integer => Object}] :extensions
130
+ # A Hash of registered extension IDs and their Objects.
131
+ #
132
+ # @option kwargs [Hash{String => Hash{String => Class,Method}}] :constants
133
+ # An optional mapping of custom Python constant names to Ruby classes
134
+ # or methods.
135
+ #
136
+ # @option kwargs [Enumerable, nil] :buffers
137
+ # An enumerable list of out-of-band buffers.
138
+ #
113
139
  # @return [Object]
114
140
  # The deserialized object.
115
141
  #
116
142
  def self.load_file(path,**kwargs)
117
- load(File.open(path,'rb'),**kwargs)
143
+ result = nil
144
+
145
+ File.open(path,'rb') do |file|
146
+ result = load(file,**kwargs)
147
+ end
148
+
149
+ return result
118
150
  end
119
151
 
120
152
  #
@@ -129,12 +161,12 @@ module Python
129
161
  # @param [Integer] protocol
130
162
  # The desired Python Pickle protocol to use.
131
163
  #
164
+ # @note serializing is currently not supported.
165
+ #
132
166
  # @api public
133
167
  #
134
168
  def self.dump(object,output=nil, protocol: DEFAULT_PROTOCOL)
135
- if (protocol < 0) || (protocol > HIGHEST_PROTOCOL)
136
- raise(ArgumentError,"protocol must be between 0 or #{HIGHEST_PROTOCOL}, but was #{protocol.inspect}")
137
- end
169
+ raise(NotImplementedError,"pickle serializing is currently not supported")
138
170
  end
139
171
 
140
172
  #
@@ -157,38 +189,38 @@ module Python
157
189
 
158
190
  begin
159
191
  case opcode
160
- when 0x80 # PROTO (added in protocol 2)
192
+ when Protocol2::PROTO
161
193
  version = io.getbyte
162
194
  io.ungetbyte(version)
163
195
  return version
164
- when 48, # POP (protocol 0)
165
- 50, # DUP (protocol 0)
166
- 70, # FLOAT (protocol 0)
167
- 83, # STRING (protocol 0)
168
- 86, # UNICODE (protocol 0)
169
- 100, # DICT (protocol 0)
170
- 103, # GET (protocol 0)
171
- 108, # LIST (protocol 0)
172
- 112 # PUT (protocol 0)
196
+ when Protocol0::POP,
197
+ Protocol0::DUP,
198
+ Protocol0::FLOAT,
199
+ Protocol0::STRING,
200
+ Protocol0::UNICODE,
201
+ Protocol0::DICT,
202
+ Protocol0::GET,
203
+ Protocol0::LIST,
204
+ Protocol0::PUT
173
205
  0
174
- when 41, # EMPTY_TUPLE (protocol 1)
175
- 71, # BINFLOAT (protocol 1)
176
- 75, # BININT1 (protocol 1)
177
- 84, # BINSTRING (protocol 1)
178
- 85, # SHORT_BINSTRING (protocol 1)
179
- 88, # BINUNICODE (protocol 1)
180
- 93, # EMPTY_LIST (protocol 1)
181
- 101, # APPENDS (protocol 1)
182
- 113, # BINPUT (protocol 1)
183
- 117, # SETITEMS (protocol 1)
184
- 125 # EMPTY_DICT (protocol 1)
206
+ when Protocol1::EMPTY_TUPLE,
207
+ Protocol1::BINFLOAT,
208
+ Protocol1::BININT1,
209
+ Protocol1::BINSTRING,
210
+ Protocol1::SHORT_BINSTRING,
211
+ Protocol1::BINUNICODE,
212
+ Protocol1::EMPTY_LIST,
213
+ Protocol1::APPENDS,
214
+ Protocol1::BINPUT,
215
+ Protocol1::SETITEMS,
216
+ Protocol1::EMPTY_DICT
185
217
  1
186
- when 46 # STOP
218
+ when Protocol0::STOP
187
219
  # if we've read all the way to the end of the stream and still cannot
188
220
  # find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
189
221
  0
190
- when 73, # INT (identical in both protocol 0 and 1)
191
- 76 # LONG (identical in both protocol 0 and 1)
222
+ when Protocol0::INT, # identical in both protocol 0 and 1
223
+ Protocol0::LONG # identical in both protocol 0 and 1
192
224
  chars = io.gets
193
225
 
194
226
  begin
@@ -196,15 +228,15 @@ module Python
196
228
  ensure
197
229
  chars.each_byte.reverse_each { |b| io.ungetbyte(b) }
198
230
  end
199
- when 40, # MARK (identical in both protocol 0 and 1)
200
- 78, # NONE (identical in both protocol 0 and 1)
201
- 82, # REDUCE (identical in both protocol 0 and 1)
202
- 97, # APPEND (identical in both protocol 0 and 1)
203
- 98, # BUILD (identical in both protocol 0 and 1)
204
- 115, # SETITEM (identical in both protocol 0 and 1)
205
- 116 # TUPLE (identical in both protocol 0 and 1)
231
+ when Protocol0::MARK, # identical in both protocol 0 and 1
232
+ Protocol0::NONE, # identical in both protocol 0 and 1
233
+ Protocol0::REDUCE, # identical in both protocol 0 and 1
234
+ Protocol0::APPEND, # identical in both protocol 0 and 1
235
+ Protocol0::BUILD, # identical in both protocol 0 and 1
236
+ Protocol0::SETITEM, # identical in both protocol 0 and 1
237
+ Protocol0::TUPLE # identical in both protocol 0 and 1
206
238
  infer_protocol_version(io)
207
- when 99 # GLOBAL
239
+ when Protocol0::GLOBAL
208
240
  first_nl_string = io.gets
209
241
  second_nl_string = io.gets
210
242