python-pickle 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/ChangeLog.md +17 -0
  3. data/README.md +4 -1
  4. data/lib/python/pickle/deserializer.rb +142 -80
  5. data/lib/python/pickle/instructions/bin_persid.rb +31 -0
  6. data/lib/python/pickle/instructions/global.rb +11 -41
  7. data/lib/python/pickle/instructions/has_namespace_and_name.rb +61 -0
  8. data/lib/python/pickle/instructions/inst.rb +34 -0
  9. data/lib/python/pickle/instructions/next_buffer.rb +5 -1
  10. data/lib/python/pickle/instructions/obj.rb +30 -0
  11. data/lib/python/pickle/instructions/persid.rb +31 -0
  12. data/lib/python/pickle/instructions/readonly_buffer.rb +4 -0
  13. data/lib/python/pickle/instructions.rb +64 -0
  14. data/lib/python/pickle/protocol0.rb +313 -68
  15. data/lib/python/pickle/protocol1.rb +225 -93
  16. data/lib/python/pickle/protocol2.rb +205 -124
  17. data/lib/python/pickle/protocol3.rb +92 -123
  18. data/lib/python/pickle/protocol4.rb +188 -165
  19. data/lib/python/pickle/protocol5.rb +98 -166
  20. data/lib/python/pickle/version.rb +1 -1
  21. data/lib/python/pickle.rb +71 -39
  22. data/spec/deserializer_spec.rb +359 -0
  23. data/spec/fixtures/set_v0.pkl +11 -0
  24. data/spec/fixtures/set_v1.pkl +0 -0
  25. data/spec/fixtures/set_v2.pkl +0 -0
  26. data/spec/fixtures/set_v3.pkl +0 -0
  27. data/spec/fixtures/set_v4.pkl +0 -0
  28. data/spec/fixtures/set_v5.pkl +0 -0
  29. data/spec/generate_pickles2.py +1 -0
  30. data/spec/generate_pickles3.py +1 -0
  31. data/spec/integration/load/protocol0_spec.rb +10 -0
  32. data/spec/integration/load/protocol1_spec.rb +10 -0
  33. data/spec/integration/load/protocol2_spec.rb +10 -0
  34. data/spec/integration/load/protocol3_spec.rb +10 -0
  35. data/spec/integration/load/protocol4_spec.rb +10 -0
  36. data/spec/integration/load/protocol5_spec.rb +10 -0
  37. data/spec/pickle_spec.rb +61 -0
  38. data/spec/protocol0_read_instruction_examples.rb +44 -0
  39. metadata +14 -2
@@ -7,14 +7,26 @@ module Python
7
7
  module Pickle
8
8
  class Protocol5 < Protocol4
9
9
 
10
- # Opcodes for Pickle protocol 5.
10
+ # The `BYTEARRAY8` opcode.
11
11
  #
12
- # @see https://peps.python.org/pep-0574/
13
- OPCODES = Protocol4::OPCODES + Set[
14
- 150, # BYTEARRAY8
15
- 151, # NEXT_BUFFER
16
- 152, # READONLY_BUFFER
17
- ]
12
+ # @since 0.2.0
13
+ #
14
+ # @see https://peps.python.org/pep-0574/
15
+ BYTEARRAY8 = 150
16
+
17
+ # The `NEXT_BUFFER` opcode.
18
+ #
19
+ # @since 0.2.0
20
+ #
21
+ # @see https://peps.python.org/pep-0574/
22
+ NEXT_BUFFER = 151
23
+
24
+ # The `READONLY_BUFFER` opcode.
25
+ #
26
+ # @since 0.2.0
27
+ #
28
+ # @see https://peps.python.org/pep-0574/
29
+ READONLY_BUFFER = 152
18
30
 
19
31
  #
20
32
  # Reads an instruction from the pickle stream.
@@ -30,180 +42,86 @@ module Python
30
42
  #
31
43
  # Protocol 0 instructions
32
44
  #
33
- when 40 # MARK
34
- Instructions::MARK
35
- when 46 # STOP
36
- Instructions::STOP
37
- when 48 # POP
38
- Instructions::POP
39
- when 49 # POP_MARK
40
- Instructions::POP_MARK
41
- when 50 # DUP
42
- Instructions::DUP
43
- when 70 # FLOAT
44
- Instructions::Float.new(read_float)
45
- when 73 # INT
46
- Instructions::Int.new(read_int)
47
- when 76 # LONG
48
- Instructions::Long.new(read_long)
49
- when 78 # NONE
50
- Instructions::NONE
51
- when 82 # REDUCE
52
- Instructions::REDUCE
53
- when 83 # STRING
54
- Instructions::String.new(read_string)
55
- when 86 # UNICODE
56
- Instructions::String.new(read_unicode_string)
57
- when 97 # APPEND
58
- Instructions::APPEND
59
- when 98 # BUILD
60
- Instructions::BUILD
61
- when 99 # GLOBAL
62
- Instructions::Global.new(read_nl_string,read_nl_string)
63
- when 100 # DICT
64
- Instructions::DICT
65
- when 103 # GET
66
- Instructions::Get.new(read_int)
67
- when 108 # LIST
68
- Instructions::LIST
69
- when 112 # PUT
70
- Instructions::Put.new(read_int)
71
- when 115 # SETITEM
72
- Instructions::SETITEM
73
- when 116 # TUPLE
74
- Instructions::TUPLE
45
+ when MARK then Instructions::MARK
46
+ when STOP then Instructions::STOP
47
+ when POP then Instructions::POP
48
+ when POP_MARK then Instructions::POP_MARK
49
+ when DUP then Instructions::DUP
50
+ when FLOAT then read_float_instruction
51
+ when INT then read_int_instruction
52
+ when LONG then read_long_instruction
53
+ when NONE then Instructions::NONE
54
+ when REDUCE then Instructions::REDUCE
55
+ when STRING then read_string_instruction
56
+ when UNICODE then read_unicode_instruction
57
+ when APPEND then Instructions::APPEND
58
+ when BUILD then Instructions::BUILD
59
+ when GLOBAL then read_global_instruction
60
+ when DICT then Instructions::DICT
61
+ when GET then read_get_instruction
62
+ when LIST then Instructions::LIST
63
+ when PUT then read_put_instruction
64
+ when SETITEM then Instructions::SETITEM
65
+ when TUPLE then Instructions::TUPLE
66
+ when INST then read_inst_instruction
67
+ when OBJ then Instructions::OBJ
68
+ when PERSID then read_persid_instruction
69
+ when BINPERSID then Instructions::BINPERSID
75
70
  #
76
71
  # Protocol 1 instructions
77
72
  #
78
- when 41 # EMPTY_TUPLE
79
- Instructions::EMPTY_TUPLE
80
- when 71 # BINFLOAT
81
- Instructions::BinFloat.new(read_float64_be)
82
- when 75 # BININT1
83
- Instructions::BinInt1.new(read_uint8)
84
- when 84 # BINSTRING
85
- length = read_uint32_le
86
- string = @io.read(length)
87
-
88
- Instructions::BinString.new(length,string)
89
- when 85 # SHORT_BINSTRING
90
- length = read_uint8
91
- string = @io.read(length)
92
-
93
- Instructions::ShortBinString.new(length,string)
94
- when 88 # BINUNICODE
95
- length = read_uint32_le
96
- string = @io.read(length).force_encoding(Encoding::UTF_8)
97
-
98
- Instructions::BinUnicode.new(length,string)
99
- when 93 # EMPTY_LIST
100
- Instructions::EMPTY_LIST
101
- when 101 # APPENDS
102
- Instructions::APPENDS
103
- when 104 # BINGET
104
- Instructions::BinGet.new(read_uint8)
105
- when 106 # LONG_BINGET
106
- Instructions::LongBinGet.new(read_uint32_le)
107
- when 113 # BINPUT
108
- Instructions::BinPut.new(read_uint8)
109
- when 117 # SETITEMS
110
- Instructions::SETITEMS
111
- when 125 # EMPTY_DICT
112
- Instructions::EMPTY_DICT
73
+ when EMPTY_TUPLE then Instructions::EMPTY_TUPLE
74
+ when BINFLOAT then read_binfloat_instruction
75
+ when BININT1 then read_binint1_instruction
76
+ when BINSTRING then read_binstring_instruction
77
+ when SHORT_BINSTRING then read_short_binstring_instruction
78
+ when BINUNICODE then read_binunicode_instruction
79
+ when EMPTY_LIST then Instructions::EMPTY_LIST
80
+ when APPENDS then Instructions::APPENDS
81
+ when BINGET then read_binget_instruction
82
+ when LONG_BINGET then read_long_binget_instruction
83
+ when BINPUT then read_binput_instruction
84
+ when SETITEMS then Instructions::SETITEMS
85
+ when EMPTY_DICT then Instructions::EMPTY_DICT
113
86
  #
114
87
  # Protocol 2 instructions
115
88
  #
116
- when 128 # PROTO
117
- Instructions::Proto.new(read_uint8)
118
- when 129 # NEWOBJ
119
- Instructions::NEWOBJ
120
- when 130 # EXT1
121
- Instructions::Ext1.new(read_uint8)
122
- when 131 # EXT2
123
- Instructions::Ext2.new(read_uint16_le)
124
- when 132 # EXT4
125
- Instructions::Ext4.new(read_uint32_le)
126
- when 133 # TUPLE1
127
- Instructions::TUPLE1
128
- when 134 # TUPLE2
129
- Instructions::TUPLE2
130
- when 135 # TUPLE3
131
- Instructions::TUPLE3
132
- when 136 # NEWTRUE
133
- Instructions::NEWTRUE
134
- when 137 # NEWFALSE
135
- Instructions::NEWFALSE
136
- when 138 # LONG1
137
- length = read_uint8
138
- long = read_int_le(length)
139
-
140
- Instructions::Long1.new(length,long)
141
- when 139 # LONG4
142
- length = read_uint32_le
143
- long = read_int_le(length)
144
-
145
- Instructions::Long4.new(length,long)
89
+ when PROTO then read_proto_instruction
90
+ when NEWOBJ then Instructions::NEWOBJ
91
+ when EXT1 then read_ext1_instruction
92
+ when EXT2 then read_ext2_instruction
93
+ when EXT4 then read_ext4_instruction
94
+ when TUPLE1 then Instructions::TUPLE1
95
+ when TUPLE2 then Instructions::TUPLE2
96
+ when TUPLE3 then Instructions::TUPLE3
97
+ when NEWTRUE then Instructions::NEWTRUE
98
+ when NEWFALSE then Instructions::NEWFALSE
99
+ when LONG1 then read_long1_instruction
100
+ when LONG4 then read_long4_instruction
146
101
  #
147
102
  # Protocol 3 instructions
148
103
  #
149
- when 66 # BINBYTES
150
- length = read_uint32_le
151
- bytes = @io.read(length)
152
-
153
- Instructions::BinBytes.new(length,bytes)
154
- when 67 # SHORT_BINBYTES
155
- length = read_uint8
156
- bytes = @io.read(length)
157
-
158
- Instructions::ShortBinBytes.new(length,bytes)
104
+ when BINBYTES then read_binbytes_instruction
105
+ when SHORT_BINBYTES then read_short_binbytes_instruction
159
106
  #
160
107
  # Protocol 4 instructions
161
108
  #
162
- when 140 # SHORT_BINUNICODE
163
- length = read_uint8
164
- string = read_utf8_string(length)
165
-
166
- Instructions::ShortBinUnicode.new(length,string)
167
- when 141 # BINUNICODE8
168
- length = read_uint64_le
169
- string = read_utf8_string(length)
170
-
171
- Instructions::BinUnicode8.new(length,string)
172
- when 142 # BINBYTES8
173
- length = read_uint64_le
174
- bytes = @io.read(length)
175
-
176
- Instructions::BinBytes8.new(length,bytes)
177
- when 143 # EMPTY_SET
178
- Instructions::EMPTY_SET
179
- when 144 # ADDITEMS
180
- Instructions::ADDITEMS
181
- when 145 # FROZENSET
182
- Instructions::FROZENSET
183
- when 146 # NEWOBJ_EX
184
- Instructions::NEWOBJ_EX
185
- when 147 # STACK_GLOBAL
186
- Instructions::STACK_GLOBAL
187
- when 148 # MEMOIZE
188
- Instructions::MEMOIZE
189
- when 149 # FRAME
190
- length = read_uint64_le
191
-
192
- enter_frame(read_frame(length))
193
-
194
- Instructions::Frame.new(length)
109
+ when SHORT_BINUNICODE then read_short_binunicode_instruction
110
+ when BINUNICODE8 then read_binunicode8_instruction
111
+ when BINBYTES8 then read_binbytes8_instruction
112
+ when EMPTY_SET then Instructions::EMPTY_SET
113
+ when ADDITEMS then Instructions::ADDITEMS
114
+ when FROZENSET then Instructions::FROZENSET
115
+ when NEWOBJ_EX then Instructions::NEWOBJ_EX
116
+ when STACK_GLOBAL then Instructions::STACK_GLOBAL
117
+ when MEMOIZE then Instructions::MEMOIZE
118
+ when FRAME then read_frame_instruction
195
119
  #
196
120
  # Protocol 5 instructions.
197
121
  #
198
- when 150 # BYTEARRAY8
199
- length = read_uint64_le
200
- bytes = @io.read(length)
201
-
202
- Instructions::ByteArray8.new(length,bytes)
203
- when 151 # NEXT_BUFFER
204
- Instructions::NEXT_BUFFER
205
- when 152 # READONLY_BUFFER
206
- Instructions::READONLY_BUFFER
122
+ when BYTEARRAY8 then read_bytearray8_instruction
123
+ when NEXT_BUFFER then Instructions::NEXT_BUFFER
124
+ when READONLY_BUFFER then Instructions::READONLY_BUFFER
207
125
  else
208
126
  raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 5")
209
127
  end
@@ -213,6 +131,20 @@ module Python
213
131
  end
214
132
  end
215
133
 
134
+ #
135
+ # Reads a `BYTEARRAY8` instruction.
136
+ #
137
+ # @return [Instructions::ByteArray8]
138
+ #
139
+ # @since 0.2.0
140
+ #
141
+ def read_bytearray8_instruction
142
+ length = read_uint64_le
143
+ bytes = @io.read(length)
144
+
145
+ Instructions::ByteArray8.new(length,bytes)
146
+ end
147
+
216
148
  end
217
149
  end
218
150
  end
@@ -1,6 +1,6 @@
1
1
  module Python
2
2
  module Pickle
3
3
  # python-pickle version
4
- VERSION = '0.1.0'
4
+ VERSION = '0.2.0'
5
5
  end
6
6
  end
data/lib/python/pickle.rb CHANGED
@@ -28,7 +28,7 @@ module Python
28
28
  # The default protocol version to use.
29
29
  #
30
30
  # @api public
31
- DEFAULT_PROTCOL = 4
31
+ DEFAULT_PROTOCOL = 4
32
32
 
33
33
  # The highest protocol version supported.
34
34
  #
@@ -88,12 +88,25 @@ module Python
88
88
  # The explicit protocol version to use. If `nil` the protocol version will
89
89
  # be inferred by inspecting the first two bytes of the stream.
90
90
  #
91
+ # @param [Hash{Symbol => Object}] kwargs
92
+ # Additional keyword arguments.
93
+ #
94
+ # @option kwargs [Hash{Integer => Object}] :extensions
95
+ # A Hash of registered extension IDs and their Objects.
96
+ #
97
+ # @option kwargs [Hash{String => Hash{String => Class,Method}}] :constants
98
+ # An optional mapping of custom Python constant names to Ruby classes
99
+ # or methods.
100
+ #
101
+ # @option kwargs [Enumerable, nil] :buffers
102
+ # An enumerable list of out-of-band buffers.
103
+ #
91
104
  # @api public
92
105
  #
93
- def self.load(data,**kwargs)
106
+ def self.load(data, protocol: nil, **kwargs)
94
107
  deserializer = Deserializer.new(**kwargs)
95
108
 
96
- parse(data) do |instruction|
109
+ parse(data, protocol: protocol) do |instruction|
97
110
  status, object = deserializer.execute(instruction)
98
111
 
99
112
  if status == :halt
@@ -110,11 +123,30 @@ module Python
110
123
  # @param [String] path
111
124
  # The path of the file.
112
125
  #
126
+ # @param [Hash{Symbol => Object}] kwargs
127
+ # Additional keyword arguments.
128
+ #
129
+ # @option kwargs [Hash{Integer => Object}] :extensions
130
+ # A Hash of registered extension IDs and their Objects.
131
+ #
132
+ # @option kwargs [Hash{String => Hash{String => Class,Method}}] :constants
133
+ # An optional mapping of custom Python constant names to Ruby classes
134
+ # or methods.
135
+ #
136
+ # @option kwargs [Enumerable, nil] :buffers
137
+ # An enumerable list of out-of-band buffers.
138
+ #
113
139
  # @return [Object]
114
140
  # The deserialized object.
115
141
  #
116
142
  def self.load_file(path,**kwargs)
117
- load(File.open(path,'rb'),**kwargs)
143
+ result = nil
144
+
145
+ File.open(path,'rb') do |file|
146
+ result = load(file,**kwargs)
147
+ end
148
+
149
+ return result
118
150
  end
119
151
 
120
152
  #
@@ -129,12 +161,12 @@ module Python
129
161
  # @param [Integer] protocol
130
162
  # The desired Python Pickle protocol to use.
131
163
  #
164
+ # @note serializing is currently not supported.
165
+ #
132
166
  # @api public
133
167
  #
134
168
  def self.dump(object,output=nil, protocol: DEFAULT_PROTOCOL)
135
- if (protocol < 0) || (protocol > HIGHEST_PROTOCOL)
136
- raise(ArgumentError,"protocol must be between 0 or #{HIGHEST_PROTOCOL}, but was #{protocol.inspect}")
137
- end
169
+ raise(NotImplementedError,"pickle serializing is currently not supported")
138
170
  end
139
171
 
140
172
  #
@@ -157,38 +189,38 @@ module Python
157
189
 
158
190
  begin
159
191
  case opcode
160
- when 0x80 # PROTO (added in protocol 2)
192
+ when Protocol2::PROTO
161
193
  version = io.getbyte
162
194
  io.ungetbyte(version)
163
195
  return version
164
- when 48, # POP (protocol 0)
165
- 50, # DUP (protocol 0)
166
- 70, # FLOAT (protocol 0)
167
- 83, # STRING (protocol 0)
168
- 86, # UNICODE (protocol 0)
169
- 100, # DICT (protocol 0)
170
- 103, # GET (protocol 0)
171
- 108, # LIST (protocol 0)
172
- 112 # PUT (protocol 0)
196
+ when Protocol0::POP,
197
+ Protocol0::DUP,
198
+ Protocol0::FLOAT,
199
+ Protocol0::STRING,
200
+ Protocol0::UNICODE,
201
+ Protocol0::DICT,
202
+ Protocol0::GET,
203
+ Protocol0::LIST,
204
+ Protocol0::PUT
173
205
  0
174
- when 41, # EMPTY_TUPLE (protocol 1)
175
- 71, # BINFLOAT (protocol 1)
176
- 75, # BININT1 (protocol 1)
177
- 84, # BINSTRING (protocol 1)
178
- 85, # SHORT_BINSTRING (protocol 1)
179
- 88, # BINUNICODE (protocol 1)
180
- 93, # EMPTY_LIST (protocol 1)
181
- 101, # APPENDS (protocol 1)
182
- 113, # BINPUT (protocol 1)
183
- 117, # SETITEMS (protocol 1)
184
- 125 # EMPTY_DICT (protocol 1)
206
+ when Protocol1::EMPTY_TUPLE,
207
+ Protocol1::BINFLOAT,
208
+ Protocol1::BININT1,
209
+ Protocol1::BINSTRING,
210
+ Protocol1::SHORT_BINSTRING,
211
+ Protocol1::BINUNICODE,
212
+ Protocol1::EMPTY_LIST,
213
+ Protocol1::APPENDS,
214
+ Protocol1::BINPUT,
215
+ Protocol1::SETITEMS,
216
+ Protocol1::EMPTY_DICT
185
217
  1
186
- when 46 # STOP
218
+ when Protocol0::STOP
187
219
  # if we've read all the way to the end of the stream and still cannot
188
220
  # find any protocol 0 or protocol 1 specific opcodes, assume protocol 0
189
221
  0
190
- when 73, # INT (identical in both protocol 0 and 1)
191
- 76 # LONG (identical in both protocol 0 and 1)
222
+ when Protocol0::INT, # identical in both protocol 0 and 1
223
+ Protocol0::LONG # identical in both protocol 0 and 1
192
224
  chars = io.gets
193
225
 
194
226
  begin
@@ -196,15 +228,15 @@ module Python
196
228
  ensure
197
229
  chars.each_byte.reverse_each { |b| io.ungetbyte(b) }
198
230
  end
199
- when 40, # MARK (identical in both protocol 0 and 1)
200
- 78, # NONE (identical in both protocol 0 and 1)
201
- 82, # REDUCE (identical in both protocol 0 and 1)
202
- 97, # APPEND (identical in both protocol 0 and 1)
203
- 98, # BUILD (identical in both protocol 0 and 1)
204
- 115, # SETITEM (identical in both protocol 0 and 1)
205
- 116 # TUPLE (identical in both protocol 0 and 1)
231
+ when Protocol0::MARK, # identical in both protocol 0 and 1
232
+ Protocol0::NONE, # identical in both protocol 0 and 1
233
+ Protocol0::REDUCE, # identical in both protocol 0 and 1
234
+ Protocol0::APPEND, # identical in both protocol 0 and 1
235
+ Protocol0::BUILD, # identical in both protocol 0 and 1
236
+ Protocol0::SETITEM, # identical in both protocol 0 and 1
237
+ Protocol0::TUPLE # identical in both protocol 0 and 1
206
238
  infer_protocol_version(io)
207
- when 99 # GLOBAL
239
+ when Protocol0::GLOBAL
208
240
  first_nl_string = io.gets
209
241
  second_nl_string = io.gets
210
242