unpickle 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +37 -0
- data/Rakefile +8 -0
- data/lib/unpickle.rb +213 -0
- data/test/test_unpickle.rb +99 -0
- metadata +49 -0
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# unpickle
|
2
|
+
|
3
|
+
This is a very limited tool to unpickle python 'pickle' objects.
|
4
|
+
|
5
|
+
# Usage
|
6
|
+
|
7
|
+
require 'unpickle'
|
8
|
+
|
9
|
+
fh = File.open('some.pickle')
|
10
|
+
o = Unpickle.loads(fh.read)
|
11
|
+
|
12
|
+
Unpickle.loads will raise Unpickle::UnpickleException if it doesn't support an
|
13
|
+
opcode in the picklestream, or encounters some kind of problem or invalid
|
14
|
+
sequence.
|
15
|
+
|
16
|
+
# Limitations
|
17
|
+
|
18
|
+
Currently unpickle only supports protocol 0.
|
19
|
+
|
20
|
+
It only supports Integers (not Longs), Strings (non-unicode),
|
21
|
+
booleans, dictionaries, lists, tuples and None.
|
22
|
+
|
23
|
+
Tuples will be returned as a frozen array.
|
24
|
+
|
25
|
+
None will be returned as nil.
|
26
|
+
|
27
|
+
# TODO
|
28
|
+
|
29
|
+
* Support more of protocol 0.
|
30
|
+
* Support newer protocols.
|
31
|
+
* Test object identity mapping.
|
32
|
+
* Package as a gem
|
33
|
+
* fix namespace.
|
34
|
+
|
35
|
+
# Author
|
36
|
+
|
37
|
+
Chris Collins <chris.collins@anchor.net.au>
|
data/Rakefile
ADDED
data/lib/unpickle.rb
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
# unpickle.rb
|
2
|
+
#
|
3
|
+
# baby-unpickler to handle limited protocol 0 crap.
|
4
|
+
#
|
5
|
+
# vim:et sts=4 sw=4 ts=8:
|
6
|
+
#
|
7
|
+
module Unpickle
    # Raised for every malformed, truncated or unsupported pickle stream.
    class UnpickleException < RuntimeError
    end

    # Sentinel pushed onto the stack by the MARK opcode; the collection
    # opcodes (DICT, LIST, TUPLE) gather everything above the topmost Mark.
    class Mark
    end

    # A minimal stack machine that interprets a protocol 0 pickle stream.
    #
    # The machine keeps a value stack, a memo table (used by PUT/GET so that
    # shared and recursive objects keep their identity) and a cursor into the
    # input string.
    class PickleMachine
        # Characters that may start an octal escape inside a string.
        OCTAL_DIGITS = %w[0 1 2 3 4 5 6 7].freeze

        # input:: the complete pickle stream as a String.
        def initialize(input)
            @stack = []
            @memo = {}
            @input = input
            @idx = 0
        end

        # True once the cursor has moved past the last character of the input.
        def at_end?
            @idx >= @input.length
        end

        # Returns the character under the cursor and advances the cursor.
        # Returns "" exactly at the end of the input and nil beyond it, so
        # callers are expected to check at_end? first.
        def next_char
            rv = @input[@idx..@idx]
            @idx += 1
            rv
        end

        # Returns the character under the cursor without consuming it.
        def peek_char
            @input[@idx..@idx]
        end

        # Reads a newline-terminated decimal argument and consumes the newline.
        #
        # Returns false for '00' and true for '01' (how protocol 0 spells
        # booleans), otherwise the Integer value.
        #
        # Raises UnpickleException if the input ends before the newline or if
        # the text is not a decimal integer.
        def read_int
            newline = @input.index("\n", @idx)
            if newline.nil?
                raise UnpickleException, "Hit end of input stream while reading integer"
            end
            text = @input[@idx...newline]
            @idx = newline + 1

            case text
            when '00'
                false
            when '01'
                true
            else
                # to_i alone would silently turn garbage into 0, so validate first.
                unless text =~ /\A\s*[-+]?\d+\s*\z/
                    raise UnpickleException, "Invalid integer #{text.inspect}"
                end
                text.to_i
            end
        end

        # Returns the stack index of the topmost Mark.
        #
        # Raises UnpickleException if there is no Mark on the stack.
        def marker
            idx = @stack.rindex { |item| item.is_a?(Mark) }
            raise UnpickleException, "Couldn't find Mark" if idx.nil?
            idx
        end

        # Reads a python repr()-style string literal followed by a newline
        # and returns the decoded value.
        #
        # Both single and double quoted literals are accepted, because python
        # switches to double quotes when the value contains a single quote.
        #
        # Raises UnpickleException if the literal is malformed or truncated.
        def read_string
            quote = at_end? ? nil : next_char
            unless quote == "'" || quote == '"'
                raise UnpickleException, "Couldn't find leading quote for string"
            end

            strout = ''
            terminated = false
            until at_end?
                c = next_char
                if c == quote
                    terminated = true
                    break
                elsif c == "\\"
                    strout += read_escape
                else
                    strout += c
                end
            end

            unless terminated
                raise UnpickleException, "Hit end of input stream inside string"
            end
            if at_end? || next_char != "\n"
                raise UnpickleException, "Expected \\n after string"
            end
            strout
        end

        # Runs the machine until the STOP opcode and returns the value that
        # is on top of the stack at that point.
        #
        # Raises UnpickleException on unsupported opcodes, stack underflow,
        # unknown memo ids, wrongly typed operands or a missing STOP.
        def unpickle
            until at_end?
                op = next_char
                case op
                when '(' # MARK
                    @stack.push(Mark.new)
                when 'd' # DICT
                    items = pop_to_mark
                    if items.length.odd?
                        raise UnpickleException, "Odd number of elements during 'd' stack walk"
                    end
                    newdict = {}
                    # Insert in stream order so a repeated key keeps its last value.
                    items.each_slice(2) { |k, v| newdict[k] = v }
                    @stack.push(newdict)
                when 'S' # STRING
                    @stack.push(read_string)
                when 'p' # PUT (string)
                    index = read_int
                    @memo[index] = top_value(op)
                when 'g' # GET (string)
                    index = read_int
                    unless @memo.key?(index)
                        raise UnpickleException, "Unknown memo id #{index.inspect} during 'g'"
                    end
                    @stack.push(@memo[index])
                when 'I' # INT
                    @stack.push(read_int)
                when 's' # SETITEM
                    value = pop_value(op)
                    key = pop_value(op)
                    dict = top_value(op)
                    unless dict.is_a?(Hash)
                        raise UnpickleException, "Expected a dict on the stack during 's'"
                    end
                    dict[key] = value
                when 't' # TUPLE
                    @stack.push(pop_to_mark.freeze)
                when 'l' # LIST
                    @stack.push(pop_to_mark)
                when 'N' # NONE
                    @stack.push(nil)
                when 'a' # APPEND
                    e = pop_value(op)
                    list = top_value(op)
                    # Tuples are frozen arrays and must not be appended to.
                    unless list.is_a?(Array) && !list.frozen?
                        raise UnpickleException, "Expected a list on the stack during 'a'"
                    end
                    list.push(e)
                when '0' # POP
                    pop_value(op)
                when '2' # DUP
                    @stack.push(top_value(op))
                when '.' # STOP
                    return pop_value(op)
                else
                    raise UnpickleException, "Unsupported unpickle operation '#{op}'"
                end
            end
            raise UnpickleException, "Hit end of input stream"
        end

        private

        # Pops and returns the top of the stack, raising on underflow.
        def pop_value(op)
            raise UnpickleException, "Stack empty during '#{op}'" if @stack.empty?
            @stack.pop
        end

        # Returns the top of the stack without popping, raising on underflow.
        def top_value(op)
            raise UnpickleException, "Stack empty during '#{op}'" if @stack.empty?
            @stack[-1]
        end

        # Removes the topmost Mark and everything above it from the stack and
        # returns the removed values (without the Mark) in stack order.
        def pop_to_mark
            items = @stack.slice!(marker..-1)
            items.shift # drop the Mark itself
            items
        end

        # Consumes up to +limit+ characters matching +pattern+ and returns them.
        def read_digits(pattern, limit)
            digits = ''
            while digits.length < limit && !at_end? && peek_char =~ pattern
                digits += next_char
            end
            digits
        end

        # Decodes the escape sequence that follows a backslash inside a
        # string literal and returns the decoded character.
        def read_escape
            if at_end?
                raise UnpickleException, "Hit end of input stream inside \\ escape"
            end
            opt = next_char
            case opt
            when 'x'
                digits = read_digits(/[\dA-Fa-f]/, 2)
                if digits.empty?
                    raise UnpickleException, "Bad \\x sequence in string"
                end
                digits.to_i(16).chr
            when *OCTAL_DIGITS
                # Python-style octal: one to three digits including the first.
                digits = opt + read_digits(/[0-7]/, 2)
                (digits.to_i(8) & 0xFF).chr
            when 'n'
                "\n"
            when 'r'
                "\r"
            when 't'
                "\t"
            when "\\"
                "\\"
            when "'"
                "'"
            when '"'
                '"'
            else
                raise UnpickleException, "Unexpected \\ escape: \\#{opt}"
            end
        end
    end

    # Unpickle the python object pickled into str.
    #
    # At this time, this ONLY works with a limited set of constructs
    # (dicts, lists, tuples, strings, ints, bools, None) and only with
    # protocol 0. Tuples come back as frozen arrays and None as nil.
    #
    # Object identity follows the python semantics: objects shared through
    # the pickle memo are the same ruby object in the result.
    #
    # Raises an UnpickleException if anything goes wrong.
    def self.loads(str)
        PickleMachine.new(str).unpickle
    end
end
|
213
|
+
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# tests for unpickle.rb
|
2
|
+
#
|
3
|
+
# vim:et sts=4 sw=4 ts=8:
|
4
|
+
require 'test/unit'
|
5
|
+
require 'unpickle'
|
6
|
+
|
7
|
+
# Checks Unpickle.loads against protocol 0 streams.
class UnpickleTests < Test::Unit::TestCase
    # Every stream below was produced with python 2.5 using
    # pickle.dumps(obj, 0); the pickled python value is noted per test.

    # obj = 'abcdefg'
    def test_simple_string
        stream = "S'abcdefg'\np0\n."
        assert_equal('abcdefg', Unpickle.loads(stream))
    end

    # obj = True
    def test_simple_bool_true
        stream = "I01\n."
        assert_equal(true, Unpickle.loads(stream))
    end

    # obj = False
    def test_simple_bool_false
        stream = "I00\n."
        assert_equal(false, Unpickle.loads(stream))
    end

    # obj = [1,2,3]
    def test_simple_list
        stream = "(lp0\nI1\naI2\naI3\na."
        assert_equal([1,2,3], Unpickle.loads(stream))
    end

    # obj = {'a': 1, 'b': 2, 'c': 3}
    def test_simple_dict
        stream = "(dp0\nS'a'\np1\nI1\nsS'c'\np2\nI3\nsS'b'\np3\nI2\ns."
        expected = {'a' => 1, 'b' => 2, 'c' => 3}
        assert_equal(expected, Unpickle.loads(stream))
    end

    # obj = 0
    def test_simple_int_zero
        stream = "I0\n."
        assert_equal(0, Unpickle.loads(stream))
    end

    # obj = None
    def test_none
        stream = 'N.'
        assert_equal(nil, Unpickle.loads(stream))
    end

    # obj = {'a': [1,2,3,4], 'b': (1,2,3), 'c': None, 'd': 'abcd'}
    def test_mixed
        stream = "(dp0\nS'a'\np1\n(lp2\nI1\naI2\naI3\naI4\nasS'c'\np3\nNsS'b'\np4\n(I1\nI2\nI3\ntp5\nsS'd'\np6\nS'abcd'\np7\ns."
        expected = {'a' => [1,2,3,4], 'b' => [1,2,3], 'c' => nil, 'd' => 'abcd'}
        assert_equal(expected, Unpickle.loads(stream))
    end

    # aobj = {}; bobj = {'a': aobj}; aobj['b'] = bobj; obj = aobj
    def test_recursive
        stream = "(dp0\nS'b'\np1\n(dp2\nS'a'\np3\ng0\nss."
        outer = Unpickle.loads(stream)

        assert(outer.is_a?(Hash))
        assert(outer.include?('b'))

        inner = outer['b']
        assert(inner.is_a?(Hash))
        assert(inner.include?('a'))

        # the inner reference must be the very same object as the outer dict.
        assert(outer.object_id == inner['a'].object_id)
    end
end
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: unpickle
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Chris Collins
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-01 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: A library to unpickle simple python objects directly into ruby
|
15
|
+
email: kuroneko-rubygems@sysadninjas.net
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/unpickle.rb
|
21
|
+
- README.md
|
22
|
+
- Rakefile
|
23
|
+
- test/test_unpickle.rb
|
24
|
+
homepage: http://github.com/kuroneko/unpickle-rb
|
25
|
+
licenses: []
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 1.8.15
|
45
|
+
signing_key:
|
46
|
+
specification_version: 3
|
47
|
+
summary: unpickle python objects in ruby
|
48
|
+
test_files:
|
49
|
+
- test/test_unpickle.rb
|