unpickle 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/README.md +37 -0
  2. data/Rakefile +8 -0
  3. data/lib/unpickle.rb +213 -0
  4. data/test/test_unpickle.rb +99 -0
  5. metadata +49 -0
@@ -0,0 +1,37 @@
1
+ # unpickle
2
+
3
+ This is a very limited tool for unpickling Python 'pickle' objects in Ruby.
4
+
5
+ # Usage
6
+
7
+ require 'unpickle'
8
+
9
+ fh = File.open('some.pickle')
10
+ o = Unpickle.loads(fh.read)
11
+
12
+ Unpickle.loads will raise Unpickle::UnpickleException if it doesn't support an
13
+ opcode in the pickle stream, or if it encounters some other problem or an invalid
14
+ sequence.
15
+
16
+ # Limitations
17
+
18
+ Currently unpickle only supports protocol 0.
19
+
20
+ It only supports Integers (not Longs), Strings (non-unicode),
21
+ booleans, dictionaries, lists, tuples and None.
22
+
23
+ Tuples will be returned as a frozen array.
24
+
25
+ None will be returned as nil.
26
+
27
+ # TODO
28
+
29
+ * Support more of protocol 0.
30
+ * Support newer protocols.
31
+ * Test object identity mapping.
32
+ * Package as a gem
33
+ * fix namespace.
34
+
35
+ # Author
36
+
37
+ Chris Collins <chris.collins@anchor.net.au>
@@ -0,0 +1,8 @@
1
require 'rake/testtask'

# Create the test task with its default settings, adding the 'test'
# directory to the load path used when the tests run.
Rake::TestTask.new { |test_task| test_task.libs << 'test' }

# Running `rake` with no arguments runs the :test task.
desc "Run tests"
task :default => :test
@@ -0,0 +1,213 @@
1
+ # unpickle.rb
2
+ #
3
+ # baby-unpickler to handle limited protocol 0 crap.
4
+ #
5
+ # vim:et sts=4 sw=4 ts=8:
6
+ #
7
+ module Unpickle
8
+ class UnpickleException < RuntimeError
9
+ end
10
+
11
+ class Mark
12
+ end
13
+
14
+ class PickleMachine
15
+ def initialize(input)
16
+ @stack = []
17
+ @memo = {}
18
+ @input = input
19
+ @idx = 0
20
+ end
21
+
22
+ def at_end?
23
+ @idx >= @input.length
24
+ end
25
+
26
+ def next_char
27
+ rv = @input[@idx..@idx]
28
+ @idx += 1
29
+ return rv
30
+ end
31
+
32
+ def peek_char
33
+ @input[@idx..@idx]
34
+ end
35
+
36
+ def read_int
37
+ strout = ""
38
+ while peek_char != "\n"
39
+ strout += next_char
40
+ end
41
+ next_char
42
+ case strout
43
+ when '00'
44
+ return false
45
+ when '01'
46
+ return true
47
+ else
48
+ return strout.to_i
49
+ end
50
+ end
51
+
52
+ def marker
53
+ idx = @stack.length-1
54
+ while idx >= 0
55
+ if @stack[idx].is_a?(Mark)
56
+ return idx
57
+ end
58
+ idx -= 1
59
+ end
60
+ raise UnpickleException, "Couldn't find Mark"
61
+ end
62
+
63
+ # read from the input stream to read the python string.
64
+ #
65
+ # returns the value.
66
+ def read_string
67
+ strout = ''
68
+ if next_char != '\''
69
+ raise UnpickleException, "Couldn't find leading quote for string"
70
+ end
71
+ while not at_end?
72
+ c = next_char
73
+ case c
74
+ when "\\"
75
+ opt = next_char
76
+ case opt
77
+ when 'x'
78
+ num = ''
79
+ while peek_char.match(/[\dA-Fa-f]/)
80
+ num += next_char
81
+ if num.length >= 2
82
+ break
83
+ end
84
+ end
85
+ unless (1..2).include?(num.length)
86
+ raise UnpickleException, "Bad \\x sequence in string"
87
+ end
88
+ strout += num.to_i(16).chr
89
+ when '0'
90
+ num = ''
91
+ while peek_char.match(/[0-7]/)
92
+ num += next_char
93
+ if num.length >= 3
94
+ break
95
+ end
96
+ end
97
+ unless (1..3).include?(num.length)
98
+ raise UnpickleException, "Bad \\0 sequence in string"
99
+ end
100
+ strout += num.to_i(8).chr
101
+ when 'n'
102
+ strout += "\n"
103
+ when "\\"
104
+ strout += "\\"
105
+ when 't'
106
+ strout += "\t"
107
+ when "'"
108
+ strout += "'"
109
+ else
110
+ raise UnpickleException, "Unexpected \\ escape: \\#{opt}"
111
+ end
112
+ when "'"
113
+ # valid end of string...
114
+ break
115
+ else
116
+ strout += c
117
+ end
118
+ end
119
+ if next_char != "\n"
120
+ raise UnpickleException, "Expected \\n after string"
121
+ end
122
+ return strout
123
+ end
124
+
125
+ def unpickle
126
+ while not at_end?
127
+ op = next_char
128
+ case op
129
+ when '(' # MARK
130
+ @stack.push(Mark.new)
131
+ when 'd' # DICT
132
+ newdict = {}
133
+ while true
134
+ if @stack.empty?
135
+ raise UnpickleException, "Stack empty during 'd'"
136
+ end
137
+ v = @stack.pop
138
+ if v.is_a?(Mark)
139
+ break
140
+ end
141
+ if @stack.empty?
142
+ raise UnpickleException, "Stack empty during 'd'"
143
+ end
144
+ k = stack.pop
145
+ if k.is_a?(Mark)
146
+ raise UnpickleException, "Odd number of elements during 'd' stack walk"
147
+ end
148
+ newdict[k] = v
149
+ end
150
+ @stack.push(newdict)
151
+ when 'S' # STRING
152
+ newstr = read_string
153
+ @stack.push(newstr)
154
+ when 'p' # PUT (string)
155
+ index = read_int
156
+ @memo[index] = @stack[-1]
157
+ when 'g' # GET (string)
158
+ index = read_int
159
+ @stack.push(@memo[index])
160
+ when 'I' # INT
161
+ intarg = read_int
162
+ @stack.push(intarg)
163
+ when 's' # SETITEM
164
+ value = @stack.pop
165
+ key = @stack.pop
166
+ dict = @stack[-1]
167
+ dict[key] = value
168
+ when 't' # TUPLE
169
+ midx = marker
170
+ tuple = @stack[midx+1..-1]
171
+ @stack = @stack[0...midx]
172
+ tuple.freeze
173
+ @stack.push(tuple)
174
+ when 'l' # LIST
175
+ midx = marker
176
+ list = @stack[midx+1..-1]
177
+ @stack = @stack[0...midx]
178
+ @stack.push(list)
179
+ when 'N' # NONE
180
+ @stack.push(nil)
181
+ when 'a' # APPEND
182
+ e = @stack.pop
183
+ @stack[-1].push(e)
184
+ when '0' # POP
185
+ @stack.pop
186
+ when '2' # DUP
187
+ @stack.push(@stack[-1])
188
+ when '.' # STOP
189
+ return @stack.pop
190
+ else
191
+ raise UnpickleException, "Unsupported unpickle operation '#{op}'"
192
+ end
193
+ end
194
+ raise UnpickleException, "Hit end of input stream"
195
+ end
196
+ end
197
+
198
+ # Unpickle the python object pickled into str.
199
+ #
200
+ # At this time, this ONLY works with a limited set of constructs
201
+ # (dicts, lists, tuples, strings, ints, bools, None) and only with
202
+ # protocol 0.
203
+ #
204
+ # Object uniqueness should obey the python semantics but is largely
205
+ # untested.
206
+ #
207
+ # Raises an UnpickleException if anything goes wrong.
208
+ def Unpickle.loads(str)
209
+ p = Unpickle::PickleMachine.new(str)
210
+ return p.unpickle
211
+ end
212
+ end
213
+
@@ -0,0 +1,99 @@
1
+ # tests for Unpickle.loads (lib/unpickle.rb)
2
+ #
3
+ # vim:et sts=4 sw=4 ts=8:
4
+ require 'test/unit'
5
+ require 'unpickle'
6
+
7
class UnpickleTests < Test::Unit::TestCase
  # Every fixture string below was produced with python 2.5 via
  # pickle.dumps(obj, 0); the python source object is noted on each test.

  # obj = 'abcdefg'
  def test_simple_string
    assert_equal('abcdefg', Unpickle.loads("S'abcdefg'\np0\n."))
  end

  # obj = True
  def test_simple_bool_true
    assert_equal(true, Unpickle.loads("I01\n."))
  end

  # obj = False
  def test_simple_bool_false
    assert_equal(false, Unpickle.loads("I00\n."))
  end

  # obj = [1,2,3]
  def test_simple_list
    assert_equal([1, 2, 3], Unpickle.loads("(lp0\nI1\naI2\naI3\na."))
  end

  # obj = {'a': 1, 'b': 2, 'c': 3}
  def test_simple_dict
    expected = { 'a' => 1, 'b' => 2, 'c' => 3 }
    stream = "(dp0\nS'a'\np1\nI1\nsS'c'\np2\nI3\nsS'b'\np3\nI2\ns."

    assert_equal(expected, Unpickle.loads(stream))
  end

  # obj = 0
  def test_simple_int_zero
    assert_equal(0, Unpickle.loads("I0\n."))
  end

  # obj = None
  def test_none
    assert_nil(Unpickle.loads('N.'))
  end

  # obj = {'a': [1,2,3,4], 'b': (1,2,3), 'c': None, 'd': 'abcd'}
  def test_mixed
    expected = { 'a' => [1, 2, 3, 4], 'b' => [1, 2, 3], 'c' => nil, 'd' => 'abcd' }
    stream = "(dp0\nS'a'\np1\n(lp2\nI1\naI2\naI3\naI4\nasS'c'\np3\nNsS'b'\np4\n(I1\nI2\nI3\ntp5\nsS'd'\np6\nS'abcd'\np7\ns."

    assert_equal(expected, Unpickle.loads(stream))
  end

  # aobj = {}; bobj = {'a': aobj}; aobj['b'] = bobj; pickle.dumps(aobj, 0)
  def test_recursive
    outer = Unpickle.loads("(dp0\nS'b'\np1\n(dp2\nS'a'\np3\ng0\nss.")

    assert_kind_of(Hash, outer)
    assert(outer.key?('b'))

    inner = outer['b']
    assert_kind_of(Hash, inner)
    assert(inner.key?('a'))

    # The inner reference must be the very same object as the outer hash.
    assert_same(outer, inner['a'])
  end
end
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unpickle
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Chris Collins
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-01 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A library to unpickle simple python objects directly into ruby
15
+ email: kuroneko-rubygems@sysadninjas.net
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/unpickle.rb
21
+ - README.md
22
+ - Rakefile
23
+ - test/test_unpickle.rb
24
+ homepage: http://github.com/kuroneko/unpickle-rb
25
+ licenses: []
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 1.8.15
45
+ signing_key:
46
+ specification_version: 3
47
+ summary: unpickle python objects in ruby
48
+ test_files:
49
+ - test/test_unpickle.rb