python-pickle 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.document +3 -0
  3. data/.github/workflows/ruby.yml +27 -0
  4. data/.gitignore +5 -0
  5. data/.rspec +1 -0
  6. data/.yardopts +1 -0
  7. data/ChangeLog.md +14 -0
  8. data/Gemfile +15 -0
  9. data/LICENSE.txt +20 -0
  10. data/README.md +149 -0
  11. data/Rakefile +13 -0
  12. data/gemspec.yml +25 -0
  13. data/lib/python/pickle/byte_array.rb +40 -0
  14. data/lib/python/pickle/deserializer.rb +595 -0
  15. data/lib/python/pickle/exceptions.rb +12 -0
  16. data/lib/python/pickle/instruction.rb +52 -0
  17. data/lib/python/pickle/instructions/add_items.rb +26 -0
  18. data/lib/python/pickle/instructions/append.rb +24 -0
  19. data/lib/python/pickle/instructions/appends.rb +26 -0
  20. data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
  21. data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
  22. data/lib/python/pickle/instructions/bin_float.rb +29 -0
  23. data/lib/python/pickle/instructions/bin_get.rb +27 -0
  24. data/lib/python/pickle/instructions/bin_int1.rb +29 -0
  25. data/lib/python/pickle/instructions/bin_put.rb +29 -0
  26. data/lib/python/pickle/instructions/bin_string.rb +32 -0
  27. data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
  28. data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
  29. data/lib/python/pickle/instructions/build.rb +24 -0
  30. data/lib/python/pickle/instructions/byte_array8.rb +32 -0
  31. data/lib/python/pickle/instructions/dict.rb +17 -0
  32. data/lib/python/pickle/instructions/dup.rb +24 -0
  33. data/lib/python/pickle/instructions/empty_dict.rb +26 -0
  34. data/lib/python/pickle/instructions/empty_list.rb +26 -0
  35. data/lib/python/pickle/instructions/empty_set.rb +26 -0
  36. data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
  37. data/lib/python/pickle/instructions/ext1.rb +29 -0
  38. data/lib/python/pickle/instructions/ext2.rb +29 -0
  39. data/lib/python/pickle/instructions/ext4.rb +29 -0
  40. data/lib/python/pickle/instructions/float.rb +24 -0
  41. data/lib/python/pickle/instructions/frame.rb +29 -0
  42. data/lib/python/pickle/instructions/frozen_set.rb +26 -0
  43. data/lib/python/pickle/instructions/get.rb +27 -0
  44. data/lib/python/pickle/instructions/global.rb +62 -0
  45. data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
  46. data/lib/python/pickle/instructions/has_value.rb +50 -0
  47. data/lib/python/pickle/instructions/int.rb +24 -0
  48. data/lib/python/pickle/instructions/list.rb +24 -0
  49. data/lib/python/pickle/instructions/long.rb +24 -0
  50. data/lib/python/pickle/instructions/long1.rb +32 -0
  51. data/lib/python/pickle/instructions/long4.rb +32 -0
  52. data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
  53. data/lib/python/pickle/instructions/mark.rb +24 -0
  54. data/lib/python/pickle/instructions/memoize.rb +26 -0
  55. data/lib/python/pickle/instructions/new_false.rb +24 -0
  56. data/lib/python/pickle/instructions/new_obj.rb +26 -0
  57. data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
  58. data/lib/python/pickle/instructions/new_true.rb +24 -0
  59. data/lib/python/pickle/instructions/next_buffer.rb +26 -0
  60. data/lib/python/pickle/instructions/none.rb +24 -0
  61. data/lib/python/pickle/instructions/pop.rb +24 -0
  62. data/lib/python/pickle/instructions/pop_mark.rb +24 -0
  63. data/lib/python/pickle/instructions/proto.rb +29 -0
  64. data/lib/python/pickle/instructions/put.rb +24 -0
  65. data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
  66. data/lib/python/pickle/instructions/reduce.rb +24 -0
  67. data/lib/python/pickle/instructions/set_item.rb +24 -0
  68. data/lib/python/pickle/instructions/set_items.rb +26 -0
  69. data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
  70. data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
  71. data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
  72. data/lib/python/pickle/instructions/stack_global.rb +26 -0
  73. data/lib/python/pickle/instructions/stop.rb +24 -0
  74. data/lib/python/pickle/instructions/string.rb +24 -0
  75. data/lib/python/pickle/instructions/tuple.rb +24 -0
  76. data/lib/python/pickle/instructions/tuple1.rb +24 -0
  77. data/lib/python/pickle/instructions/tuple2.rb +24 -0
  78. data/lib/python/pickle/instructions/tuple3.rb +24 -0
  79. data/lib/python/pickle/protocol.rb +56 -0
  80. data/lib/python/pickle/protocol0.rb +399 -0
  81. data/lib/python/pickle/protocol1.rb +183 -0
  82. data/lib/python/pickle/protocol2.rb +229 -0
  83. data/lib/python/pickle/protocol3.rb +163 -0
  84. data/lib/python/pickle/protocol4.rb +285 -0
  85. data/lib/python/pickle/protocol5.rb +218 -0
  86. data/lib/python/pickle/py_class.rb +75 -0
  87. data/lib/python/pickle/py_object.rb +141 -0
  88. data/lib/python/pickle/tuple.rb +19 -0
  89. data/lib/python/pickle/version.rb +6 -0
  90. data/lib/python/pickle.rb +226 -0
  91. data/python-pickle.gemspec +62 -0
  92. data/spec/byte_array_spec.rb +54 -0
  93. data/spec/deserializer_spec.rb +1201 -0
  94. data/spec/fixtures/ascii_str_v3.pkl +0 -0
  95. data/spec/fixtures/ascii_str_v4.pkl +0 -0
  96. data/spec/fixtures/ascii_str_v5.pkl +0 -0
  97. data/spec/fixtures/bin_str_v0.pkl +3 -0
  98. data/spec/fixtures/bin_str_v1.pkl +0 -0
  99. data/spec/fixtures/bin_str_v2.pkl +0 -0
  100. data/spec/fixtures/bin_str_v3.pkl +0 -0
  101. data/spec/fixtures/bin_str_v4.pkl +0 -0
  102. data/spec/fixtures/bin_str_v5.pkl +0 -0
  103. data/spec/fixtures/bytearray_v0.pkl +10 -0
  104. data/spec/fixtures/bytearray_v1.pkl +0 -0
  105. data/spec/fixtures/bytearray_v2.pkl +0 -0
  106. data/spec/fixtures/bytearray_v3.pkl +0 -0
  107. data/spec/fixtures/bytearray_v4.pkl +0 -0
  108. data/spec/fixtures/bytearray_v5.pkl +0 -0
  109. data/spec/fixtures/class_v0.pkl +4 -0
  110. data/spec/fixtures/class_v1.pkl +0 -0
  111. data/spec/fixtures/class_v2.pkl +0 -0
  112. data/spec/fixtures/class_v3.pkl +0 -0
  113. data/spec/fixtures/class_v4.pkl +0 -0
  114. data/spec/fixtures/class_v5.pkl +0 -0
  115. data/spec/fixtures/dict_v0.pkl +6 -0
  116. data/spec/fixtures/dict_v1.pkl +0 -0
  117. data/spec/fixtures/dict_v2.pkl +0 -0
  118. data/spec/fixtures/dict_v3.pkl +0 -0
  119. data/spec/fixtures/dict_v4.pkl +0 -0
  120. data/spec/fixtures/dict_v5.pkl +0 -0
  121. data/spec/fixtures/escaped_str_v0.pkl +3 -0
  122. data/spec/fixtures/escaped_str_v1.pkl +0 -0
  123. data/spec/fixtures/escaped_str_v2.pkl +0 -0
  124. data/spec/fixtures/false_v0.pkl +2 -0
  125. data/spec/fixtures/false_v1.pkl +2 -0
  126. data/spec/fixtures/false_v2.pkl +1 -0
  127. data/spec/fixtures/false_v3.pkl +1 -0
  128. data/spec/fixtures/false_v4.pkl +1 -0
  129. data/spec/fixtures/false_v5.pkl +1 -0
  130. data/spec/fixtures/float_v0.pkl +2 -0
  131. data/spec/fixtures/float_v1.pkl +1 -0
  132. data/spec/fixtures/float_v2.pkl +1 -0
  133. data/spec/fixtures/float_v3.pkl +1 -0
  134. data/spec/fixtures/float_v4.pkl +0 -0
  135. data/spec/fixtures/float_v5.pkl +0 -0
  136. data/spec/fixtures/function_v0.pkl +4 -0
  137. data/spec/fixtures/function_v1.pkl +0 -0
  138. data/spec/fixtures/function_v2.pkl +0 -0
  139. data/spec/fixtures/function_v3.pkl +0 -0
  140. data/spec/fixtures/function_v4.pkl +0 -0
  141. data/spec/fixtures/function_v5.pkl +0 -0
  142. data/spec/fixtures/hex_str_v0.pkl +3 -0
  143. data/spec/fixtures/hex_str_v1.pkl +0 -0
  144. data/spec/fixtures/hex_str_v2.pkl +0 -0
  145. data/spec/fixtures/int_v0.pkl +2 -0
  146. data/spec/fixtures/int_v1.pkl +1 -0
  147. data/spec/fixtures/int_v2.pkl +1 -0
  148. data/spec/fixtures/int_v3.pkl +1 -0
  149. data/spec/fixtures/int_v4.pkl +1 -0
  150. data/spec/fixtures/int_v5.pkl +1 -0
  151. data/spec/fixtures/list_v0.pkl +7 -0
  152. data/spec/fixtures/list_v1.pkl +0 -0
  153. data/spec/fixtures/list_v2.pkl +0 -0
  154. data/spec/fixtures/list_v3.pkl +0 -0
  155. data/spec/fixtures/list_v4.pkl +0 -0
  156. data/spec/fixtures/list_v5.pkl +0 -0
  157. data/spec/fixtures/long_v0.pkl +2 -0
  158. data/spec/fixtures/long_v1.pkl +2 -0
  159. data/spec/fixtures/long_v2.pkl +0 -0
  160. data/spec/fixtures/long_v3.pkl +0 -0
  161. data/spec/fixtures/long_v4.pkl +0 -0
  162. data/spec/fixtures/long_v5.pkl +0 -0
  163. data/spec/fixtures/nested_dict_v0.pkl +12 -0
  164. data/spec/fixtures/nested_dict_v1.pkl +0 -0
  165. data/spec/fixtures/nested_dict_v2.pkl +0 -0
  166. data/spec/fixtures/nested_dict_v3.pkl +0 -0
  167. data/spec/fixtures/nested_dict_v4.pkl +0 -0
  168. data/spec/fixtures/nested_dict_v5.pkl +0 -0
  169. data/spec/fixtures/nested_list_v0.pkl +9 -0
  170. data/spec/fixtures/nested_list_v1.pkl +0 -0
  171. data/spec/fixtures/nested_list_v2.pkl +0 -0
  172. data/spec/fixtures/nested_list_v3.pkl +0 -0
  173. data/spec/fixtures/nested_list_v4.pkl +0 -0
  174. data/spec/fixtures/nested_list_v5.pkl +0 -0
  175. data/spec/fixtures/none_v0.pkl +1 -0
  176. data/spec/fixtures/none_v1.pkl +1 -0
  177. data/spec/fixtures/none_v2.pkl +1 -0
  178. data/spec/fixtures/none_v3.pkl +1 -0
  179. data/spec/fixtures/none_v4.pkl +1 -0
  180. data/spec/fixtures/none_v5.pkl +1 -0
  181. data/spec/fixtures/object_v0.pkl +19 -0
  182. data/spec/fixtures/object_v1.pkl +0 -0
  183. data/spec/fixtures/object_v2.pkl +0 -0
  184. data/spec/fixtures/object_v3.pkl +0 -0
  185. data/spec/fixtures/object_v4.pkl +0 -0
  186. data/spec/fixtures/object_v5.pkl +0 -0
  187. data/spec/fixtures/str_v0.pkl +3 -0
  188. data/spec/fixtures/str_v1.pkl +0 -0
  189. data/spec/fixtures/str_v2.pkl +0 -0
  190. data/spec/fixtures/str_v3.pkl +0 -0
  191. data/spec/fixtures/str_v4.pkl +0 -0
  192. data/spec/fixtures/str_v5.pkl +0 -0
  193. data/spec/fixtures/true_v0.pkl +2 -0
  194. data/spec/fixtures/true_v1.pkl +2 -0
  195. data/spec/fixtures/true_v2.pkl +1 -0
  196. data/spec/fixtures/true_v3.pkl +1 -0
  197. data/spec/fixtures/true_v4.pkl +1 -0
  198. data/spec/fixtures/true_v5.pkl +1 -0
  199. data/spec/fixtures/unicode_str_v0.pkl +3 -0
  200. data/spec/fixtures/unicode_str_v1.pkl +0 -0
  201. data/spec/fixtures/unicode_str_v2.pkl +0 -0
  202. data/spec/fixtures/unicode_str_v3.pkl +0 -0
  203. data/spec/fixtures/unicode_str_v4.pkl +0 -0
  204. data/spec/fixtures/unicode_str_v5.pkl +0 -0
  205. data/spec/generate_pickles2.py +41 -0
  206. data/spec/generate_pickles3.py +40 -0
  207. data/spec/integration/load/protocol0_spec.rb +258 -0
  208. data/spec/integration/load/protocol1_spec.rb +258 -0
  209. data/spec/integration/load/protocol2_spec.rb +258 -0
  210. data/spec/integration/load/protocol3_spec.rb +258 -0
  211. data/spec/integration/load/protocol4_spec.rb +258 -0
  212. data/spec/integration/load/protocol5_spec.rb +258 -0
  213. data/spec/integration/parse/protocol0_spec.rb +467 -0
  214. data/spec/integration/parse/protocol1_spec.rb +459 -0
  215. data/spec/integration/parse/protocol2_spec.rb +471 -0
  216. data/spec/integration/parse/protocol3_spec.rb +407 -0
  217. data/spec/integration/parse/protocol4_spec.rb +439 -0
  218. data/spec/integration/parse/protocol5_spec.rb +419 -0
  219. data/spec/pickle_spec.rb +163 -0
  220. data/spec/protocol0_read_instruction_examples.rb +211 -0
  221. data/spec/protocol0_spec.rb +445 -0
  222. data/spec/protocol1_read_instruction_examples.rb +156 -0
  223. data/spec/protocol1_spec.rb +59 -0
  224. data/spec/protocol2_read_instruction_examples.rb +135 -0
  225. data/spec/protocol2_spec.rb +128 -0
  226. data/spec/protocol3_read_instruction_examples.rb +29 -0
  227. data/spec/protocol3_spec.rb +32 -0
  228. data/spec/protocol4_read_instruction_examples.rb +142 -0
  229. data/spec/protocol4_spec.rb +58 -0
  230. data/spec/protocol5_spec.rb +68 -0
  231. data/spec/py_class_spec.rb +62 -0
  232. data/spec/py_object_spec.rb +149 -0
  233. data/spec/spec_helper.rb +3 -0
  234. data/spec/tuple_spec.rb +18 -0
  235. metadata +325 -0
@@ -0,0 +1,399 @@
1
+ require 'python/pickle/protocol'
2
+ require 'python/pickle/instructions/mark'
3
+ require 'python/pickle/instructions/dict'
4
+ require 'python/pickle/instructions/string'
5
+ require 'python/pickle/instructions/put'
6
+ require 'python/pickle/instructions/get'
7
+ require 'python/pickle/instructions/float'
8
+ require 'python/pickle/instructions/int'
9
+ require 'python/pickle/instructions/long'
10
+ require 'python/pickle/instructions/set_item'
11
+ require 'python/pickle/instructions/tuple'
12
+ require 'python/pickle/instructions/list'
13
+ require 'python/pickle/instructions/none'
14
+ require 'python/pickle/instructions/append'
15
+ require 'python/pickle/instructions/global'
16
+ require 'python/pickle/instructions/reduce'
17
+ require 'python/pickle/instructions/build'
18
+ require 'python/pickle/instructions/pop'
19
+ require 'python/pickle/instructions/pop_mark'
20
+ require 'python/pickle/instructions/dup'
21
+ require 'python/pickle/instructions/stop'
22
+ require 'python/pickle/exceptions'
23
+
24
+ require 'set'
25
+
26
+ module Python
27
+ module Pickle
28
+ #
29
+ # Implements reading and writing of Python Pickle protocol 0.
30
+ #
31
+ # @api private
32
+ #
33
+ class Protocol0 < Protocol
34
+
35
# Opcodes for Pickle protocol version 0.
#
# The Set is frozen so that the shared constant cannot be modified by
# accident.
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = Set[
  40,  # MARK
  46,  # STOP
  48,  # POP
  49,  # POP_MARK
  50,  # DUP
  70,  # FLOAT
  73,  # INT
  76,  # LONG
  78,  # NONE
  82,  # REDUCE
  83,  # STRING
  86,  # UNICODE
  97,  # APPEND
  98,  # BUILD
  99,  # GLOBAL
  100, # DICT
  103, # GET
  108, # LIST
  112, # PUT
  115, # SETITEM
  116  # TUPLE
].freeze
61
+
62
#
# Decodes the next instruction from the pickle stream.
#
# One opcode byte is read from the stream and mapped to its instruction.
# Opcodes that carry an argument (FLOAT, INT, LONG, STRING, UNICODE,
# GLOBAL, GET and PUT) also read that argument from the stream.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The byte read is not one of the opcodes handled here (this includes
#   `nil`, when no byte could be read), or an argument could not be parsed.
#
def read_instruction
  opcode = @io.getbyte

  case opcode
  when 40  then Instructions::MARK                              # MARK
  when 46  then Instructions::STOP                              # STOP
  when 48  then Instructions::POP                               # POP
  when 49  then Instructions::POP_MARK                          # POP_MARK
  when 50  then Instructions::DUP                               # DUP
  when 70  then Instructions::Float.new(read_float)             # FLOAT
  when 73  then Instructions::Int.new(read_int)                 # INT
  when 76  then Instructions::Long.new(read_long)               # LONG
  when 78  then Instructions::NONE                              # NONE
  when 82  then Instructions::REDUCE                            # REDUCE
  when 83  then Instructions::String.new(read_string)           # STRING
  when 86  then Instructions::String.new(read_unicode_string)   # UNICODE
  when 97  then Instructions::APPEND                            # APPEND
  when 98  then Instructions::BUILD                             # BUILD
  when 99                                                       # GLOBAL
    # two newline terminated strings, read in stream order
    first_name  = read_nl_string
    second_name = read_nl_string

    Instructions::Global.new(first_name,second_name)
  when 100 then Instructions::DICT                              # DICT
  when 103 then Instructions::Get.new(read_int)                 # GET
  when 108 then Instructions::LIST                              # LIST
  when 112 then Instructions::Put.new(read_int)                 # PUT
  when 115 then Instructions::SETITEM                           # SETITEM
  when 116 then Instructions::TUPLE                             # TUPLE
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 0")
  end
end
119
+
120
#
# Reads the two hex digits of a `\xHH` escape from the pickle stream.
#
# @return [String]
#   The single character whose byte value is given by the two hex digits.
#
# @raise [InvalidFormat]
#   The next two characters were not both hex digits, or the stream ended
#   before two characters could be read.
#
def read_hex_escaped_char
  # reading at the end of the stream returns nil; normalize it to an empty
  # String so the error message shows exactly what was read
  string = @io.read(2).to_s

  unless string =~ /\A[0-9a-fA-F]{2}\z/
    bad_hex = string.inspect[1..-2]

    raise(InvalidFormat,"invalid hex escape character: \"\\x#{bad_hex}\"")
  end

  return string.to_i(16).chr
end
140
+
141
#
# Reads the character(s) following a backslash from the pickle stream and
# returns the character that the escape sequence stands for.
#
# Recognized escapes are `\xHH`, `\t`, `\n`, `\r`, `\\`, `\'` and `\"`.
#
# @return [String]
#   The unescaped raw character.
#
# @raise [InvalidFormat]
#   The character after the backslash is not a recognized escape, or the
#   stream ended right after the backslash.
#
def read_escaped_char
  case (letter = @io.getc)
  when 'x'  then read_hex_escaped_char
  when 't'  then "\t"
  when 'n'  then "\n"
  when 'r'  then "\r"
  when '\\' then '\\'
  when "'"  then "'"
  when '"'  then '"'
  else
    # letter is nil at the end of the stream; to_s keeps the message from
    # showing a fragment of the text "nil"
    bad_escape = letter.to_s.inspect[1..-2]

    raise(InvalidFormat,"invalid backslash escape character: \"\\#{bad_escape}\"")
  end
end
161
+
162
#
# Reads characters from the pickle stream up to the next newline.
#
# @return [String]
#   The characters that were read, without the terminating newline.
#
# @raise [InvalidFormat]
#   The stream ended before a newline was found.
#
def read_nl_string
  buffer = String.new

  until @io.eof?
    char = @io.getc

    # the newline terminates the string and is not part of it
    return buffer if char == "\n"

    buffer << char
  end

  raise(InvalidFormat,"unexpected end of stream after the end of a newline terminated string")
end
185
+
186
#
# Reads a quoted, backslash-escaped string from the pickle stream.
#
# The string may be delimited by single quotes or by double quotes (Python
# 2's `repr()` uses double quotes for strings such as `"it's"`). The
# closing quote must be the same character as the opening quote and must be
# followed by a newline.
#
# @return [String]
#   The decoded raw string.
#
# @raise [InvalidFormat]
#   The opening quote is missing, the stream ended early, or the closing
#   quote is not followed by a newline.
#
def read_string
  new_string = String.new(encoding: Encoding::ASCII_8BIT)
  quote      = @io.getc

  unless quote == "'" || quote == '"'
    raise(InvalidFormat,"cannot find beginning single-quote of string")
  end

  until @io.eof?
    case (char = @io.getc)
    when "\\"
      new_string << read_escaped_char
    when quote # end-of-string; the other quote character is plain data
      break
    else
      new_string << char
    end
  end

  newline = @io.getc

  if newline.nil?
    raise(InvalidFormat,"unexpected end of stream after the end of a single-quoted string")
  elsif newline != "\n"
    raise(InvalidFormat,"expected a '\\n' character following the string, but was #{newline.inspect}")
  end

  return new_string
end
220
+
221
#
# Reads the four hex digits of a short (`\uXXXX`) unicode escape.
#
# @return [String]
#   The decoded UTF-8 character.
#
# @raise [InvalidFormat]
#   The next four characters were not all hex digits, the stream ended
#   early, or the code point cannot be encoded as UTF-8.
#
def read_unicode_escaped_char16
  # reading at the end of the stream returns nil; normalize it to an empty
  # String so the error message shows exactly what was read
  string = @io.read(4).to_s

  unless string =~ /\A[0-9a-fA-F]{4}\z/
    bad_unicode = string.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{bad_unicode}\"")
  end

  begin
    return string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are not valid in UTF-8
    # (e.g. the surrogates D800-DFFF)
    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{string}\"")
  end
end
241
+
242
#
# Reads the eight hex digits of a long (`\UXXXXXXXX`) unicode escape.
#
# @return [String]
#   The decoded UTF-8 character.
#
# @raise [InvalidFormat]
#   The next eight characters were not all hex digits, the stream ended
#   early, or the code point cannot be encoded as UTF-8.
#
def read_unicode_escaped_char32
  # reading at the end of the stream returns nil; normalize it to an empty
  # String so the error message shows exactly what was read
  string = @io.read(8).to_s

  unless string =~ /\A[0-9a-fA-F]{8}\z/
    bad_unicode = string.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{bad_unicode}\"")
  end

  begin
    return string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # eight hex digits can express values that Integer#chr rejects for
    # UTF-8 (e.g. anything above 10FFFF)
    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{string}\"")
  end
end
262
+
263
#
# Reads the character(s) following a backslash inside a unicode string and
# returns the character that the escape sequence stands for.
#
# Recognized escapes are `\xHH`, `\uXXXX`, `\UXXXXXXXX` and `\\`.
#
# @return [String]
#   The unescaped raw unicode character.
#
# @raise [InvalidFormat]
#   The character after the backslash is not a recognized escape, or the
#   stream ended right after the backslash.
#
def read_unicode_escaped_char
  case (letter = @io.getc)
  when 'x'  then read_hex_escaped_char
  when 'u'  then read_unicode_escaped_char16
  when 'U'  then read_unicode_escaped_char32
  when "\\" then "\\"
  else
    # letter is nil at the end of the stream; to_s keeps the message from
    # showing a fragment of the text "nil"
    bad_escape = letter.to_s.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\#{bad_escape}\"")
  end
end
281
+
282
#
# Reads a newline terminated unicode String from the pickle stream,
# decoding backslash escapes along the way.
#
# @return [String]
#   The decoded UTF-8 String, without the terminating newline.
#
# @raise [InvalidFormat]
#   The stream ended before a newline was found.
#
def read_unicode_string
  buffer = String.new(encoding: Encoding::UTF_8)

  until @io.eof?
    char = @io.getc

    if char == "\\" # backslash escaped character
      buffer << read_unicode_escaped_char
    elsif char == "\n" # end-of-string
      return buffer
    else
      buffer << char
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing unicode string: #{buffer.inspect}")
end
304
+
305
#
# Reads a newline terminated floating-point decimal from the pickle stream.
#
# Besides plain decimals such as `1.5`, a sign (`-1.5`), an exponent
# (`1e+16`, `2.5e-07`) and the special values `inf`, `-inf` and `nan` are
# accepted.
#
# @return [Float]
#   The decoded float.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_float
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9.eE+\-]/, 'i', 'n', 'f', 'a'
      # digits, sign, decimal point and exponent marker, plus the letters
      # needed to spell "inf" and "nan"
      new_string << char
    when "\n" # end-of-float
      case new_string
      when 'inf', '+inf'         then return Float::INFINITY
      when '-inf'                then return -Float::INFINITY
      when 'nan', '+nan', '-nan' then return Float::NAN
      end

      # the letters i, n, f and a are only valid as part of the special
      # values handled above
      if (bad_char = new_string[/[infa]/])
        raise(InvalidFormat,"encountered a non-numeric character while reading a float: #{bad_char.inspect}")
      end

      return new_string.to_f
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a float: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a float: #{new_string.inspect}")
end
330
+
331
#
# Reads a newline terminated integer from the pickle stream.
#
# A leading `-` is accepted so that negative integers can be read.
#
# @return [Integer, true, false]
#   The decoded Integer.
#   If the integer is `00`, then `false` will be returned.
#   If the integer is `01`, then `true` will be returned.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_int
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-'
      # a minus sign is only valid as the very first character
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
      end

      new_string << char
    when "\n" # end-of-integer
      return case new_string
             when '00' then false
             when '01' then true
             else           new_string.to_i
             end
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing an integer: #{new_string.inspect}")
end
362
+
363
#
# Reads a long integer, written as decimal digits followed by `L` and a
# newline, from the pickle stream.
#
# A leading `-` is accepted so that negative long integers can be read.
#
# @return [Integer]
#   The decoded Integer.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_long
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-'
      # a minus sign is only valid as the very first character
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
      end

      new_string << char
    when 'L'
      # the 'L' suffix must be immediately followed by the newline
      newline = @io.getc

      if newline.nil?
        raise(InvalidFormat,"unexpected end of stream after the end of an integer")
      elsif newline != "\n"
        raise(InvalidFormat,"expected a '\\n' character following the integer, but was #{newline.inspect}")
      end

      return new_string.to_i
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a long integer: #{new_string.inspect}")
end
396
+
397
+ end
398
+ end
399
+ end
@@ -0,0 +1,183 @@
1
+ require 'python/pickle/protocol0'
2
+ require 'python/pickle/instructions/mark'
3
+ require 'python/pickle/instructions/empty_tuple'
4
+ require 'python/pickle/instructions/stop'
5
+ require 'python/pickle/instructions/bin_float'
6
+ require 'python/pickle/instructions/bin_int1'
7
+ require 'python/pickle/instructions/int'
8
+ require 'python/pickle/instructions/long'
9
+ require 'python/pickle/instructions/none'
10
+ require 'python/pickle/instructions/reduce'
11
+ require 'python/pickle/instructions/bin_string'
12
+ require 'python/pickle/instructions/short_bin_string'
13
+ require 'python/pickle/instructions/bin_unicode'
14
+ require 'python/pickle/instructions/global'
15
+ require 'python/pickle/instructions/empty_list'
16
+ require 'python/pickle/instructions/append'
17
+ require 'python/pickle/instructions/bin_get'
18
+ require 'python/pickle/instructions/long_bin_get'
19
+ require 'python/pickle/instructions/bin_put'
20
+ require 'python/pickle/instructions/build'
21
+ require 'python/pickle/instructions/appends'
22
+ require 'python/pickle/instructions/set_item'
23
+ require 'python/pickle/instructions/set_items'
24
+ require 'python/pickle/instructions/tuple'
25
+ require 'python/pickle/instructions/empty_dict'
26
+
27
+ module Python
28
+ module Pickle
29
+ #
30
+ # Implements reading and writing of Python Pickle protocol 1.
31
+ #
32
+ # @api private
33
+ #
34
+ class Protocol1 < Protocol0
35
+
36
# Opcodes for Pickle protocol version 1.
#
# Contains every protocol 0 opcode plus the opcodes listed below. BINGET
# (104) and LONG_BINGET (106) are included because `read_instruction`
# decodes them. The Set is frozen so that the shared constant cannot be
# modified by accident.
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = (Protocol0::OPCODES + Set[
  41,  # EMPTY_TUPLE
  71,  # BINFLOAT
  75,  # BININT1
  84,  # BINSTRING
  85,  # SHORT_BINSTRING
  88,  # BINUNICODE
  93,  # EMPTY_LIST
  101, # APPENDS
  104, # BINGET
  106, # LONG_BINGET
  113, # BINPUT
  117, # SETITEMS
  125  # EMPTY_DICT
]).freeze
52
+
53
#
# Reads an instruction from the pickle stream.
#
# One opcode byte is read from the stream and mapped to its instruction.
# Opcodes that carry an argument also read that argument from the stream.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The pickle stream could not be parsed.
#
def read_instruction
  case (opcode = @io.getbyte)
  #
  # Protocol 0 instructions
  #
  when 40 # MARK
    Instructions::MARK
  when 46 # STOP
    Instructions::STOP
  when 48 # POP
    Instructions::POP
  when 49 # POP_MARK
    Instructions::POP_MARK
  when 50 # DUP
    Instructions::DUP
  when 70 # FLOAT
    Instructions::Float.new(read_float)
  when 73 # INT
    Instructions::Int.new(read_int)
  when 76 # LONG
    Instructions::Long.new(read_long)
  when 78 # NONE
    Instructions::NONE
  when 82 # REDUCE
    Instructions::REDUCE
  when 83 # STRING
    Instructions::String.new(read_string)
  when 86 # UNICODE
    Instructions::String.new(read_unicode_string)
  when 97 # APPEND
    Instructions::APPEND
  when 98 # BUILD
    Instructions::BUILD
  when 99 # GLOBAL
    Instructions::Global.new(read_nl_string,read_nl_string)
  when 100 # DICT
    Instructions::DICT
  when 103 # GET
    Instructions::Get.new(read_int)
  when 108 # LIST
    Instructions::LIST
  when 112 # PUT
    Instructions::Put.new(read_int)
  when 115 # SETITEM
    Instructions::SETITEM
  when 116 # TUPLE
    Instructions::TUPLE
  #
  # Protocol 1 instructions
  #
  when 41 # EMPTY_TUPLE
    Instructions::EMPTY_TUPLE
  when 71 # BINFLOAT
    Instructions::BinFloat.new(read_float64_be)
  when 75 # BININT1
    Instructions::BinInt1.new(read_uint8)
  when 84 # BINSTRING
    length = read_uint32_le
    string = @io.read(length)

    Instructions::BinString.new(length,string)
  when 85 # SHORT_BINSTRING
    length = read_uint8
    string = @io.read(length)

    Instructions::ShortBinString.new(length,string)
  when 88 # BINUNICODE
    length = read_uint32_le
    string = @io.read(length)

    # reading a positive length at the end of the stream returns nil, which
    # cannot be re-encoded; report it as a malformed stream instead
    if string.nil?
      raise(InvalidFormat,"unexpected end of stream while reading a BINUNICODE string")
    end

    Instructions::BinUnicode.new(length,string.force_encoding(Encoding::UTF_8))
  when 93 # EMPTY_LIST
    Instructions::EMPTY_LIST
  when 101 # APPENDS
    Instructions::APPENDS
  when 104 # BINGET
    Instructions::BinGet.new(read_uint8)
  when 106 # LONG_BINGET
    Instructions::LongBinGet.new(read_uint32_le)
  when 113 # BINPUT
    Instructions::BinPut.new(read_uint8)
  when 117 # SETITEMS
    Instructions::SETITEMS
  when 125 # EMPTY_DICT
    Instructions::EMPTY_DICT
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 1")
  end
end
151
+
152
#
# Reads a double precision (64bit) floating point number, in network
# byte-order (big-endian).
#
# @return [Float]
#   The decoded float.
#
# @raise [InvalidFormat]
#   The stream ended before all eight bytes could be read.
#
def read_float64_be
  bytes = @io.read(8)

  # a read at or near the end of the stream yields nil or a short String
  unless bytes && bytes.bytesize == 8
    raise(InvalidFormat,"unexpected end of stream while reading a 64bit float")
  end

  bytes.unpack1('G')
end
162
+
163
#
# Reads a single 8bit unsigned integer (byte).
#
# @return [Integer]
#   The value of the byte (0 - 255).
#
# @raise [InvalidFormat]
#   The stream ended before a byte could be read.
#
def read_uint8
  # getbyte returns nil at the end of the stream; a nil length must not
  # reach callers that pass it on to IO#read
  @io.getbyte || raise(InvalidFormat,"unexpected end of stream while reading an unsigned 8bit integer")
end
171
+
172
#
# Reads an unsigned 32bit integer, in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   The stream ended before all four bytes could be read.
#
def read_uint32_le
  bytes = @io.read(4)

  # a read at or near the end of the stream yields nil or a short String
  unless bytes && bytes.bytesize == 4
    raise(InvalidFormat,"unexpected end of stream while reading an unsigned 32bit integer")
  end

  bytes.unpack1('L<')
end
180
+
181
+ end
182
+ end
183
+ end