python-pickle 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.document +3 -0
  3. data/.github/workflows/ruby.yml +27 -0
  4. data/.gitignore +5 -0
  5. data/.rspec +1 -0
  6. data/.yardopts +1 -0
  7. data/ChangeLog.md +14 -0
  8. data/Gemfile +15 -0
  9. data/LICENSE.txt +20 -0
  10. data/README.md +149 -0
  11. data/Rakefile +13 -0
  12. data/gemspec.yml +25 -0
  13. data/lib/python/pickle/byte_array.rb +40 -0
  14. data/lib/python/pickle/deserializer.rb +595 -0
  15. data/lib/python/pickle/exceptions.rb +12 -0
  16. data/lib/python/pickle/instruction.rb +52 -0
  17. data/lib/python/pickle/instructions/add_items.rb +26 -0
  18. data/lib/python/pickle/instructions/append.rb +24 -0
  19. data/lib/python/pickle/instructions/appends.rb +26 -0
  20. data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
  21. data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
  22. data/lib/python/pickle/instructions/bin_float.rb +29 -0
  23. data/lib/python/pickle/instructions/bin_get.rb +27 -0
  24. data/lib/python/pickle/instructions/bin_int1.rb +29 -0
  25. data/lib/python/pickle/instructions/bin_put.rb +29 -0
  26. data/lib/python/pickle/instructions/bin_string.rb +32 -0
  27. data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
  28. data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
  29. data/lib/python/pickle/instructions/build.rb +24 -0
  30. data/lib/python/pickle/instructions/byte_array8.rb +32 -0
  31. data/lib/python/pickle/instructions/dict.rb +17 -0
  32. data/lib/python/pickle/instructions/dup.rb +24 -0
  33. data/lib/python/pickle/instructions/empty_dict.rb +26 -0
  34. data/lib/python/pickle/instructions/empty_list.rb +26 -0
  35. data/lib/python/pickle/instructions/empty_set.rb +26 -0
  36. data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
  37. data/lib/python/pickle/instructions/ext1.rb +29 -0
  38. data/lib/python/pickle/instructions/ext2.rb +29 -0
  39. data/lib/python/pickle/instructions/ext4.rb +29 -0
  40. data/lib/python/pickle/instructions/float.rb +24 -0
  41. data/lib/python/pickle/instructions/frame.rb +29 -0
  42. data/lib/python/pickle/instructions/frozen_set.rb +26 -0
  43. data/lib/python/pickle/instructions/get.rb +27 -0
  44. data/lib/python/pickle/instructions/global.rb +62 -0
  45. data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
  46. data/lib/python/pickle/instructions/has_value.rb +50 -0
  47. data/lib/python/pickle/instructions/int.rb +24 -0
  48. data/lib/python/pickle/instructions/list.rb +24 -0
  49. data/lib/python/pickle/instructions/long.rb +24 -0
  50. data/lib/python/pickle/instructions/long1.rb +32 -0
  51. data/lib/python/pickle/instructions/long4.rb +32 -0
  52. data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
  53. data/lib/python/pickle/instructions/mark.rb +24 -0
  54. data/lib/python/pickle/instructions/memoize.rb +26 -0
  55. data/lib/python/pickle/instructions/new_false.rb +24 -0
  56. data/lib/python/pickle/instructions/new_obj.rb +26 -0
  57. data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
  58. data/lib/python/pickle/instructions/new_true.rb +24 -0
  59. data/lib/python/pickle/instructions/next_buffer.rb +26 -0
  60. data/lib/python/pickle/instructions/none.rb +24 -0
  61. data/lib/python/pickle/instructions/pop.rb +24 -0
  62. data/lib/python/pickle/instructions/pop_mark.rb +24 -0
  63. data/lib/python/pickle/instructions/proto.rb +29 -0
  64. data/lib/python/pickle/instructions/put.rb +24 -0
  65. data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
  66. data/lib/python/pickle/instructions/reduce.rb +24 -0
  67. data/lib/python/pickle/instructions/set_item.rb +24 -0
  68. data/lib/python/pickle/instructions/set_items.rb +26 -0
  69. data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
  70. data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
  71. data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
  72. data/lib/python/pickle/instructions/stack_global.rb +26 -0
  73. data/lib/python/pickle/instructions/stop.rb +24 -0
  74. data/lib/python/pickle/instructions/string.rb +24 -0
  75. data/lib/python/pickle/instructions/tuple.rb +24 -0
  76. data/lib/python/pickle/instructions/tuple1.rb +24 -0
  77. data/lib/python/pickle/instructions/tuple2.rb +24 -0
  78. data/lib/python/pickle/instructions/tuple3.rb +24 -0
  79. data/lib/python/pickle/protocol.rb +56 -0
  80. data/lib/python/pickle/protocol0.rb +399 -0
  81. data/lib/python/pickle/protocol1.rb +183 -0
  82. data/lib/python/pickle/protocol2.rb +229 -0
  83. data/lib/python/pickle/protocol3.rb +163 -0
  84. data/lib/python/pickle/protocol4.rb +285 -0
  85. data/lib/python/pickle/protocol5.rb +218 -0
  86. data/lib/python/pickle/py_class.rb +75 -0
  87. data/lib/python/pickle/py_object.rb +141 -0
  88. data/lib/python/pickle/tuple.rb +19 -0
  89. data/lib/python/pickle/version.rb +6 -0
  90. data/lib/python/pickle.rb +226 -0
  91. data/python-pickle.gemspec +62 -0
  92. data/spec/byte_array_spec.rb +54 -0
  93. data/spec/deserializer_spec.rb +1201 -0
  94. data/spec/fixtures/ascii_str_v3.pkl +0 -0
  95. data/spec/fixtures/ascii_str_v4.pkl +0 -0
  96. data/spec/fixtures/ascii_str_v5.pkl +0 -0
  97. data/spec/fixtures/bin_str_v0.pkl +3 -0
  98. data/spec/fixtures/bin_str_v1.pkl +0 -0
  99. data/spec/fixtures/bin_str_v2.pkl +0 -0
  100. data/spec/fixtures/bin_str_v3.pkl +0 -0
  101. data/spec/fixtures/bin_str_v4.pkl +0 -0
  102. data/spec/fixtures/bin_str_v5.pkl +0 -0
  103. data/spec/fixtures/bytearray_v0.pkl +10 -0
  104. data/spec/fixtures/bytearray_v1.pkl +0 -0
  105. data/spec/fixtures/bytearray_v2.pkl +0 -0
  106. data/spec/fixtures/bytearray_v3.pkl +0 -0
  107. data/spec/fixtures/bytearray_v4.pkl +0 -0
  108. data/spec/fixtures/bytearray_v5.pkl +0 -0
  109. data/spec/fixtures/class_v0.pkl +4 -0
  110. data/spec/fixtures/class_v1.pkl +0 -0
  111. data/spec/fixtures/class_v2.pkl +0 -0
  112. data/spec/fixtures/class_v3.pkl +0 -0
  113. data/spec/fixtures/class_v4.pkl +0 -0
  114. data/spec/fixtures/class_v5.pkl +0 -0
  115. data/spec/fixtures/dict_v0.pkl +6 -0
  116. data/spec/fixtures/dict_v1.pkl +0 -0
  117. data/spec/fixtures/dict_v2.pkl +0 -0
  118. data/spec/fixtures/dict_v3.pkl +0 -0
  119. data/spec/fixtures/dict_v4.pkl +0 -0
  120. data/spec/fixtures/dict_v5.pkl +0 -0
  121. data/spec/fixtures/escaped_str_v0.pkl +3 -0
  122. data/spec/fixtures/escaped_str_v1.pkl +0 -0
  123. data/spec/fixtures/escaped_str_v2.pkl +0 -0
  124. data/spec/fixtures/false_v0.pkl +2 -0
  125. data/spec/fixtures/false_v1.pkl +2 -0
  126. data/spec/fixtures/false_v2.pkl +1 -0
  127. data/spec/fixtures/false_v3.pkl +1 -0
  128. data/spec/fixtures/false_v4.pkl +1 -0
  129. data/spec/fixtures/false_v5.pkl +1 -0
  130. data/spec/fixtures/float_v0.pkl +2 -0
  131. data/spec/fixtures/float_v1.pkl +1 -0
  132. data/spec/fixtures/float_v2.pkl +1 -0
  133. data/spec/fixtures/float_v3.pkl +1 -0
  134. data/spec/fixtures/float_v4.pkl +0 -0
  135. data/spec/fixtures/float_v5.pkl +0 -0
  136. data/spec/fixtures/function_v0.pkl +4 -0
  137. data/spec/fixtures/function_v1.pkl +0 -0
  138. data/spec/fixtures/function_v2.pkl +0 -0
  139. data/spec/fixtures/function_v3.pkl +0 -0
  140. data/spec/fixtures/function_v4.pkl +0 -0
  141. data/spec/fixtures/function_v5.pkl +0 -0
  142. data/spec/fixtures/hex_str_v0.pkl +3 -0
  143. data/spec/fixtures/hex_str_v1.pkl +0 -0
  144. data/spec/fixtures/hex_str_v2.pkl +0 -0
  145. data/spec/fixtures/int_v0.pkl +2 -0
  146. data/spec/fixtures/int_v1.pkl +1 -0
  147. data/spec/fixtures/int_v2.pkl +1 -0
  148. data/spec/fixtures/int_v3.pkl +1 -0
  149. data/spec/fixtures/int_v4.pkl +1 -0
  150. data/spec/fixtures/int_v5.pkl +1 -0
  151. data/spec/fixtures/list_v0.pkl +7 -0
  152. data/spec/fixtures/list_v1.pkl +0 -0
  153. data/spec/fixtures/list_v2.pkl +0 -0
  154. data/spec/fixtures/list_v3.pkl +0 -0
  155. data/spec/fixtures/list_v4.pkl +0 -0
  156. data/spec/fixtures/list_v5.pkl +0 -0
  157. data/spec/fixtures/long_v0.pkl +2 -0
  158. data/spec/fixtures/long_v1.pkl +2 -0
  159. data/spec/fixtures/long_v2.pkl +0 -0
  160. data/spec/fixtures/long_v3.pkl +0 -0
  161. data/spec/fixtures/long_v4.pkl +0 -0
  162. data/spec/fixtures/long_v5.pkl +0 -0
  163. data/spec/fixtures/nested_dict_v0.pkl +12 -0
  164. data/spec/fixtures/nested_dict_v1.pkl +0 -0
  165. data/spec/fixtures/nested_dict_v2.pkl +0 -0
  166. data/spec/fixtures/nested_dict_v3.pkl +0 -0
  167. data/spec/fixtures/nested_dict_v4.pkl +0 -0
  168. data/spec/fixtures/nested_dict_v5.pkl +0 -0
  169. data/spec/fixtures/nested_list_v0.pkl +9 -0
  170. data/spec/fixtures/nested_list_v1.pkl +0 -0
  171. data/spec/fixtures/nested_list_v2.pkl +0 -0
  172. data/spec/fixtures/nested_list_v3.pkl +0 -0
  173. data/spec/fixtures/nested_list_v4.pkl +0 -0
  174. data/spec/fixtures/nested_list_v5.pkl +0 -0
  175. data/spec/fixtures/none_v0.pkl +1 -0
  176. data/spec/fixtures/none_v1.pkl +1 -0
  177. data/spec/fixtures/none_v2.pkl +1 -0
  178. data/spec/fixtures/none_v3.pkl +1 -0
  179. data/spec/fixtures/none_v4.pkl +1 -0
  180. data/spec/fixtures/none_v5.pkl +1 -0
  181. data/spec/fixtures/object_v0.pkl +19 -0
  182. data/spec/fixtures/object_v1.pkl +0 -0
  183. data/spec/fixtures/object_v2.pkl +0 -0
  184. data/spec/fixtures/object_v3.pkl +0 -0
  185. data/spec/fixtures/object_v4.pkl +0 -0
  186. data/spec/fixtures/object_v5.pkl +0 -0
  187. data/spec/fixtures/str_v0.pkl +3 -0
  188. data/spec/fixtures/str_v1.pkl +0 -0
  189. data/spec/fixtures/str_v2.pkl +0 -0
  190. data/spec/fixtures/str_v3.pkl +0 -0
  191. data/spec/fixtures/str_v4.pkl +0 -0
  192. data/spec/fixtures/str_v5.pkl +0 -0
  193. data/spec/fixtures/true_v0.pkl +2 -0
  194. data/spec/fixtures/true_v1.pkl +2 -0
  195. data/spec/fixtures/true_v2.pkl +1 -0
  196. data/spec/fixtures/true_v3.pkl +1 -0
  197. data/spec/fixtures/true_v4.pkl +1 -0
  198. data/spec/fixtures/true_v5.pkl +1 -0
  199. data/spec/fixtures/unicode_str_v0.pkl +3 -0
  200. data/spec/fixtures/unicode_str_v1.pkl +0 -0
  201. data/spec/fixtures/unicode_str_v2.pkl +0 -0
  202. data/spec/fixtures/unicode_str_v3.pkl +0 -0
  203. data/spec/fixtures/unicode_str_v4.pkl +0 -0
  204. data/spec/fixtures/unicode_str_v5.pkl +0 -0
  205. data/spec/generate_pickles2.py +41 -0
  206. data/spec/generate_pickles3.py +40 -0
  207. data/spec/integration/load/protocol0_spec.rb +258 -0
  208. data/spec/integration/load/protocol1_spec.rb +258 -0
  209. data/spec/integration/load/protocol2_spec.rb +258 -0
  210. data/spec/integration/load/protocol3_spec.rb +258 -0
  211. data/spec/integration/load/protocol4_spec.rb +258 -0
  212. data/spec/integration/load/protocol5_spec.rb +258 -0
  213. data/spec/integration/parse/protocol0_spec.rb +467 -0
  214. data/spec/integration/parse/protocol1_spec.rb +459 -0
  215. data/spec/integration/parse/protocol2_spec.rb +471 -0
  216. data/spec/integration/parse/protocol3_spec.rb +407 -0
  217. data/spec/integration/parse/protocol4_spec.rb +439 -0
  218. data/spec/integration/parse/protocol5_spec.rb +419 -0
  219. data/spec/pickle_spec.rb +163 -0
  220. data/spec/protocol0_read_instruction_examples.rb +211 -0
  221. data/spec/protocol0_spec.rb +445 -0
  222. data/spec/protocol1_read_instruction_examples.rb +156 -0
  223. data/spec/protocol1_spec.rb +59 -0
  224. data/spec/protocol2_read_instruction_examples.rb +135 -0
  225. data/spec/protocol2_spec.rb +128 -0
  226. data/spec/protocol3_read_instruction_examples.rb +29 -0
  227. data/spec/protocol3_spec.rb +32 -0
  228. data/spec/protocol4_read_instruction_examples.rb +142 -0
  229. data/spec/protocol4_spec.rb +58 -0
  230. data/spec/protocol5_spec.rb +68 -0
  231. data/spec/py_class_spec.rb +62 -0
  232. data/spec/py_object_spec.rb +149 -0
  233. data/spec/spec_helper.rb +3 -0
  234. data/spec/tuple_spec.rb +18 -0
  235. metadata +325 -0
@@ -0,0 +1,399 @@
1
+ require 'python/pickle/protocol'
2
+ require 'python/pickle/instructions/mark'
3
+ require 'python/pickle/instructions/dict'
4
+ require 'python/pickle/instructions/string'
5
+ require 'python/pickle/instructions/put'
6
+ require 'python/pickle/instructions/get'
7
+ require 'python/pickle/instructions/float'
8
+ require 'python/pickle/instructions/int'
9
+ require 'python/pickle/instructions/long'
10
+ require 'python/pickle/instructions/set_item'
11
+ require 'python/pickle/instructions/tuple'
12
+ require 'python/pickle/instructions/list'
13
+ require 'python/pickle/instructions/none'
14
+ require 'python/pickle/instructions/append'
15
+ require 'python/pickle/instructions/global'
16
+ require 'python/pickle/instructions/reduce'
17
+ require 'python/pickle/instructions/build'
18
+ require 'python/pickle/instructions/pop'
19
+ require 'python/pickle/instructions/pop_mark'
20
+ require 'python/pickle/instructions/dup'
21
+ require 'python/pickle/instructions/stop'
22
+ require 'python/pickle/exceptions'
23
+
24
+ require 'set'
25
+
26
+ module Python
27
+ module Pickle
28
+ #
29
+ # Implements reading and writing of Python Pickle protocol 0.
30
+ #
31
+ # @api private
32
+ #
33
+ class Protocol0 < Protocol
34
+
35
# The opcode byte values that belong to Pickle protocol version 0.
#
# Each opcode is listed by its ASCII character and converted to its byte
# value.
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = Set.new(
  [
    '(', # MARK     (40)
    '.', # STOP     (46)
    '0', # POP      (48)
    '1', # POP_MARK (49)
    '2', # DUP      (50)
    'F', # FLOAT    (70)
    'I', # INT      (73)
    'L', # LONG     (76)
    'N', # NONE     (78)
    'R', # REDUCE   (82)
    'S', # STRING   (83)
    'V', # UNICODE  (86)
    'a', # APPEND   (97)
    'b', # BUILD    (98)
    'c', # GLOBAL   (99)
    'd', # DICT     (100)
    'g', # GET      (103)
    'l', # LIST     (108)
    'p', # PUT      (112)
    's', # SETITEM  (115)
    't'  # TUPLE    (116)
  ].map(&:ord)
)
61
+
62
#
# Decodes the next protocol 0 instruction from the pickle stream.
#
# A single opcode byte is read from `@io`. Opcodes that carry an argument
# have that argument read by the matching `read_*` helper.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The byte read is not a protocol 0 opcode. This also covers the end of
#   the stream, where the opcode read is `nil`.
#
def read_instruction
  opcode = @io.getbyte

  case opcode
  when 40  then Instructions::MARK                                         # MARK
  when 46  then Instructions::STOP                                         # STOP
  when 48  then Instructions::POP                                          # POP
  when 49  then Instructions::POP_MARK                                     # POP_MARK
  when 50  then Instructions::DUP                                          # DUP
  when 70  then Instructions::Float.new(read_float)                        # FLOAT
  when 73  then Instructions::Int.new(read_int)                            # INT
  when 76  then Instructions::Long.new(read_long)                          # LONG
  when 78  then Instructions::NONE                                         # NONE
  when 82  then Instructions::REDUCE                                       # REDUCE
  when 83  then Instructions::String.new(read_string)                      # STRING
  when 86  then Instructions::String.new(read_unicode_string)              # UNICODE
  when 97  then Instructions::APPEND                                       # APPEND
  when 98  then Instructions::BUILD                                        # BUILD
  when 99  then Instructions::Global.new(read_nl_string,read_nl_string)    # GLOBAL
  when 100 then Instructions::DICT                                         # DICT
  when 103 then Instructions::Get.new(read_int)                            # GET
  when 108 then Instructions::LIST                                         # LIST
  when 112 then Instructions::Put.new(read_int)                            # PUT
  when 115 then Instructions::SETITEM                                      # SETITEM
  when 116 then Instructions::TUPLE                                        # TUPLE
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 0")
  end
end
119
+
120
#
# Reads the two hexadecimal digits of a `\xNN` escape from the pickle
# stream and decodes them into a single character.
#
# @return [String]
#   The character whose ordinal is the decoded hex value.
#
# @raise [InvalidFormat]
#   The next two characters were not hex digits, or the stream ended before
#   two characters could be read.
#
def read_hex_escaped_char
  # IO#read(length) returns nil at the end of the stream; normalize it to an
  # empty String so a truncated escape is reported accurately instead of
  # leaking the text of `nil.inspect` into the error message.
  string = @io.read(2).to_s

  unless string.match?(/\A[0-9a-fA-F]{2}\z/)
    bad_hex = string.inspect[1..-2]

    raise(InvalidFormat,"invalid hex escape character: \"\\x#{bad_hex}\"")
  end

  string.to_i(16).chr
end
140
+
141
#
# Decodes the character that follows a backslash inside a single-quoted
# string.
#
# The escape letter is read from `@io`; a `x` escape additionally reads two
# hex digits via {#read_hex_escaped_char}.
#
# @return [String]
#   The unescaped raw character.
#
# @raise [InvalidFormat]
#   The escape letter is not one of `x`, `t`, `n`, `r`, `\` or `'`.
#
def read_escaped_char
  escape = @io.getc

  if    escape == 'x'  then read_hex_escaped_char
  elsif escape == 't'  then "\t"
  elsif escape == 'n'  then "\n"
  elsif escape == 'r'  then "\r"
  elsif escape == '\\' then '\\'
  elsif escape == "'"  then "'"
  else
    bad_escape = escape.inspect[1..-2]

    raise(InvalidFormat,"invalid backslash escape character: \"\\#{bad_escape}\"")
  end
end
161
+
162
#
# Reads characters from the pickle stream up to the next newline.
#
# The terminating newline is consumed but is not part of the result.
#
# @return [String]
#   The characters read before the newline.
#
# @raise [InvalidFormat]
#   The stream ended before a newline was found.
#
def read_nl_string
  line = String.new

  while (char = @io.getc)
    return line if char == "\n"

    line << char
  end

  raise(InvalidFormat,"unexpected end of stream after the end of a newline terminated string")
end
185
+
186
#
# Reads a quoted, newline-terminated string from the pickle stream.
#
# The string may be bracketed by either single or double quotes; the
# closing quote must be the same character as the opening quote. Python's
# `repr()` switches to double quotes when the text contains an apostrophe,
# so both forms occur in protocol 0 pickles. Backslash escapes inside the
# string are decoded by {#read_escaped_char}.
#
# @return [String]
#   The decoded raw string, without the bracketing quotes.
#
# @raise [InvalidFormat]
#   The string does not begin with a quote character, the stream ended
#   before the string was complete, or the closing quote is not followed by
#   a newline.
#
def read_string
  new_string = String.new(encoding: Encoding::ASCII_8BIT)

  # remember which quote opened the string; only that one can close it
  quote = @io.getc

  unless quote == "'" || quote == '"'
    raise(InvalidFormat,"cannot find beginning single-quote of string")
  end

  until @io.eof?
    case (char = @io.getc)
    when "\\"
      new_string << read_escaped_char
    when quote # end-of-string
      break
    else
      new_string << char
    end
  end

  newline = @io.getc

  if newline.nil?
    raise(InvalidFormat,"unexpected end of stream after the end of a single-quoted string")
  elsif newline != "\n"
    raise(InvalidFormat,"expected a '\\n' character following the string, but was #{newline.inspect}")
  end

  new_string
end
220
+
221
#
# Reads the four hexadecimal digits of a short `\uXXXX` unicode escape and
# decodes them into a UTF-8 character.
#
# @return [String]
#   The decoded UTF-8 character.
#
# @raise [InvalidFormat]
#   The next four characters were not hex digits, the stream ended early,
#   or the code point cannot be represented in UTF-8 (such as a lone
#   surrogate).
#
def read_unicode_escaped_char16
  # IO#read(length) returns nil at the end of the stream; treat that as an
  # empty (and therefore invalid) escape.
  string = @io.read(4).to_s

  unless string.match?(/\A[0-9a-fA-F]{4}\z/)
    bad_unicode = string.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{bad_unicode}\"")
  end

  begin
    string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are not valid in UTF-8
    raise(InvalidFormat,"invalid unicode escape character: \"\\u#{string}\"")
  end
end
241
+
242
#
# Reads the eight hexadecimal digits of a long `\UXXXXXXXX` unicode escape
# and decodes them into a UTF-8 character.
#
# @return [String]
#   The decoded UTF-8 character.
#
# @raise [InvalidFormat]
#   The next eight characters were not hex digits, the stream ended early,
#   or the code point cannot be represented in UTF-8 (for example a value
#   above U+10FFFF).
#
def read_unicode_escaped_char32
  # IO#read(length) returns nil at the end of the stream; treat that as an
  # empty (and therefore invalid) escape.
  string = @io.read(8).to_s

  unless string.match?(/\A[0-9a-fA-F]{8}\z/)
    bad_unicode = string.inspect[1..-2]

    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{bad_unicode}\"")
  end

  begin
    string.to_i(16).chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are not valid in UTF-8
    raise(InvalidFormat,"invalid unicode escape character: \"\\U#{string}\"")
  end
end
262
+
263
#
# Decodes the character that follows a backslash inside a unicode string.
#
# The escape letter is read from `@io` and selects which helper reads the
# remainder of the escape: `x` (two hex digits), `u` (four hex digits) or
# `U` (eight hex digits). A second backslash yields a literal backslash.
#
# @return [String]
#   The unescaped raw unicode character.
#
# @raise [InvalidFormat]
#   The escape letter is not one of `x`, `u`, `U` or `\`.
#
def read_unicode_escaped_char
  escape = @io.getc

  return read_hex_escaped_char       if escape == 'x'
  return read_unicode_escaped_char16 if escape == 'u'
  return read_unicode_escaped_char32 if escape == 'U'
  return "\\"                        if escape == "\\"

  bad_escape = escape.inspect[1..-2]

  raise(InvalidFormat,"invalid unicode escape character: \"\\#{bad_escape}\"")
end
281
+
282
#
# Reads a newline-terminated unicode String from the pickle stream.
#
# Characters are accumulated until a newline is read. A backslash starts an
# escape sequence, which is decoded by {#read_unicode_escaped_char}.
#
# @return [String]
#   The decoded raw unicode String, without the terminating newline.
#
# @raise [InvalidFormat]
#   The stream ended before a newline was found.
#
def read_unicode_string
  buffer = String.new(encoding: Encoding::UTF_8)

  while (char = @io.getc)
    return buffer if char == "\n" # end-of-string

    buffer << (char == "\\" ? read_unicode_escaped_char : char)
  end

  raise(InvalidFormat,"unexpected end of stream while parsing unicode string: #{buffer.inspect}")
end
304
+
305
#
# Reads a newline-terminated floating-point decimal from the pickle stream.
#
# Besides plain digits and a decimal point, this accepts a sign, an
# exponent (`1e+16`), and the special values `inf`, `-inf` and `nan`,
# which is how Python's `repr()` spells such floats.
#
# @return [Float]
#   The decoded float.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character or a premature end of the stream.
#
def read_float
  new_string = String.new

  # collect everything up to the terminating newline
  while (char = @io.getc)
    break if char == "\n" # end-of-float

    new_string << char
  end

  # the loop only ends with a nil char when the stream ran out
  if char.nil?
    raise(InvalidFormat,"unexpected end of stream while parsing a float: #{new_string.inspect}")
  end

  case new_string
  when /\A[+-]?inf(?:inity)?\z/i
    new_string.start_with?('-') ? -Float::INFINITY : Float::INFINITY
  when /\A[+-]?nan\z/i
    Float::NAN
  when /\A[0-9.eE+\-]*\z/
    new_string.to_f
  else
    # report the first character that cannot be part of a float
    bad_char = new_string[/[^0-9.eE+\-]/]

    raise(InvalidFormat,"encountered a non-numeric character while reading a float: #{bad_char.inspect}")
  end
end
330
+
331
#
# Reads a newline-terminated decimal integer from the pickle stream.
#
# A single leading `-` or `+` sign is accepted, so negative integers such
# as `-5` can be decoded.
#
# @return [Integer, true, false]
#   The decoded Integer.
#   If the integer is `00`, then `false` will be returned.
#   If the integer is `01`, then `true` will be returned.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character, a misplaced or dangling sign, or a
#   premature end of the stream.
#
def read_int
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-', '+'
      # a sign is only meaningful as the very first character
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
      end

      new_string << char
    when "\n" # end-of-integer
      case new_string
      when '00' then return false
      when '01' then return true
      when '-', '+'
        # a sign with no digits after it is not a number
        raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
      else
        return new_string.to_i
      end
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading an integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing an integer: #{new_string.inspect}")
end
362
+
363
#
# Reads a long integer from the pickle stream.
#
# The value is a run of decimal digits, optionally preceded by a single
# `-` or `+` sign, followed by an `L` suffix and a newline.
#
# @return [Integer]
#   The decoded Integer.
#
# @raise [InvalidFormat]
#   Encountered a non-numeric character, a misplaced or dangling sign, a
#   missing newline after the `L` suffix, or a premature end of the stream.
#
def read_long
  new_string = String.new

  until @io.eof?
    case (char = @io.getc)
    when /[0-9]/
      new_string << char
    when '-', '+'
      # a sign is only meaningful as the very first character
      unless new_string.empty?
        raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
      end

      new_string << char
    when 'L'
      # a sign with no digits after it is not a number
      if new_string == '-' || new_string == '+'
        raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
      end

      newline = @io.getc

      if newline.nil?
        raise(InvalidFormat,"unexpected end of stream after the end of an integer")
      elsif newline != "\n"
        raise(InvalidFormat,"expected a '\\n' character following the integer, but was #{newline.inspect}")
      end

      return new_string.to_i
    else
      raise(InvalidFormat,"encountered a non-numeric character while reading a long integer: #{char.inspect}")
    end
  end

  raise(InvalidFormat,"unexpected end of stream while parsing a long integer: #{new_string.inspect}")
end
396
+
397
+ end
398
+ end
399
+ end
@@ -0,0 +1,183 @@
1
+ require 'python/pickle/protocol0'
2
+ require 'python/pickle/instructions/mark'
3
+ require 'python/pickle/instructions/empty_tuple'
4
+ require 'python/pickle/instructions/stop'
5
+ require 'python/pickle/instructions/bin_float'
6
+ require 'python/pickle/instructions/bin_int1'
7
+ require 'python/pickle/instructions/int'
8
+ require 'python/pickle/instructions/long'
9
+ require 'python/pickle/instructions/none'
10
+ require 'python/pickle/instructions/reduce'
11
+ require 'python/pickle/instructions/bin_string'
12
+ require 'python/pickle/instructions/short_bin_string'
13
+ require 'python/pickle/instructions/bin_unicode'
14
+ require 'python/pickle/instructions/global'
15
+ require 'python/pickle/instructions/empty_list'
16
+ require 'python/pickle/instructions/append'
17
+ require 'python/pickle/instructions/bin_get'
18
+ require 'python/pickle/instructions/long_bin_get'
19
+ require 'python/pickle/instructions/bin_put'
20
+ require 'python/pickle/instructions/build'
21
+ require 'python/pickle/instructions/appends'
22
+ require 'python/pickle/instructions/set_item'
23
+ require 'python/pickle/instructions/set_items'
24
+ require 'python/pickle/instructions/tuple'
25
+ require 'python/pickle/instructions/empty_dict'
26
+
27
+ module Python
28
+ module Pickle
29
+ #
30
+ # Implements reading and writing of Python Pickle protocol 1.
31
+ #
32
+ # @api private
33
+ #
34
+ class Protocol1 < Protocol0
35
+
36
# Opcodes for Pickle protocol version 1.
#
# Contains every protocol 0 opcode plus each protocol 1 opcode that
# {#read_instruction} decodes, including BINGET (104) and LONG_BINGET
# (106).
#
# @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
OPCODES = Protocol0::OPCODES + Set[
  41,  # EMPTY_TUPLE
  71,  # BINFLOAT
  75,  # BININT1
  84,  # BINSTRING
  85,  # SHORT_BINSTRING
  88,  # BINUNICODE
  93,  # EMPTY_LIST
  101, # APPENDS
  104, # BINGET
  106, # LONG_BINGET
  113, # BINPUT
  117, # SETITEMS
  125  # EMPTY_DICT
]
52
+
53
#
# Decodes the next protocol 1 instruction from the pickle stream.
#
# A single opcode byte is read from `@io`. Both the protocol 0 opcodes and
# the protocol 1 additions are recognized; opcodes that carry an argument
# have that argument read by the matching `read_*` helper.
#
# @return [Instruction]
#   The decoded instruction.
#
# @raise [InvalidFormat]
#   The byte read is not a protocol 0 or protocol 1 opcode. This also
#   covers the end of the stream, where the opcode read is `nil`.
#
def read_instruction
  opcode = @io.getbyte

  case opcode
  #
  # Protocol 0 instructions
  #
  when 40  then Instructions::MARK                                         # MARK
  when 46  then Instructions::STOP                                         # STOP
  when 48  then Instructions::POP                                          # POP
  when 49  then Instructions::POP_MARK                                     # POP_MARK
  when 50  then Instructions::DUP                                          # DUP
  when 70  then Instructions::Float.new(read_float)                        # FLOAT
  when 73  then Instructions::Int.new(read_int)                            # INT
  when 76  then Instructions::Long.new(read_long)                          # LONG
  when 78  then Instructions::NONE                                         # NONE
  when 82  then Instructions::REDUCE                                       # REDUCE
  when 83  then Instructions::String.new(read_string)                      # STRING
  when 86  then Instructions::String.new(read_unicode_string)              # UNICODE
  when 97  then Instructions::APPEND                                       # APPEND
  when 98  then Instructions::BUILD                                        # BUILD
  when 99  then Instructions::Global.new(read_nl_string,read_nl_string)    # GLOBAL
  when 100 then Instructions::DICT                                         # DICT
  when 103 then Instructions::Get.new(read_int)                            # GET
  when 108 then Instructions::LIST                                         # LIST
  when 112 then Instructions::Put.new(read_int)                            # PUT
  when 115 then Instructions::SETITEM                                      # SETITEM
  when 116 then Instructions::TUPLE                                        # TUPLE
  #
  # Protocol 1 instructions
  #
  when 41  then Instructions::EMPTY_TUPLE                                  # EMPTY_TUPLE
  when 71  then Instructions::BinFloat.new(read_float64_be)                # BINFLOAT
  when 75  then Instructions::BinInt1.new(read_uint8)                      # BININT1
  when 84 # BINSTRING
    length = read_uint32_le

    Instructions::BinString.new(length,@io.read(length))
  when 85 # SHORT_BINSTRING
    length = read_uint8

    Instructions::ShortBinString.new(length,@io.read(length))
  when 88 # BINUNICODE
    length = read_uint32_le

    Instructions::BinUnicode.new(length,@io.read(length).force_encoding(Encoding::UTF_8))
  when 93  then Instructions::EMPTY_LIST                                   # EMPTY_LIST
  when 101 then Instructions::APPENDS                                      # APPENDS
  when 104 then Instructions::BinGet.new(read_uint8)                       # BINGET
  when 106 then Instructions::LongBinGet.new(read_uint32_le)               # LONG_BINGET
  when 113 then Instructions::BinPut.new(read_uint8)                       # BINPUT
  when 117 then Instructions::SETITEMS                                     # SETITEMS
  when 125 then Instructions::EMPTY_DICT                                   # EMPTY_DICT
  else
    raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 1")
  end
end
151
+
152
#
# Reads a double precision (64bit) floating point number, in network
# byte-order (big-endian).
#
# @return [Float]
#   The decoded float.
#
# @raise [InvalidFormat]
#   Fewer than eight bytes remained in the stream.
#
def read_float64_be
  bytes = @io.read(8)

  # IO#read returns nil at the end of the stream and a shorter String when
  # the stream runs out part-way through the value
  unless bytes && bytes.bytesize == 8
    raise(InvalidFormat,"unexpected end of stream while reading a 64bit float")
  end

  bytes.unpack1('G')
end
162
+
163
#
# Reads a single 8bit unsigned integer (byte).
#
# @return [Integer]
#   The byte value.
#
# @raise [InvalidFormat]
#   The stream ended before a byte could be read.
#
def read_uint8
  # IO#getbyte returns nil at the end of the stream; report that as a
  # format error instead of handing nil to the caller
  @io.getbyte || raise(InvalidFormat,"unexpected end of stream while reading a byte")
end
171
+
172
#
# Reads an unsigned 32bit integer, in little-endian byte-order.
#
# @return [Integer]
#   The decoded integer.
#
# @raise [InvalidFormat]
#   Fewer than four bytes remained in the stream.
#
def read_uint32_le
  bytes = @io.read(4)

  # IO#read returns nil at the end of the stream and a shorter String when
  # the stream runs out part-way through the value
  unless bytes && bytes.bytesize == 4
    raise(InvalidFormat,"unexpected end of stream while reading a 32bit unsigned integer")
  end

  bytes.unpack1('L<')
end
180
+
181
+ end
182
+ end
183
+ end