python-pickle 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.document +3 -0
  3. data/.github/workflows/ruby.yml +27 -0
  4. data/.gitignore +5 -0
  5. data/.rspec +1 -0
  6. data/.yardopts +1 -0
  7. data/ChangeLog.md +14 -0
  8. data/Gemfile +15 -0
  9. data/LICENSE.txt +20 -0
  10. data/README.md +149 -0
  11. data/Rakefile +13 -0
  12. data/gemspec.yml +25 -0
  13. data/lib/python/pickle/byte_array.rb +40 -0
  14. data/lib/python/pickle/deserializer.rb +595 -0
  15. data/lib/python/pickle/exceptions.rb +12 -0
  16. data/lib/python/pickle/instruction.rb +52 -0
  17. data/lib/python/pickle/instructions/add_items.rb +26 -0
  18. data/lib/python/pickle/instructions/append.rb +24 -0
  19. data/lib/python/pickle/instructions/appends.rb +26 -0
  20. data/lib/python/pickle/instructions/bin_bytes.rb +32 -0
  21. data/lib/python/pickle/instructions/bin_bytes8.rb +32 -0
  22. data/lib/python/pickle/instructions/bin_float.rb +29 -0
  23. data/lib/python/pickle/instructions/bin_get.rb +27 -0
  24. data/lib/python/pickle/instructions/bin_int1.rb +29 -0
  25. data/lib/python/pickle/instructions/bin_put.rb +29 -0
  26. data/lib/python/pickle/instructions/bin_string.rb +32 -0
  27. data/lib/python/pickle/instructions/bin_unicode.rb +32 -0
  28. data/lib/python/pickle/instructions/bin_unicode8.rb +32 -0
  29. data/lib/python/pickle/instructions/build.rb +24 -0
  30. data/lib/python/pickle/instructions/byte_array8.rb +32 -0
  31. data/lib/python/pickle/instructions/dict.rb +17 -0
  32. data/lib/python/pickle/instructions/dup.rb +24 -0
  33. data/lib/python/pickle/instructions/empty_dict.rb +26 -0
  34. data/lib/python/pickle/instructions/empty_list.rb +26 -0
  35. data/lib/python/pickle/instructions/empty_set.rb +26 -0
  36. data/lib/python/pickle/instructions/empty_tuple.rb +26 -0
  37. data/lib/python/pickle/instructions/ext1.rb +29 -0
  38. data/lib/python/pickle/instructions/ext2.rb +29 -0
  39. data/lib/python/pickle/instructions/ext4.rb +29 -0
  40. data/lib/python/pickle/instructions/float.rb +24 -0
  41. data/lib/python/pickle/instructions/frame.rb +29 -0
  42. data/lib/python/pickle/instructions/frozen_set.rb +26 -0
  43. data/lib/python/pickle/instructions/get.rb +27 -0
  44. data/lib/python/pickle/instructions/global.rb +62 -0
  45. data/lib/python/pickle/instructions/has_length_and_value.rb +58 -0
  46. data/lib/python/pickle/instructions/has_value.rb +50 -0
  47. data/lib/python/pickle/instructions/int.rb +24 -0
  48. data/lib/python/pickle/instructions/list.rb +24 -0
  49. data/lib/python/pickle/instructions/long.rb +24 -0
  50. data/lib/python/pickle/instructions/long1.rb +32 -0
  51. data/lib/python/pickle/instructions/long4.rb +32 -0
  52. data/lib/python/pickle/instructions/long_bin_get.rb +27 -0
  53. data/lib/python/pickle/instructions/mark.rb +24 -0
  54. data/lib/python/pickle/instructions/memoize.rb +26 -0
  55. data/lib/python/pickle/instructions/new_false.rb +24 -0
  56. data/lib/python/pickle/instructions/new_obj.rb +26 -0
  57. data/lib/python/pickle/instructions/new_obj_ex.rb +26 -0
  58. data/lib/python/pickle/instructions/new_true.rb +24 -0
  59. data/lib/python/pickle/instructions/next_buffer.rb +26 -0
  60. data/lib/python/pickle/instructions/none.rb +24 -0
  61. data/lib/python/pickle/instructions/pop.rb +24 -0
  62. data/lib/python/pickle/instructions/pop_mark.rb +24 -0
  63. data/lib/python/pickle/instructions/proto.rb +29 -0
  64. data/lib/python/pickle/instructions/put.rb +24 -0
  65. data/lib/python/pickle/instructions/readonly_buffer.rb +26 -0
  66. data/lib/python/pickle/instructions/reduce.rb +24 -0
  67. data/lib/python/pickle/instructions/set_item.rb +24 -0
  68. data/lib/python/pickle/instructions/set_items.rb +26 -0
  69. data/lib/python/pickle/instructions/short_bin_bytes.rb +32 -0
  70. data/lib/python/pickle/instructions/short_bin_string.rb +32 -0
  71. data/lib/python/pickle/instructions/short_bin_unicode.rb +32 -0
  72. data/lib/python/pickle/instructions/stack_global.rb +26 -0
  73. data/lib/python/pickle/instructions/stop.rb +24 -0
  74. data/lib/python/pickle/instructions/string.rb +24 -0
  75. data/lib/python/pickle/instructions/tuple.rb +24 -0
  76. data/lib/python/pickle/instructions/tuple1.rb +24 -0
  77. data/lib/python/pickle/instructions/tuple2.rb +24 -0
  78. data/lib/python/pickle/instructions/tuple3.rb +24 -0
  79. data/lib/python/pickle/protocol.rb +56 -0
  80. data/lib/python/pickle/protocol0.rb +399 -0
  81. data/lib/python/pickle/protocol1.rb +183 -0
  82. data/lib/python/pickle/protocol2.rb +229 -0
  83. data/lib/python/pickle/protocol3.rb +163 -0
  84. data/lib/python/pickle/protocol4.rb +285 -0
  85. data/lib/python/pickle/protocol5.rb +218 -0
  86. data/lib/python/pickle/py_class.rb +75 -0
  87. data/lib/python/pickle/py_object.rb +141 -0
  88. data/lib/python/pickle/tuple.rb +19 -0
  89. data/lib/python/pickle/version.rb +6 -0
  90. data/lib/python/pickle.rb +226 -0
  91. data/python-pickle.gemspec +62 -0
  92. data/spec/byte_array_spec.rb +54 -0
  93. data/spec/deserializer_spec.rb +1201 -0
  94. data/spec/fixtures/ascii_str_v3.pkl +0 -0
  95. data/spec/fixtures/ascii_str_v4.pkl +0 -0
  96. data/spec/fixtures/ascii_str_v5.pkl +0 -0
  97. data/spec/fixtures/bin_str_v0.pkl +3 -0
  98. data/spec/fixtures/bin_str_v1.pkl +0 -0
  99. data/spec/fixtures/bin_str_v2.pkl +0 -0
  100. data/spec/fixtures/bin_str_v3.pkl +0 -0
  101. data/spec/fixtures/bin_str_v4.pkl +0 -0
  102. data/spec/fixtures/bin_str_v5.pkl +0 -0
  103. data/spec/fixtures/bytearray_v0.pkl +10 -0
  104. data/spec/fixtures/bytearray_v1.pkl +0 -0
  105. data/spec/fixtures/bytearray_v2.pkl +0 -0
  106. data/spec/fixtures/bytearray_v3.pkl +0 -0
  107. data/spec/fixtures/bytearray_v4.pkl +0 -0
  108. data/spec/fixtures/bytearray_v5.pkl +0 -0
  109. data/spec/fixtures/class_v0.pkl +4 -0
  110. data/spec/fixtures/class_v1.pkl +0 -0
  111. data/spec/fixtures/class_v2.pkl +0 -0
  112. data/spec/fixtures/class_v3.pkl +0 -0
  113. data/spec/fixtures/class_v4.pkl +0 -0
  114. data/spec/fixtures/class_v5.pkl +0 -0
  115. data/spec/fixtures/dict_v0.pkl +6 -0
  116. data/spec/fixtures/dict_v1.pkl +0 -0
  117. data/spec/fixtures/dict_v2.pkl +0 -0
  118. data/spec/fixtures/dict_v3.pkl +0 -0
  119. data/spec/fixtures/dict_v4.pkl +0 -0
  120. data/spec/fixtures/dict_v5.pkl +0 -0
  121. data/spec/fixtures/escaped_str_v0.pkl +3 -0
  122. data/spec/fixtures/escaped_str_v1.pkl +0 -0
  123. data/spec/fixtures/escaped_str_v2.pkl +0 -0
  124. data/spec/fixtures/false_v0.pkl +2 -0
  125. data/spec/fixtures/false_v1.pkl +2 -0
  126. data/spec/fixtures/false_v2.pkl +1 -0
  127. data/spec/fixtures/false_v3.pkl +1 -0
  128. data/spec/fixtures/false_v4.pkl +1 -0
  129. data/spec/fixtures/false_v5.pkl +1 -0
  130. data/spec/fixtures/float_v0.pkl +2 -0
  131. data/spec/fixtures/float_v1.pkl +1 -0
  132. data/spec/fixtures/float_v2.pkl +1 -0
  133. data/spec/fixtures/float_v3.pkl +1 -0
  134. data/spec/fixtures/float_v4.pkl +0 -0
  135. data/spec/fixtures/float_v5.pkl +0 -0
  136. data/spec/fixtures/function_v0.pkl +4 -0
  137. data/spec/fixtures/function_v1.pkl +0 -0
  138. data/spec/fixtures/function_v2.pkl +0 -0
  139. data/spec/fixtures/function_v3.pkl +0 -0
  140. data/spec/fixtures/function_v4.pkl +0 -0
  141. data/spec/fixtures/function_v5.pkl +0 -0
  142. data/spec/fixtures/hex_str_v0.pkl +3 -0
  143. data/spec/fixtures/hex_str_v1.pkl +0 -0
  144. data/spec/fixtures/hex_str_v2.pkl +0 -0
  145. data/spec/fixtures/int_v0.pkl +2 -0
  146. data/spec/fixtures/int_v1.pkl +1 -0
  147. data/spec/fixtures/int_v2.pkl +1 -0
  148. data/spec/fixtures/int_v3.pkl +1 -0
  149. data/spec/fixtures/int_v4.pkl +1 -0
  150. data/spec/fixtures/int_v5.pkl +1 -0
  151. data/spec/fixtures/list_v0.pkl +7 -0
  152. data/spec/fixtures/list_v1.pkl +0 -0
  153. data/spec/fixtures/list_v2.pkl +0 -0
  154. data/spec/fixtures/list_v3.pkl +0 -0
  155. data/spec/fixtures/list_v4.pkl +0 -0
  156. data/spec/fixtures/list_v5.pkl +0 -0
  157. data/spec/fixtures/long_v0.pkl +2 -0
  158. data/spec/fixtures/long_v1.pkl +2 -0
  159. data/spec/fixtures/long_v2.pkl +0 -0
  160. data/spec/fixtures/long_v3.pkl +0 -0
  161. data/spec/fixtures/long_v4.pkl +0 -0
  162. data/spec/fixtures/long_v5.pkl +0 -0
  163. data/spec/fixtures/nested_dict_v0.pkl +12 -0
  164. data/spec/fixtures/nested_dict_v1.pkl +0 -0
  165. data/spec/fixtures/nested_dict_v2.pkl +0 -0
  166. data/spec/fixtures/nested_dict_v3.pkl +0 -0
  167. data/spec/fixtures/nested_dict_v4.pkl +0 -0
  168. data/spec/fixtures/nested_dict_v5.pkl +0 -0
  169. data/spec/fixtures/nested_list_v0.pkl +9 -0
  170. data/spec/fixtures/nested_list_v1.pkl +0 -0
  171. data/spec/fixtures/nested_list_v2.pkl +0 -0
  172. data/spec/fixtures/nested_list_v3.pkl +0 -0
  173. data/spec/fixtures/nested_list_v4.pkl +0 -0
  174. data/spec/fixtures/nested_list_v5.pkl +0 -0
  175. data/spec/fixtures/none_v0.pkl +1 -0
  176. data/spec/fixtures/none_v1.pkl +1 -0
  177. data/spec/fixtures/none_v2.pkl +1 -0
  178. data/spec/fixtures/none_v3.pkl +1 -0
  179. data/spec/fixtures/none_v4.pkl +1 -0
  180. data/spec/fixtures/none_v5.pkl +1 -0
  181. data/spec/fixtures/object_v0.pkl +19 -0
  182. data/spec/fixtures/object_v1.pkl +0 -0
  183. data/spec/fixtures/object_v2.pkl +0 -0
  184. data/spec/fixtures/object_v3.pkl +0 -0
  185. data/spec/fixtures/object_v4.pkl +0 -0
  186. data/spec/fixtures/object_v5.pkl +0 -0
  187. data/spec/fixtures/str_v0.pkl +3 -0
  188. data/spec/fixtures/str_v1.pkl +0 -0
  189. data/spec/fixtures/str_v2.pkl +0 -0
  190. data/spec/fixtures/str_v3.pkl +0 -0
  191. data/spec/fixtures/str_v4.pkl +0 -0
  192. data/spec/fixtures/str_v5.pkl +0 -0
  193. data/spec/fixtures/true_v0.pkl +2 -0
  194. data/spec/fixtures/true_v1.pkl +2 -0
  195. data/spec/fixtures/true_v2.pkl +1 -0
  196. data/spec/fixtures/true_v3.pkl +1 -0
  197. data/spec/fixtures/true_v4.pkl +1 -0
  198. data/spec/fixtures/true_v5.pkl +1 -0
  199. data/spec/fixtures/unicode_str_v0.pkl +3 -0
  200. data/spec/fixtures/unicode_str_v1.pkl +0 -0
  201. data/spec/fixtures/unicode_str_v2.pkl +0 -0
  202. data/spec/fixtures/unicode_str_v3.pkl +0 -0
  203. data/spec/fixtures/unicode_str_v4.pkl +0 -0
  204. data/spec/fixtures/unicode_str_v5.pkl +0 -0
  205. data/spec/generate_pickles2.py +41 -0
  206. data/spec/generate_pickles3.py +40 -0
  207. data/spec/integration/load/protocol0_spec.rb +258 -0
  208. data/spec/integration/load/protocol1_spec.rb +258 -0
  209. data/spec/integration/load/protocol2_spec.rb +258 -0
  210. data/spec/integration/load/protocol3_spec.rb +258 -0
  211. data/spec/integration/load/protocol4_spec.rb +258 -0
  212. data/spec/integration/load/protocol5_spec.rb +258 -0
  213. data/spec/integration/parse/protocol0_spec.rb +467 -0
  214. data/spec/integration/parse/protocol1_spec.rb +459 -0
  215. data/spec/integration/parse/protocol2_spec.rb +471 -0
  216. data/spec/integration/parse/protocol3_spec.rb +407 -0
  217. data/spec/integration/parse/protocol4_spec.rb +439 -0
  218. data/spec/integration/parse/protocol5_spec.rb +419 -0
  219. data/spec/pickle_spec.rb +163 -0
  220. data/spec/protocol0_read_instruction_examples.rb +211 -0
  221. data/spec/protocol0_spec.rb +445 -0
  222. data/spec/protocol1_read_instruction_examples.rb +156 -0
  223. data/spec/protocol1_spec.rb +59 -0
  224. data/spec/protocol2_read_instruction_examples.rb +135 -0
  225. data/spec/protocol2_spec.rb +128 -0
  226. data/spec/protocol3_read_instruction_examples.rb +29 -0
  227. data/spec/protocol3_spec.rb +32 -0
  228. data/spec/protocol4_read_instruction_examples.rb +142 -0
  229. data/spec/protocol4_spec.rb +58 -0
  230. data/spec/protocol5_spec.rb +68 -0
  231. data/spec/py_class_spec.rb +62 -0
  232. data/spec/py_object_spec.rb +149 -0
  233. data/spec/spec_helper.rb +3 -0
  234. data/spec/tuple_spec.rb +18 -0
  235. metadata +325 -0
@@ -0,0 +1,229 @@
1
+ require 'python/pickle/protocol1'
2
+ require 'python/pickle/instructions/proto'
3
+ require 'python/pickle/instructions/new_obj'
4
+ require 'python/pickle/instructions/ext1'
5
+ require 'python/pickle/instructions/ext2'
6
+ require 'python/pickle/instructions/ext4'
7
+ require 'python/pickle/instructions/tuple1'
8
+ require 'python/pickle/instructions/tuple2'
9
+ require 'python/pickle/instructions/tuple3'
10
+ require 'python/pickle/instructions/new_true'
11
+ require 'python/pickle/instructions/new_false'
12
+ require 'python/pickle/instructions/long1'
13
+ require 'python/pickle/instructions/long4'
14
+
15
+ module Python
16
+ module Pickle
17
+ class Protocol2 < Protocol1
18
+
19
+ # Opcodes for Pickle protocol version 2.
20
+ #
21
+ # @see https://github.com/python/cpython/blob/main/Lib/pickletools.py
22
+ OPCODES = Protocol1::OPCODES + Set[
23
+ 128, # PROTO
24
+ 129, # NEWOBJ
25
+ 130, # EXT1
26
+ 131, # EXT2
27
+ 132, # EXT4
28
+ 133, # TUPLE1
29
+ 134, # TUPLE2
30
+ 135, # TUPLE3
31
+ 136, # NEWTRUE
32
+ 137, # NEWFALSE
33
+ 138, # LONG1
34
+ 139 # LONG4
35
+ ]
36
+
37
+ #
38
+ # Reads an instruction from the pickle stream.
39
+ #
40
+ # @return [Instruction]
41
+ # The decoded instruction.
42
+ #
43
+ # @raise [InvalidFormat]
44
+ # The pickle stream could not be parsed.
45
+ #
46
+ def read_instruction
47
+ case (opcode = @io.getbyte)
48
+ #
49
+ # Protocol 0 instructions
50
+ #
51
+ when 40 # MARK
52
+ Instructions::MARK
53
+ when 46 # STOP
54
+ Instructions::STOP
55
+ when 48 # POP
56
+ Instructions::POP
57
+ when 49 # POP_MARK
58
+ Instructions::POP_MARK
59
+ when 50 # DUP
60
+ Instructions::DUP
61
+ when 70 # FLOAT
62
+ Instructions::Float.new(read_float)
63
+ when 73 # INT
64
+ Instructions::Int.new(read_int)
65
+ when 76 # LONG
66
+ Instructions::Long.new(read_long)
67
+ when 78 # NONE
68
+ Instructions::NONE
69
+ when 82 # REDUCE
70
+ Instructions::REDUCE
71
+ when 83 # STRING
72
+ Instructions::String.new(read_string)
73
+ when 86 # UNICODE
74
+ Instructions::String.new(read_unicode_string)
75
+ when 97 # APPEND
76
+ Instructions::APPEND
77
+ when 98 # BUILD
78
+ Instructions::BUILD
79
+ when 99 # GLOBAL
80
+ Instructions::Global.new(read_nl_string,read_nl_string)
81
+ when 100 # DICT
82
+ Instructions::DICT
83
+ when 103 # GET
84
+ Instructions::Get.new(read_int)
85
+ when 108 # LIST
86
+ Instructions::LIST
87
+ when 112 # PUT
88
+ Instructions::Put.new(read_int)
89
+ when 115 # SETITEM
90
+ Instructions::SETITEM
91
+ when 116 # TUPLE
92
+ Instructions::TUPLE
93
+ #
94
+ # Protocol 1 instructions
95
+ #
96
+ when 41 # EMPTY_TUPLE
97
+ Instructions::EMPTY_TUPLE
98
+ when 71 # BINFLOAT
99
+ Instructions::BinFloat.new(read_float64_be)
100
+ when 75 # BININT1
101
+ Instructions::BinInt1.new(read_uint8)
102
+ when 84 # BINSTRING
103
+ length = read_uint32_le
104
+ string = @io.read(length)
105
+
106
+ Instructions::BinString.new(length,string)
107
+ when 85 # SHORT_BINSTRING
108
+ length = read_uint8
109
+ string = @io.read(length)
110
+
111
+ Instructions::ShortBinString.new(length,string)
112
+ when 88 # BINUNICODE
113
+ length = read_uint32_le
114
+ string = @io.read(length).force_encoding(Encoding::UTF_8)
115
+
116
+ Instructions::BinUnicode.new(length,string)
117
+ when 93 # EMPTY_LIST
118
+ Instructions::EMPTY_LIST
119
+ when 101 # APPENDS
120
+ Instructions::APPENDS
121
+ when 104 # BINGET
122
+ Instructions::BinGet.new(read_uint8)
123
+ when 106 # LONG_BINGET
124
+ Instructions::LongBinGet.new(read_uint32_le)
125
+ when 113 # BINPUT
126
+ Instructions::BinPut.new(read_uint8)
127
+ when 117 # SETITEMS
128
+ Instructions::SETITEMS
129
+ when 125 # EMPTY_DICT
130
+ Instructions::EMPTY_DICT
131
+ #
132
+ # Protocol 2 instructions
133
+ #
134
+ when 128 # PROTO
135
+ Instructions::Proto.new(read_uint8)
136
+ when 129 # NEWOBJ
137
+ Instructions::NEWOBJ
138
+ when 130 # EXT1
139
+ Instructions::Ext1.new(read_uint8)
140
+ when 131 # EXT2
141
+ Instructions::Ext2.new(read_uint16_le)
142
+ when 132 # EXT4
143
+ Instructions::Ext4.new(read_uint32_le)
144
+ when 133 # TUPLE1
145
+ Instructions::TUPLE1
146
+ when 134 # TUPLE2
147
+ Instructions::TUPLE2
148
+ when 135 # TUPLE3
149
+ Instructions::TUPLE3
150
+ when 136 # NEWTRUE
151
+ Instructions::NEWTRUE
152
+ when 137 # NEWFALSE
153
+ Instructions::NEWFALSE
154
+ when 138 # LONG1
155
+ length = read_uint8
156
+ long = read_int_le(length)
157
+
158
+ Instructions::Long1.new(length,long)
159
+ when 139 # LONG4
160
+ length = read_uint32_le
161
+ long = read_int_le(length)
162
+
163
+ Instructions::Long4.new(length,long)
164
+ else
165
+ raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 2")
166
+ end
167
+ end
168
+
169
+ #
170
+ # Reads an unsigned 16bit integer in little-endian byte-order.
171
+ #
172
+ # @return [Integer]
173
+ # The decoded integer.
174
+ #
175
+ def read_uint16_le
176
+ @io.read(2).unpack1('S<')
177
+ end
178
+
179
+ #
180
+ # Reads and unpacks a signed integer of arbitrary length.
181
+ #
182
+ # @param [Integer] length
183
+ # The number of bytes to read.
184
+ #
185
+ # @return [Integer]
186
+ # The decoded long integer.
187
+ #
188
+ def read_int_le(length)
189
+ data = @io.read(length)
190
+
191
+ if data.bytesize < length
192
+ raise(InvalidFormat,"premature end of string")
193
+ end
194
+
195
+ return unpack_int_le(data)
196
+ end
197
+
198
+ #
199
+ # Decodes a packed twos-complement long value of arbitrary length.
200
+ #
201
+ # @param [String] data
202
+ # The packed long to decode.
203
+ #
204
+ # @return [Integer]
205
+ # The unpacked long.
206
+ #
207
+ def unpack_int_le(data)
208
+ return 0 if data.empty?
209
+
210
+ long = 0
211
+ shift = 0
212
+
213
+ data.each_byte do |b|
214
+ long |= b << shift
215
+ shift += 8
216
+ end
217
+
218
+ max_signed = (1 << (shift-1))
219
+
220
+ if long >= max_signed
221
+ long -= (1 << shift)
222
+ end
223
+
224
+ return long
225
+ end
226
+
227
+ end
228
+ end
229
+ end
@@ -0,0 +1,163 @@
1
+ require 'python/pickle/protocol2'
2
+ require 'python/pickle/instructions/bin_bytes'
3
+ require 'python/pickle/instructions/short_bin_bytes'
4
+
5
+ module Python
6
+ module Pickle
7
+ class Protocol3 < Protocol2
8
+ # Opcodes for Pickle protocol version 3.
9
+ #
10
+ # @see http://formats.kaitai.io/python_pickle/ruby.html
11
+ OPCODES = Protocol2::OPCODES + Set[
12
+ 66, # BINBYTES
13
+ 67 # SHORT_BINBYTES
14
+ ]
15
+
16
+ #
17
+ # Reads an instruction from the pickle stream.
18
+ #
19
+ # @return [Instruction]
20
+ # The decoded instruction.
21
+ #
22
+ # @raise [InvalidFormat]
23
+ # The pickle stream could not be parsed.
24
+ #
25
+ def read_instruction
26
+ case (opcode = @io.getbyte)
27
+ #
28
+ # Protocol 0 instructions
29
+ #
30
+ when 40 # MARK
31
+ Instructions::MARK
32
+ when 46 # STOP
33
+ Instructions::STOP
34
+ when 48 # POP
35
+ Instructions::POP
36
+ when 49 # POP_MARK
37
+ Instructions::POP_MARK
38
+ when 50 # DUP
39
+ Instructions::DUP
40
+ when 70 # FLOAT
41
+ Instructions::Float.new(read_float)
42
+ when 73 # INT
43
+ Instructions::Int.new(read_int)
44
+ when 76 # LONG
45
+ Instructions::Long.new(read_long)
46
+ when 78 # NONE
47
+ Instructions::NONE
48
+ when 82 # REDUCE
49
+ Instructions::REDUCE
50
+ when 83 # STRING
51
+ Instructions::String.new(read_string)
52
+ when 86 # UNICODE
53
+ Instructions::String.new(read_unicode_string)
54
+ when 97 # APPEND
55
+ Instructions::APPEND
56
+ when 98 # BUILD
57
+ Instructions::BUILD
58
+ when 99 # GLOBAL
59
+ Instructions::Global.new(read_nl_string,read_nl_string)
60
+ when 100 # DICT
61
+ Instructions::DICT
62
+ when 103 # GET
63
+ Instructions::Get.new(read_int)
64
+ when 108 # LIST
65
+ Instructions::LIST
66
+ when 112 # PUT
67
+ Instructions::Put.new(read_int)
68
+ when 115 # SETITEM
69
+ Instructions::SETITEM
70
+ when 116 # TUPLE
71
+ Instructions::TUPLE
72
+ #
73
+ # Protocol 1 instructions
74
+ #
75
+ when 41 # EMPTY_TUPLE
76
+ Instructions::EMPTY_TUPLE
77
+ when 71 # BINFLOAT
78
+ Instructions::BinFloat.new(read_float64_be)
79
+ when 75 # BININT1
80
+ Instructions::BinInt1.new(read_uint8)
81
+ when 84 # BINSTRING
82
+ length = read_uint32_le
83
+ string = @io.read(length)
84
+
85
+ Instructions::BinString.new(length,string)
86
+ when 85 # SHORT_BINSTRING
87
+ length = read_uint8
88
+ string = @io.read(length)
89
+
90
+ Instructions::ShortBinString.new(length,string)
91
+ when 88 # BINUNICODE
92
+ length = read_uint32_le
93
+ string = @io.read(length).force_encoding(Encoding::UTF_8)
94
+
95
+ Instructions::BinUnicode.new(length,string)
96
+ when 93 # EMPTY_LIST
97
+ Instructions::EMPTY_LIST
98
+ when 101 # APPENDS
99
+ Instructions::APPENDS
100
+ when 104 # BINGET
101
+ Instructions::BinGet.new(read_uint8)
102
+ when 106 # LONG_BINGET
103
+ Instructions::LongBinGet.new(read_uint32_le)
104
+ when 113 # BINPUT
105
+ Instructions::BinPut.new(read_uint8)
106
+ when 117 # SETITEMS
107
+ Instructions::SETITEMS
108
+ when 125 # EMPTY_DICT
109
+ Instructions::EMPTY_DICT
110
+ #
111
+ # Protocol 2 instructions
112
+ #
113
+ when 128 # PROTO
114
+ Instructions::Proto.new(read_uint8)
115
+ when 129 # NEWOBJ
116
+ Instructions::NEWOBJ
117
+ when 130 # EXT1
118
+ Instructions::Ext1.new(read_uint8)
119
+ when 131 # EXT2
120
+ Instructions::Ext2.new(read_uint16_le)
121
+ when 132 # EXT4
122
+ Instructions::Ext4.new(read_uint32_le)
123
+ when 133 # TUPLE1
124
+ Instructions::TUPLE1
125
+ when 134 # TUPLE2
126
+ Instructions::TUPLE2
127
+ when 135 # TUPLE3
128
+ Instructions::TUPLE3
129
+ when 136 # NEWTRUE
130
+ Instructions::NEWTRUE
131
+ when 137 # NEWFALSE
132
+ Instructions::NEWFALSE
133
+ when 138 # LONG1
134
+ length = read_uint8
135
+ long = read_int_le(length)
136
+
137
+ Instructions::Long1.new(length,long)
138
+ when 139 # LONG4
139
+ length = read_uint32_le
140
+ long = read_int_le(length)
141
+
142
+ Instructions::Long4.new(length,long)
143
+ #
144
+ # Protocol 3 instructions
145
+ #
146
+ when 66 # BINBYTES
147
+ length = read_uint32_le
148
+ bytes = @io.read(length)
149
+
150
+ Instructions::BinBytes.new(length,bytes)
151
+ when 67 # SHORT_BINBYTES
152
+ length = read_uint8
153
+ bytes = @io.read(length)
154
+
155
+ Instructions::ShortBinBytes.new(length,bytes)
156
+ else
157
+ raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 3")
158
+ end
159
+ end
160
+
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,285 @@
1
+ require 'python/pickle/protocol3'
2
+ require 'python/pickle/instructions/short_bin_unicode'
3
+ require 'python/pickle/instructions/bin_unicode8'
4
+ require 'python/pickle/instructions/bin_bytes8'
5
+ require 'python/pickle/instructions/empty_set'
6
+ require 'python/pickle/instructions/add_items'
7
+ require 'python/pickle/instructions/frozen_set'
8
+ require 'python/pickle/instructions/new_obj_ex'
9
+ require 'python/pickle/instructions/stack_global'
10
+ require 'python/pickle/instructions/memoize'
11
+ require 'python/pickle/instructions/frame'
12
+
13
+ module Python
14
+ module Pickle
15
+ #
16
+ # Implements Python Pickle protocol 4.
17
+ #
18
+ class Protocol4 < Protocol3
19
+
20
+ #
21
+ # Initializes the protocol 4 reader/writer.
22
+ #
23
+ def initialize(io)
24
+ super(io)
25
+
26
+ @io_stack = []
27
+ end
28
+
29
+ # Opcodes for Pickle protocol 4.
30
+ #
31
+ # @see https://peps.python.org/pep-3154/
32
+ OPCODES = Protocol3::OPCODES + Set[
33
+ 140, # SHORT_BINUNICODE
34
+ 141, # BINUNICODE8
35
+ 142, # BINBYTES8
36
+ 143, # EMPTY_SET
37
+ 144, # ADDITEMS
38
+ 145, # FROZENSET
39
+ 146, # NEWOBJ_EX
40
+ 147, # STACK_GLOBAL
41
+ 148, # MEMOIZE
42
+ 149 # FRAME
43
+ ]
44
+
45
+ #
46
+ # Reads an instruction from the pickle stream.
47
+ #
48
+ # @return [Instruction]
49
+ # The decoded instruction.
50
+ #
51
+ # @raise [InvalidFormat]
52
+ # The pickle stream could not be parsed.
53
+ #
54
+ def read_instruction
55
+ case (opcode = @io.getbyte)
56
+ #
57
+ # Protocol 0 instructions
58
+ #
59
+ when 40 # MARK
60
+ Instructions::MARK
61
+ when 46 # STOP
62
+ Instructions::STOP
63
+ when 48 # POP
64
+ Instructions::POP
65
+ when 49 # POP_MARK
66
+ Instructions::POP_MARK
67
+ when 50 # DUP
68
+ Instructions::DUP
69
+ when 70 # FLOAT
70
+ Instructions::Float.new(read_float)
71
+ when 73 # INT
72
+ Instructions::Int.new(read_int)
73
+ when 76 # LONG
74
+ Instructions::Long.new(read_long)
75
+ when 78 # NONE
76
+ Instructions::NONE
77
+ when 82 # REDUCE
78
+ Instructions::REDUCE
79
+ when 83 # STRING
80
+ Instructions::String.new(read_string)
81
+ when 86 # UNICODE
82
+ Instructions::String.new(read_unicode_string)
83
+ when 97 # APPEND
84
+ Instructions::APPEND
85
+ when 98 # BUILD
86
+ Instructions::BUILD
87
+ when 99 # GLOBAL
88
+ Instructions::Global.new(read_nl_string,read_nl_string)
89
+ when 100 # DICT
90
+ Instructions::DICT
91
+ when 103 # GET
92
+ Instructions::Get.new(read_int)
93
+ when 108 # LIST
94
+ Instructions::LIST
95
+ when 112 # PUT
96
+ Instructions::Put.new(read_int)
97
+ when 115 # SETITEM
98
+ Instructions::SETITEM
99
+ when 116 # TUPLE
100
+ Instructions::TUPLE
101
+ #
102
+ # Protocol 1 instructions
103
+ #
104
+ when 41 # EMPTY_TUPLE
105
+ Instructions::EMPTY_TUPLE
106
+ when 71 # BINFLOAT
107
+ Instructions::BinFloat.new(read_float64_be)
108
+ when 75 # BININT1
109
+ Instructions::BinInt1.new(read_uint8)
110
+ when 84 # BINSTRING
111
+ length = read_uint32_le
112
+ string = @io.read(length)
113
+
114
+ Instructions::BinString.new(length,string)
115
+ when 85 # SHORT_BINSTRING
116
+ length = read_uint8
117
+ string = @io.read(length)
118
+
119
+ Instructions::ShortBinString.new(length,string)
120
+ when 88 # BINUNICODE
121
+ length = read_uint32_le
122
+ string = @io.read(length).force_encoding(Encoding::UTF_8)
123
+
124
+ Instructions::BinUnicode.new(length,string)
125
+ when 93 # EMPTY_LIST
126
+ Instructions::EMPTY_LIST
127
+ when 101 # APPENDS
128
+ Instructions::APPENDS
129
+ when 104 # BINGET
130
+ Instructions::BinGet.new(read_uint8)
131
+ when 106 # LONG_BINGET
132
+ Instructions::LongBinGet.new(read_uint32_le)
133
+ when 113 # BINPUT
134
+ Instructions::BinPut.new(read_uint8)
135
+ when 117 # SETITEMS
136
+ Instructions::SETITEMS
137
+ when 125 # EMPTY_DICT
138
+ Instructions::EMPTY_DICT
139
+ #
140
+ # Protocol 2 instructions
141
+ #
142
+ when 128 # PROTO
143
+ Instructions::Proto.new(read_uint8)
144
+ when 129 # NEWOBJ
145
+ Instructions::NEWOBJ
146
+ when 130 # EXT1
147
+ Instructions::Ext1.new(read_uint8)
148
+ when 131 # EXT2
149
+ Instructions::Ext2.new(read_uint16_le)
150
+ when 132 # EXT4
151
+ Instructions::Ext4.new(read_uint32_le)
152
+ when 133 # TUPLE1
153
+ Instructions::TUPLE1
154
+ when 134 # TUPLE2
155
+ Instructions::TUPLE2
156
+ when 135 # TUPLE3
157
+ Instructions::TUPLE3
158
+ when 136 # NEWTRUE
159
+ Instructions::NEWTRUE
160
+ when 137 # NEWFALSE
161
+ Instructions::NEWFALSE
162
+ when 138 # LONG1
163
+ length = read_uint8
164
+ long = read_int_le(length)
165
+
166
+ Instructions::Long1.new(length,long)
167
+ when 139 # LONG4
168
+ length = read_uint32_le
169
+ long = read_int_le(length)
170
+
171
+ Instructions::Long4.new(length,long)
172
+ #
173
+ # Protocol 3 instructions
174
+ #
175
+ when 66 # BINBYTES
176
+ length = read_uint32_le
177
+ bytes = @io.read(length)
178
+
179
+ Instructions::BinBytes.new(length,bytes)
180
+ when 67 # SHORT_BINBYTES
181
+ length = read_uint8
182
+ bytes = @io.read(length)
183
+
184
+ Instructions::ShortBinBytes.new(length,bytes)
185
+ #
186
+ # Protocol 4 instructions
187
+ #
188
+ when 140 # SHORT_BINUNICODE
189
+ length = read_uint8
190
+ string = read_utf8_string(length)
191
+
192
+ Instructions::ShortBinUnicode.new(length,string)
193
+ when 141 # BINUNICODE8
194
+ length = read_uint64_le
195
+ string = read_utf8_string(length)
196
+
197
+ Instructions::BinUnicode8.new(length,string)
198
+ when 142 # BINBYTES8
199
+ length = read_uint64_le
200
+ bytes = @io.read(length)
201
+
202
+ Instructions::BinBytes8.new(length,bytes)
203
+ when 143 # EMPTY_SET
204
+ Instructions::EMPTY_SET
205
+ when 144 # ADDITEMS
206
+ Instructions::ADDITEMS
207
+ when 145 # FROZENSET
208
+ Instructions::FROZENSET
209
+ when 146 # NEWOBJ_EX
210
+ Instructions::NEWOBJ_EX
211
+ when 147 # STACK_GLOBAL
212
+ Instructions::STACK_GLOBAL
213
+ when 148 # MEMOIZE
214
+ Instructions::MEMOIZE
215
+ when 149 # FRAME
216
+ length = read_uint64_le
217
+
218
+ enter_frame(read_frame(length))
219
+
220
+ Instructions::Frame.new(length)
221
+ else
222
+ raise(InvalidFormat,"invalid opcode (#{opcode.inspect}) for protocol 4")
223
+ end
224
+ ensure
225
+ if @io.eof? && !@io_stack.empty?
226
+ leave_frame
227
+ end
228
+ end
229
+
230
+ #
231
+ # Reads an unsigned 64bit integer, in little-endian byte-order.
232
+ #
233
+ # @return [Integer]
234
+ #
235
+ def read_uint64_le
236
+ @io.read(8).unpack1('Q<')
237
+ end
238
+
239
+ #
240
+ # Reads a UTF-8 string of the desired length.
241
+ #
242
+ # @param [Integer] length
243
+ # The desired length to read.
244
+ #
245
+ # @return [String]
246
+ # The read UTF-8 string.
247
+ #
248
+ def read_utf8_string(length)
249
+ @io.read(length).force_encoding(Encoding::UTF_8)
250
+ end
251
+
252
+ #
253
+ # Reads a data frame of the given length.
254
+ #
255
+ # @param [Integer] length
256
+ # The desired length of the frame.
257
+ #
258
+ # @return [String]
259
+ # The read data frame.
260
+ #
261
+ def read_frame(length)
262
+ @io.read(length)
263
+ end
264
+
265
+ #
266
+ # Enters a new data frame.
267
+ #
268
+ # @param [String] frame
269
+ # The contents of the data frame.
270
+ #
271
+ def enter_frame(frame)
272
+ @io_stack.push(@io)
273
+ @io = StringIO.new(frame)
274
+ end
275
+
276
+ #
277
+ # Leaves a data frame and restores {#io}.
278
+ #
279
+ def leave_frame
280
+ @io = @io_stack.pop
281
+ end
282
+
283
+ end
284
+ end
285
+ end