pure_ruby_zip 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d1ea1789a09e61e0950554765cc23a4d01274533fe2a4faefc0ad17295e3bcf5
4
+ data.tar.gz: 7f3ad14e5e7a24f64dc8b3742644aecce8eaf9cdef250a40b7eec50d4ca3a05e
5
+ SHA512:
6
+ metadata.gz: 842d6239f37fcde66242e1e8ef1e8f275dce4f6c16d12127199956d1fe13d7feadea14bbeb2133e2e60d784d183da88517f4f9b1454c34185675fbc822287f17
7
+ data.tar.gz: 2c16043e638b6d89624ed52f04d871e89e7f7366db52b55471b1a3a228f9bc2581e7ea93f12110e02853d25413f6568f4a2a3943e3fc0587d61a6dcc809eb35f
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby

# Command-line entry point: decompresses every entry of the ZIP archive named
# by the first command-line argument.

require "pure_ruby_zip"

zipfile_filename = ARGV[0]

# Without this guard a missing argument reaches File.open(nil) and surfaces as
# an unhelpful TypeError.
abort "Usage: pure-ruby-zip ZIPFILE" if zipfile_filename.nil?

# NOTE(review): the decompressed data is returned in memory and discarded here;
# nothing is printed or written. If the intent is to extract to disk, this
# should probably call #decompress_all_files_to_disk instead -- confirm.
PureRubyZip::ZipFile.new(zipfile_filename).decompress_all_files
@@ -0,0 +1,276 @@
1
+ require "pure_ruby_zip/version.rb"
2
+
3
+ module PureRubyZip
4
# Base class for errors raised by this library.
class Error < StandardError; end

# Order in which the code-length code lengths are stored in the header of a
# dynamic Huffman block (indexed by position, value is the code-length symbol).
CODE_LENGTH_CODES_ORDER = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15].freeze

# Number of extra bits that follow each length symbol, indexed by (symbol - 257).
LENGTH_EXTRA_BITS = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0].freeze

# Smallest match length represented by each length symbol, indexed by (symbol - 257).
LENGTH_BASE = [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163,
               195, 227, 258].freeze

# Number of extra bits that follow each distance symbol.
DISTANCE_EXTRA_BITS = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
                       13].freeze

# Smallest back-reference distance represented by each distance symbol.
DISTANCE_BASE = [1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049,
                 3073, 4097, 6145, 8193, 12289, 16385, 24577].freeze
14
# Sequential bit reader over a String of bytes.
#
# Bits are consumed starting from the least-significant bit of each byte, and
# bytes are consumed in order. The source string is never modified.
class Bitstream
  attr_accessor :byte
  # Position (0..7) of the next bit to read within the current byte.
  attr_accessor :bit_index
  attr_accessor :file

  # data - String whose bytes are the bits to read.
  def initialize(data)
    @data = data
    # A cursor replaces re-slicing the string after every byte, which copied
    # the whole remaining buffer each time.
    @byte_index = 0
    @bit_index = 0
  end

  # Reads the next bit.
  #
  # Returns true for a 1 bit and false for a 0 bit.
  # Raises EOFError when every bit of the data has already been consumed.
  def read_bit
    current = @data.getbyte(@byte_index)
    raise EOFError, "attempted to read past the end of the bitstream" if current.nil?

    bit = ((current >> @bit_index) & 1) == 1
    if @bit_index == 7
      @byte_index += 1
      @bit_index = 0
    else
      @bit_index += 1
    end
    bit
  end

  # Reads n_bits bits and assembles them into an Integer, the first bit read
  # being the least-significant bit of the result.
  #
  # Returns 0 when n_bits is 0.
  def read_int(n_bits)
    value = 0
    n_bits.times { |shift| value |= 1 << shift if read_bit }
    value
  end
end
38
# Decoder for a DEFLATE stream.
#
# Every method takes a bitstream object responding to #read_bit (true/false),
# #read_int(n_bits) and #bit_index (position of the next bit within the
# current byte). Huffman trees are Hashes mapping a code, written as a String
# of "0"/"1" characters in the order the bits are read, to its symbol.
class ZipDecompressor
  # Reads bits until they spell a code present in tree.
  #
  # Returns the symbol stored under that code. If no code ever matches, this
  # keeps reading until the bitstream itself raises.
  def decode_symbol(tree, file_bitstream)
    code = "".dup
    loop do
      code << (file_bitstream.read_bit ? "1" : "0")
      symbol = tree[code]
      return symbol if symbol
    end
  end

  # Decodes literal/length and distance symbols until the end-of-block
  # symbol (256) is reached.
  #
  # file_data - output decoded so far; back-references may point into it.
  #             It is not modified.
  #
  # Returns a new binary String: file_data followed by this block's output.
  # Raises Error when a back-reference points before the start of the output.
  def inflate_block_data(litlen_tree, dist_tree, file_data, file_bitstream)
    data = file_data.dup.force_encoding(Encoding::BINARY)
    loop do
      sym = decode_symbol(litlen_tree, file_bitstream)
      if sym < 256
        data << sym.chr
      elsif sym == 256
        return data
      else
        length_index = sym - 257
        length = file_bitstream.read_int(LENGTH_EXTRA_BITS[length_index]) + LENGTH_BASE[length_index]
        dist_sym = decode_symbol(dist_tree, file_bitstream)
        dist = file_bitstream.read_int(DISTANCE_EXTRA_BITS[dist_sym]) + DISTANCE_BASE[dist_sym]
        if dist > data.bytesize
          raise Error, "Invalid back-reference distance #{dist} (only #{data.bytesize} bytes decoded)"
        end

        # Copy one byte at a time: when length > dist the source range
        # overlaps the bytes being appended.
        length.times { data << data.byteslice(data.bytesize - dist, 1) }
      end
    end
  end

  # Builds a canonical Huffman tree from a list of code lengths.
  #
  # bit_lengths - Array where element i is the code length of symbol i
  #               (0 means the symbol is unused).
  #
  # Returns a Hash of code String => symbol.
  def bit_lengths_to_tree(bit_lengths)
    return {} if bit_lengths.empty?

    max_bits = bit_lengths.max
    # Number of symbols per code length; unused symbols (length 0) are not counted.
    counts = Array.new(max_bits + 1, 0)
    bit_lengths.each { |length| counts[length] += 1 unless length.zero? }

    # First code value for each length.
    next_code = [0, 0]
    (2..max_bits).each do |bits|
      next_code[bits] = (next_code[bits - 1] + counts[bits - 1]) << 1
    end

    tree = {}
    bit_lengths.each_with_index do |length, symbol|
      next if length.zero?

      tree[next_code[length].to_s(2).rjust(length, "0")] = symbol
      next_code[length] += 1
    end
    tree
  end

  # Decodes a stored (uncompressed) block whose 3 header bits have already
  # been read.
  #
  # Returns only the bytes of this block as a binary String (file_data is
  # not included in the result).
  def decode_uncompressed_block(file_data, file_bitstream)
    # Discard the rest of the current byte. The header does not necessarily
    # start on a byte boundary, so the padding is not always 5 bits.
    file_bitstream.read_int((8 - file_bitstream.bit_index) % 8)
    length = file_bitstream.read_int(16)
    # One's complement of the length; read and ignored.
    file_bitstream.read_int(16)
    block = String.new
    length.times { block << file_bitstream.read_int(8) }
    block
  end

  # Decodes a block compressed with the fixed Huffman codes.
  #
  # Returns file_data followed by this block's output (see #inflate_block_data).
  def decode_fixed_huffman_compressed_block(file_data, file_bitstream)
    # The fixed literal/length alphabet has 288 entries. Symbols 286 and 287
    # never occur in data but take part in code construction; leaving them
    # out shifts every 9-bit code (literals 144..255).
    litlen_bit_lengths = [8] * 144 + [9] * (256 - 144) + [7] * (280 - 256) + [8] * (288 - 280)
    litlen_tree = bit_lengths_to_tree(litlen_bit_lengths)
    dist_bit_lengths = [5] * 30
    dist_tree = bit_lengths_to_tree(dist_bit_lengths)
    inflate_block_data(litlen_tree, dist_tree, file_data, file_bitstream)
  end

  # Decodes a block compressed with dynamic Huffman codes: reads the code
  # length tables from the block header, builds both trees, then inflates.
  #
  # Returns file_data followed by this block's output (see #inflate_block_data).
  # Raises Error when a "repeat previous length" symbol has nothing to repeat.
  def decode_dynamic_huffman_compressed_block(file_data, file_bitstream)
    hlit = file_bitstream.read_int(5) + 257
    hdist = file_bitstream.read_int(5) + 1
    hclen = file_bitstream.read_int(4) + 4
    code_length_bit_lengths = [0] * 19
    hclen.times do |position|
      code_length_bit_lengths[CODE_LENGTH_CODES_ORDER[position]] = file_bitstream.read_int(3)
    end
    code_length_tree = bit_lengths_to_tree(code_length_bit_lengths)

    # Literal/length code lengths followed by distance code lengths.
    bit_lengths = []
    while bit_lengths.size < hlit + hdist
      sym = decode_symbol(code_length_tree, file_bitstream)
      case sym
      when 0..15
        bit_lengths << sym
      when 16
        raise Error, "Code length repeat symbol with no previous length" if bit_lengths.empty?

        bit_lengths.concat([bit_lengths.last] * (file_bitstream.read_int(2) + 3))
      when 17
        bit_lengths.concat([0] * (file_bitstream.read_int(3) + 3))
      when 18
        bit_lengths.concat([0] * (file_bitstream.read_int(7) + 11))
      end
    end

    litlen_tree = bit_lengths_to_tree(bit_lengths[0, hlit])
    dist_tree = bit_lengths_to_tree(bit_lengths[hlit, hdist])
    inflate_block_data(litlen_tree, dist_tree, file_data, file_bitstream)
  end

  # Decodes a whole DEFLATE stream, block by block, until a block flagged as
  # final has been processed.
  #
  # Returns the decompressed data as a binary String.
  # Raises Error on the reserved block type 3.
  def decode_zipped_file(file_bitstream)
    file_data = String.new
    is_last_block = false
    until is_last_block
      is_last_block = file_bitstream.read_bit
      block_type = file_bitstream.read_int(2)
      case block_type
      when 0
        # Stored blocks return only their own bytes.
        file_data << decode_uncompressed_block(file_data, file_bitstream)
      when 1
        # Huffman blocks return the previous output plus their own, so the
        # result replaces file_data; appending would duplicate earlier blocks.
        file_data = decode_fixed_huffman_compressed_block(file_data, file_bitstream)
      when 2
        file_data = decode_dynamic_huffman_compressed_block(file_data, file_bitstream)
      else
        raise Error, "Invalid DEFLATE block type #{block_type}"
      end
    end
    file_data
  end
end
143
# Small helpers for reading binary fields from an IO-like object that
# responds to #read(length).
module ZipHelpers
  # Reads an unsigned little-endian integer.
  #
  # file  - IO-like object positioned at the first byte of the field.
  # bytes - width of the field in bytes.
  #
  # Returns the Integer value (0 when bytes is 0).
  # Raises EOFError when fewer than `bytes` bytes are available; previously a
  # short read silently produced a wrong value.
  def read_int(file, bytes)
    raw = file.read(bytes)
    if raw.nil? || raw.bytesize < bytes
      raise EOFError, "unexpected end of file while reading a #{bytes}-byte integer"
    end

    raw.bytes.each_with_index.inject(0) { |value, (byte, index)| value + (byte << (8 * index)) }
  end

  # Advances file until the bytes of `string` have just been read, leaving
  # the position immediately after the first occurrence at or after the
  # starting position.
  #
  # Works for a search string of any length (the sliding window is sized
  # from the string rather than fixed at 4 bytes).
  #
  # Returns nil.
  # Raises EOFError when the end of the file is reached without a match.
  def find_string(file, string)
    target = string.b
    window = "".b
    until window == target
      byte = file.read(1)
      raise EOFError, "reached end of file while searching for #{string.inspect}" if byte.nil?

      # Drop the oldest byte once the window is as long as the target.
      window = window[1..-1] if window.bytesize == target.bytesize
      window << byte
    end
  end

  # Advances file by `bytes` bytes by reading and discarding them.
  def skip(file, bytes)
    file.read bytes
  end
end
157
# One entry of a ZIP archive, identified by its name and the offset of its
# local file header inside the archive.
class ZipFileItem
  include ZipHelpers

  # Compression method codes handled by #get_decompressed_data.
  COMPRESSION_STORED = 0
  COMPRESSION_DEFLATE = 8

  # filename - entry name.
  # offset   - byte offset of the entry's local file header in the archive.
  def initialize(filename, offset)
    @filename = filename
    @offset = offset
  end

  # Reads the entry's raw (still compressed) bytes.
  #
  # zipfile - IO positioned just after the compression-method field of the
  #           local file header (byte 10 of the header), which is where
  #           #get_decompressed_data leaves it.
  #
  # Returns whatever zipfile.read gives for the compressed size recorded in
  # the local header.
  #
  # NOTE(review): archives that store sizes in a trailing data descriptor
  # presumably record 0 here, so such entries would read as empty -- confirm.
  def read_data(zipfile)
    # Skip modification time (2), modification date (2) and CRC-32 (4).
    skip zipfile, 8
    compressed_size = read_int zipfile, 4
    # Skip uncompressed size.
    skip zipfile, 4
    # Use the name length recorded in this header rather than
    # @filename.length, which counts characters, not bytes.
    filename_length = read_int zipfile, 2
    extra_data_length = read_int zipfile, 2
    # Skip the name and extra field to reach the file data.
    skip zipfile, filename_length + extra_data_length
    zipfile.read compressed_size
  end

  # Reads the entry's data and inflates it.
  def handle_compressed(zipfile)
    bitstream = Bitstream.new(read_data(zipfile))
    ZipDecompressor.new.decode_zipped_file bitstream
  end

  # Reads the entry's data, which is stored without compression.
  def handle_uncompressed(zipfile)
    read_data zipfile
  end

  # Seeks to the entry's local header and returns its decompressed contents.
  #
  # Raises Error for any compression method other than stored (0) or
  # deflate (8); previously such data was returned as if it were stored.
  def get_decompressed_data(zipfile)
    zipfile.seek @offset
    # Skip signature (4), version needed (2) and general-purpose flags (2).
    skip zipfile, 8
    compression_type = read_int zipfile, 2
    case compression_type
    when COMPRESSION_DEFLATE
      handle_compressed zipfile
    when COMPRESSION_STORED
      handle_uncompressed zipfile
    else
      raise Error, "Unsupported compression method #{compression_type} for #{@filename}"
    end
  end
end
199
# Read-only view of a ZIP archive on disk.
#
# The list of entries is read from the archive's central directory when the
# object is created; entry data is read on demand.
#
# NOTE(review): ZIP64 archives are not handled -- the 16/32-bit fields of the
# end-of-central-directory record are used as-is.
class ZipFile
  include ZipHelpers

  EOCD_SIGNATURE = "\x50\x4b\x05\x06".b.freeze
  CENTRAL_DIRECTORY_SIGNATURE = "\x50\x4b\x01\x02".b.freeze
  # Fixed part of the end-of-central-directory record (22 bytes) plus the
  # longest comment its 16-bit length field can describe.
  EOCD_MAX_SIZE = 22 + 0xFFFF

  # filename - path of the ZIP archive.
  def initialize(filename)
    @filename = filename
    get_items_metadata @filename
  end

  # Decompresses a single entry.
  #
  # path - entry name as stored in the archive.
  #
  # Returns the entry's decompressed data.
  # Raises Error when the archive has no entry with that name.
  def decompress_file(path)
    # Entry names are read from the archive as binary strings, so also try
    # the binary form of the requested path (matters for non-ASCII names).
    item = @items[path] || @items[path.to_s.b]
    raise Error, "No such entry in #{@filename}: #{path}" unless item

    File.open(@filename, "rb") { |file| item.get_decompressed_data file }
  end

  # Decompresses every entry into memory.
  #
  # Returns an Array of Hashes with keys :path and :data, in directory order.
  def decompress_all_files
    File.open(@filename, "rb") do |file|
      @items.map do |path, item|
        { path: path, data: item.get_decompressed_data(file) }
      end
    end
  end

  # Extracts every entry into a directory next to the archive, named after
  # the archive without its extension.
  #
  # Returns the Array produced by #decompress_all_files.
  # Raises Error when an entry name would resolve outside that directory.
  def decompress_all_files_to_disk
    # Loaded here because this is the only method that needs it.
    require "fileutils"

    extract_dir_path = "#{File.dirname(@filename)}/#{File.basename(@filename, '.*')}"
    extract_root = File.expand_path(extract_dir_path)
    FileUtils.mkdir_p(extract_root)

    decompress_all_files.each do |decompressed_item|
      # Names come from the archive as binary strings; relabel them so they
      # can be combined with the extraction path.
      entry_name = decompressed_item[:path].dup.force_encoding(extract_root.encoding)
      target = File.expand_path(entry_name, extract_root)
      # Entry names are untrusted: refuse "../" or absolute names that would
      # write outside the extraction directory.
      unless target.start_with?("#{extract_root}/")
        raise Error, "Refusing to extract #{entry_name.inspect} outside of #{extract_root}"
      end

      if entry_name.end_with?("/")
        # Directory entry: nothing to write.
        FileUtils.mkdir_p(target)
      else
        FileUtils.mkdir_p(File.dirname(target))
        # Binary mode so the data is written byte-for-byte on every platform.
        File.open(target, "wb") { |extracted_file| extracted_file.write decompressed_item[:data] }
      end
    end
  end

  private

  # Positions file just after the signature of the end-of-central-directory
  # record. The record sits at the end of the archive, so the last occurrence
  # of the signature in the tail is used; scanning forward from the start can
  # match the same bytes inside file data.
  #
  # Raises Error when no such record exists.
  def find_eocd(file)
    file_size = file.size
    tail_size = [file_size, EOCD_MAX_SIZE].min
    tail_start = file_size - tail_size
    file.seek tail_start
    tail = file.read(tail_size) || ""
    index = tail.rindex(EOCD_SIGNATURE)
    raise Error, "End of central directory record not found in #{@filename}" unless index

    file.seek tail_start + index + EOCD_SIGNATURE.bytesize
  end

  # Returns the total number of entries recorded in the
  # end-of-central-directory record, leaving file just after that field.
  def get_number_of_items(file)
    find_eocd file
    # Skip disk number (2), central directory disk (2), entries on this disk (2).
    skip file, 6
    read_int file, 2
  end

  # Reads one central directory record starting at the current position and
  # leaves file at the start of the next record.
  #
  # Returns a [filename, ZipFileItem] pair.
  # Raises Error when the record does not start with the expected signature.
  def read_central_directory_entry(file)
    signature = file.read(CENTRAL_DIRECTORY_SIGNATURE.bytesize)
    raise Error, "Malformed central directory in #{@filename}" unless signature == CENTRAL_DIRECTORY_SIGNATURE

    # Skip to the filename length field (byte 28 of the record).
    skip file, 24
    filename_length = read_int file, 2
    extra_length = read_int file, 2
    comment_length = read_int file, 2
    # Skip disk number start (2), internal (2) and external (4) attributes.
    skip file, 8
    file_offset = read_int file, 4
    filename = file.read(filename_length)
    # Step over the extra field and comment instead of scanning for the next
    # signature, which could match inside those fields.
    skip file, extra_length + comment_length
    [filename, ZipFileItem.new(filename, file_offset)]
  end

  # Builds @items, a Hash of entry name => ZipFileItem, from the central
  # directory of the archive at filename.
  def get_items_metadata(filename)
    File.open(filename, "rb") do |file|
      num_entries = get_number_of_items file
      # Skip the central directory size, then jump to the directory itself.
      skip file, 4
      file.seek read_int(file, 4)
      @items = Array.new(num_entries) { read_central_directory_entry file }.to_h
    end
  end
end
276
+ end
@@ -0,0 +1,3 @@
1
module PureRubyZip
  # Version of the gem.
  VERSION = "0.1.4".freeze
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pure_ruby_zip
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Edward Halferty
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-03-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: |-
42
+ A pure-Ruby ZIP file decompressor/compressor.
43
+ VERY inefficient, possibly buggy.
44
+ Mostly for entertainment value.
45
+ email:
46
+ - me@edwardhalferty.com
47
+ executables:
48
+ - pure-ruby-zip
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - bin/pure-ruby-zip
53
+ - lib/pure_ruby_zip.rb
54
+ - lib/pure_ruby_zip/version.rb
55
+ homepage: https://github.com/ehalferty/pure_ruby_zip
56
+ licenses:
57
+ - MIT
58
+ metadata: {}
59
+ post_install_message:
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubygems_version: 3.0.8
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: A pure-Ruby ZIP file decompressor/compressor
78
+ test_files: []