excavate 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 533124c47fcd3ac27122c10f3fccf7c585f74684dccefd5988f812a2e2804392
|
|
4
|
+
data.tar.gz: 1d70e420f988127059dc364cd26c7e7c3e34b5bf618a5e7b5d1e5b28005d2d53
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 64ebd2cc16fee28e756db9cbacc317b3a70dd12e944e51c3c0f6994541fd674f0b89d8e06fff141167e5b3ae839477278bc958243c1dd77283f36ece4c63d1a6
|
|
7
|
+
data.tar.gz: 015bb8fb81644b1dda590157e9184b624f62415edbc5b94fafcad8df6090a75d8ce02df218c8f54726b904a18afe7bba8a7d78d2dd5e597e40341585bda76619
|
data/lib/excavate/archive.rb
CHANGED
|
@@ -1,19 +1,5 @@
|
|
|
1
1
|
module Excavate
|
|
2
2
|
class Archive
|
|
3
|
-
INVALID_MEMORY_MESSAGE =
|
|
4
|
-
"invalid memory read at address=0x0000000000000000".freeze
|
|
5
|
-
|
|
6
|
-
TYPES = { "cab" => Extractors::CabExtractor,
|
|
7
|
-
"cpio" => Extractors::CpioExtractor,
|
|
8
|
-
"exe" => Extractors::SevenZipExtractor,
|
|
9
|
-
"gz" => Extractors::GzipExtractor,
|
|
10
|
-
"msi" => Extractors::OleExtractor,
|
|
11
|
-
"pkg" => Extractors::XarExtractor,
|
|
12
|
-
"rpm" => Extractors::RpmExtractor,
|
|
13
|
-
"tar" => Extractors::TarExtractor,
|
|
14
|
-
"xz" => Extractors::XzExtractor,
|
|
15
|
-
"zip" => Extractors::ZipExtractor }.freeze
|
|
16
|
-
|
|
17
3
|
def initialize(archive)
|
|
18
4
|
@archive = archive
|
|
19
5
|
end
|
|
@@ -193,27 +179,26 @@ module Excavate
|
|
|
193
179
|
FileUtils.cp(archive, target)
|
|
194
180
|
end
|
|
195
181
|
|
|
196
|
-
def may_be_nested_cab?(extension, message)
|
|
197
|
-
extension == "exe" &&
|
|
198
|
-
(message.start_with?("Invalid file format",
|
|
199
|
-
"Unrecognized archive format") ||
|
|
200
|
-
message.include?("Invalid .7z signature"))
|
|
201
|
-
end
|
|
202
|
-
|
|
203
182
|
def extract_once(archive, target)
|
|
204
|
-
|
|
205
|
-
extractor_class =
|
|
183
|
+
type = FileMagic.detect(archive)
|
|
184
|
+
extractor_class = Extractors::Extractor.for_magic_type(type)
|
|
206
185
|
unless extractor_class
|
|
207
186
|
raise(UnknownArchiveError, "Could not unarchive `#{archive}`.")
|
|
208
187
|
end
|
|
209
188
|
|
|
210
189
|
extractor_class.new(archive).extract(target)
|
|
211
190
|
rescue StandardError => e
|
|
212
|
-
raise unless may_be_nested_cab?(
|
|
191
|
+
raise unless type == :exe && may_be_nested_cab?(e.message)
|
|
213
192
|
|
|
214
193
|
Extractors::CabExtractor.new(archive).extract(target)
|
|
215
194
|
end
|
|
216
195
|
|
|
196
|
+
def may_be_nested_cab?(message)
|
|
197
|
+
message.start_with?("Invalid file format",
|
|
198
|
+
"Unrecognized archive format") ||
|
|
199
|
+
message.include?("Invalid .7z signature")
|
|
200
|
+
end
|
|
201
|
+
|
|
217
202
|
def extract_and_replace(archive)
|
|
218
203
|
target = Dir.mktmpdir
|
|
219
204
|
extract_recursively(archive, target)
|
|
@@ -222,8 +207,8 @@ module Excavate
|
|
|
222
207
|
# During recursive extraction of nested archives, silently skip
|
|
223
208
|
# any that fail (e.g. .msi files that aren't real OLE, .cab files
|
|
224
209
|
# with incompatible format, .exe files with unsupported compression).
|
|
225
|
-
# Only re-raise
|
|
226
|
-
raise unless
|
|
210
|
+
# Only re-raise if the file is not a recognized archive format.
|
|
211
|
+
raise unless File.exist?(archive) && archive?(archive)
|
|
227
212
|
ensure
|
|
228
213
|
FileUtils.rm_rf(target)
|
|
229
214
|
end
|
|
@@ -278,22 +263,6 @@ module Excavate
|
|
|
278
263
|
end
|
|
279
264
|
end
|
|
280
265
|
|
|
281
|
-
def normalized_extension(file)
|
|
282
|
-
fetch_extension(file).downcase
|
|
283
|
-
end
|
|
284
|
-
|
|
285
|
-
def fetch_extension(file)
|
|
286
|
-
File.extname(filename(file)).sub(/^\./, "")
|
|
287
|
-
end
|
|
288
|
-
|
|
289
|
-
def filename(file)
|
|
290
|
-
if file.respond_to?(:original_filename)
|
|
291
|
-
file.original_filename
|
|
292
|
-
else
|
|
293
|
-
File.basename(file)
|
|
294
|
-
end
|
|
295
|
-
end
|
|
296
|
-
|
|
297
266
|
def all_files_in(dir)
|
|
298
267
|
Dir.glob(File.join(dir, "**", "*"))
|
|
299
268
|
end
|
|
@@ -301,10 +270,8 @@ module Excavate
|
|
|
301
270
|
def archive?(file)
|
|
302
271
|
return false unless File.file?(file)
|
|
303
272
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
TYPES.key?(ext)
|
|
273
|
+
type = FileMagic.detect(file)
|
|
274
|
+
!type.nil? && !Extractors::Extractor.for_magic_type(type).nil?
|
|
308
275
|
end
|
|
309
276
|
end
|
|
310
277
|
end
|
|
@@ -1,6 +1,25 @@
|
|
|
1
1
|
module Excavate
|
|
2
2
|
module Extractors
|
|
3
3
|
class Extractor
|
|
4
|
+
MAGIC_MAP = {
|
|
5
|
+
cab: "CabExtractor",
|
|
6
|
+
cpio: "CpioExtractor",
|
|
7
|
+
exe: "SevenZipExtractor",
|
|
8
|
+
gzip: "GzipExtractor",
|
|
9
|
+
ole: "OleExtractor",
|
|
10
|
+
rpm: "RpmExtractor",
|
|
11
|
+
seven_zip: "SevenZipExtractor",
|
|
12
|
+
tar: "TarExtractor",
|
|
13
|
+
xar: "XarExtractor",
|
|
14
|
+
xz: "XzExtractor",
|
|
15
|
+
zip: "ZipExtractor",
|
|
16
|
+
}.freeze
|
|
17
|
+
|
|
18
|
+
def self.for_magic_type(type)
|
|
19
|
+
name = MAGIC_MAP[type]
|
|
20
|
+
Extractors.const_get(name) if name
|
|
21
|
+
end
|
|
22
|
+
|
|
4
23
|
def initialize(archive)
|
|
5
24
|
@archive = archive
|
|
6
25
|
end
|
|
@@ -8,6 +27,31 @@ module Excavate
|
|
|
8
27
|
def extract(_target)
|
|
9
28
|
raise NotImplementedError.new("You must implement this method")
|
|
10
29
|
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
# Detect inner format of decompressed data and extract
|
|
34
|
+
# or write raw output.
|
|
35
|
+
# Shared by GzipExtractor and XzExtractor.
|
|
36
|
+
def extract_inner(data, target)
|
|
37
|
+
inner_type = FileMagic.detect_bytes(data)
|
|
38
|
+
extractor_class = Extractor.for_magic_type(inner_type) if inner_type
|
|
39
|
+
|
|
40
|
+
if extractor_class
|
|
41
|
+
temp = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}")
|
|
42
|
+
File.binwrite(temp, data)
|
|
43
|
+
extractor_class.new(temp).extract(target)
|
|
44
|
+
else
|
|
45
|
+
write_raw_output(data, target)
|
|
46
|
+
end
|
|
47
|
+
ensure
|
|
48
|
+
FileUtils.rm_f(temp) if temp
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def write_raw_output(data, target)
|
|
52
|
+
basename = File.basename(@archive, ".*")
|
|
53
|
+
File.binwrite(File.join(target, basename), data)
|
|
54
|
+
end
|
|
11
55
|
end
|
|
12
56
|
end
|
|
13
57
|
end
|
|
@@ -6,12 +6,8 @@ module Excavate
|
|
|
6
6
|
module Extractors
|
|
7
7
|
class GzipExtractor < Extractor
|
|
8
8
|
def extract(target)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
Zlib::GzipReader.open(@archive) do |gz|
|
|
13
|
-
File.write(output_path, gz.read, mode: "wb")
|
|
14
|
-
end
|
|
9
|
+
data = Zlib::GzipReader.open(@archive, &:read)
|
|
10
|
+
extract_inner(data, target)
|
|
15
11
|
end
|
|
16
12
|
end
|
|
17
13
|
end
|
|
@@ -1,70 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "omnizip"
|
|
4
|
-
require "zlib"
|
|
5
4
|
|
|
6
5
|
module Excavate
|
|
7
6
|
module Extractors
|
|
8
|
-
# Extractor for XZ compressed archives (both .xz and .tar.xz formats)
|
|
9
|
-
#
|
|
10
|
-
# This extractor handles:
|
|
11
|
-
# - Pure XZ compressed files (.xz)
|
|
12
|
-
# - Compound TAR+XZ archives (.tar.xz)
|
|
13
|
-
#
|
|
14
|
-
# Uses Omnizip for XZ decompression.
|
|
15
7
|
class XzExtractor < Extractor
|
|
16
8
|
def extract(target)
|
|
17
|
-
if tar_xz?
|
|
18
|
-
extract_tar_xz(target)
|
|
19
|
-
else
|
|
20
|
-
extract_pure_xz(target)
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
private
|
|
25
|
-
|
|
26
|
-
def tar_xz?
|
|
27
|
-
@archive.end_with?(".tar.xz", ".txz")
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def extract_tar_xz(target)
|
|
31
|
-
data = Omnizip::Formats::Xz.decompress(@archive)
|
|
32
|
-
data = strip_compression(data)
|
|
33
|
-
validate_tar!(data)
|
|
34
|
-
|
|
35
|
-
# Write tar file and extract
|
|
36
|
-
temp_tar = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}.tar")
|
|
37
|
-
File.binwrite(temp_tar, data)
|
|
38
|
-
|
|
39
|
-
TarExtractor.new(temp_tar).extract(target)
|
|
40
|
-
ensure
|
|
41
|
-
File.delete(temp_tar) if temp_tar && File.exist?(temp_tar)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def strip_compression(data)
|
|
45
|
-
if FileMagic.detect_bytes(data) == :gzip
|
|
46
|
-
return Zlib::GzipReader.new(StringIO.new(data)).read
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
data
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def validate_tar!(data)
|
|
53
|
-
inner_type = FileMagic.detect_bytes(data)
|
|
54
|
-
return if inner_type == :tar
|
|
55
|
-
|
|
56
|
-
inner_type ||= "unknown format"
|
|
57
|
-
|
|
58
|
-
raise UnknownArchiveError,
|
|
59
|
-
"Expected tar inside #{@archive}, got #{inner_type}"
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def extract_pure_xz(target)
|
|
63
|
-
# Decompress XZ
|
|
64
9
|
data = Omnizip::Formats::Xz.decompress(@archive)
|
|
65
|
-
|
|
66
|
-
output_path = File.join(target, basename)
|
|
67
|
-
File.binwrite(output_path, data)
|
|
10
|
+
extract_inner(data, target)
|
|
68
11
|
end
|
|
69
12
|
end
|
|
70
13
|
end
|
data/lib/excavate/file_magic.rb
CHANGED
|
@@ -8,6 +8,15 @@ module Excavate
|
|
|
8
8
|
[0, "\xFD7zXZ\x00".b, :xz],
|
|
9
9
|
[0, "\x1F\x8B".b, :gzip],
|
|
10
10
|
[257, "ustar".b, :tar],
|
|
11
|
+
[0, "7z\xBC\xAF\x27\x1C".b, :seven_zip],
|
|
12
|
+
[0, "PK\x03\x04".b, :zip],
|
|
13
|
+
[0, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b, :ole],
|
|
14
|
+
[0, "xar!".b, :xar],
|
|
15
|
+
[0, "\xED\xAB\xEE\xDB".b, :rpm],
|
|
16
|
+
[0, "070707".b, :cpio],
|
|
17
|
+
[0, "070701".b, :cpio],
|
|
18
|
+
[0, "070702".b, :cpio],
|
|
19
|
+
[0, "MZ".b, :exe],
|
|
11
20
|
].freeze
|
|
12
21
|
|
|
13
22
|
MAX_READ = SIGNATURES.map { |o, m, _| o + m.bytesize }.max
|
data/lib/excavate/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: excavate
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.3
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03
|
|
11
|
+
date: 2026-04-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cabriolet
|