excavate 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6360dbc5e452e6a0984a8bb8394051fa74da9d33700afd1ca9b36dbd73f9eeeb
4
- data.tar.gz: 4d1f8e805be65dd56c6194915a43433561ba29572b016d6dc3d613617140957f
3
+ metadata.gz: 533124c47fcd3ac27122c10f3fccf7c585f74684dccefd5988f812a2e2804392
4
+ data.tar.gz: 1d70e420f988127059dc364cd26c7e7c3e34b5bf618a5e7b5d1e5b28005d2d53
5
5
  SHA512:
6
- metadata.gz: dd1b31fabaf5c27c182bfec752255f918acf63763630bd7b6e39850f8296219b0ff182458efb3cfc5b3fe04f448b657a5e8b197330d532f1ccceed22e78a7ba3
7
- data.tar.gz: f8a7aa72c8486144b9dcd09c5ecc05fa4c0acefbcc7482dca5a09cb1725d0d84438125438f1020dbdc478e37dce02486d1c4fdab7d37dc62e033a57f0094bd6b
6
+ metadata.gz: 64ebd2cc16fee28e756db9cbacc317b3a70dd12e944e51c3c0f6994541fd674f0b89d8e06fff141167e5b3ae839477278bc958243c1dd77283f36ece4c63d1a6
7
+ data.tar.gz: 015bb8fb81644b1dda590157e9184b624f62415edbc5b94fafcad8df6090a75d8ce02df218c8f54726b904a18afe7bba8a7d78d2dd5e597e40341585bda76619
@@ -1,19 +1,5 @@
1
1
  module Excavate
2
2
  class Archive
3
- INVALID_MEMORY_MESSAGE =
4
- "invalid memory read at address=0x0000000000000000".freeze
5
-
6
- TYPES = { "cab" => Extractors::CabExtractor,
7
- "cpio" => Extractors::CpioExtractor,
8
- "exe" => Extractors::SevenZipExtractor,
9
- "gz" => Extractors::GzipExtractor,
10
- "msi" => Extractors::OleExtractor,
11
- "pkg" => Extractors::XarExtractor,
12
- "rpm" => Extractors::RpmExtractor,
13
- "tar" => Extractors::TarExtractor,
14
- "xz" => Extractors::XzExtractor,
15
- "zip" => Extractors::ZipExtractor }.freeze
16
-
17
3
  def initialize(archive)
18
4
  @archive = archive
19
5
  end
@@ -193,27 +179,26 @@ module Excavate
193
179
  FileUtils.cp(archive, target)
194
180
  end
195
181
 
196
- def may_be_nested_cab?(extension, message)
197
- extension == "exe" &&
198
- (message.start_with?("Invalid file format",
199
- "Unrecognized archive format") ||
200
- message.include?("Invalid .7z signature"))
201
- end
202
-
203
182
  def extract_once(archive, target)
204
- extension = normalized_extension(archive)
205
- extractor_class = TYPES[extension]
183
+ type = FileMagic.detect(archive)
184
+ extractor_class = Extractors::Extractor.for_magic_type(type)
206
185
  unless extractor_class
207
186
  raise(UnknownArchiveError, "Could not unarchive `#{archive}`.")
208
187
  end
209
188
 
210
189
  extractor_class.new(archive).extract(target)
211
190
  rescue StandardError => e
212
- raise unless may_be_nested_cab?(extension, e.message)
191
+ raise unless type == :exe && may_be_nested_cab?(e.message)
213
192
 
214
193
  Extractors::CabExtractor.new(archive).extract(target)
215
194
  end
216
195
 
196
+ def may_be_nested_cab?(message)
197
+ message.start_with?("Invalid file format",
198
+ "Unrecognized archive format") ||
199
+ message.include?("Invalid .7z signature")
200
+ end
201
+
217
202
  def extract_and_replace(archive)
218
203
  target = Dir.mktmpdir
219
204
  extract_recursively(archive, target)
@@ -222,8 +207,8 @@ module Excavate
222
207
  # During recursive extraction of nested archives, silently skip
223
208
  # any that fail (e.g. .msi files that aren't real OLE, .cab files
224
209
  # with incompatible format, .exe files with unsupported compression).
225
- # Only re-raise for file types we don't recognize as archives.
226
- raise unless TYPES.key?(normalized_extension(archive))
210
+ # Only re-raise if the file is not a recognized archive format.
211
+ raise unless File.exist?(archive) && archive?(archive)
227
212
  ensure
228
213
  FileUtils.rm_rf(target)
229
214
  end
@@ -278,22 +263,6 @@ module Excavate
278
263
  end
279
264
  end
280
265
 
281
- def normalized_extension(file)
282
- fetch_extension(file).downcase
283
- end
284
-
285
- def fetch_extension(file)
286
- File.extname(filename(file)).sub(/^\./, "")
287
- end
288
-
289
- def filename(file)
290
- if file.respond_to?(:original_filename)
291
- file.original_filename
292
- else
293
- File.basename(file)
294
- end
295
- end
296
-
297
266
  def all_files_in(dir)
298
267
  Dir.glob(File.join(dir, "**", "*"))
299
268
  end
@@ -301,10 +270,8 @@ module Excavate
301
270
  def archive?(file)
302
271
  return false unless File.file?(file)
303
272
 
304
- ext = normalized_extension(file)
305
- return false if ext == "gz" && FileMagic.detect(file) != :gzip
306
-
307
- TYPES.key?(ext)
273
+ type = FileMagic.detect(file)
274
+ !type.nil? && !Extractors::Extractor.for_magic_type(type).nil?
308
275
  end
309
276
  end
310
277
  end
@@ -1,6 +1,25 @@
1
1
  module Excavate
2
2
  module Extractors
3
3
  class Extractor
4
+ MAGIC_MAP = {
5
+ cab: "CabExtractor",
6
+ cpio: "CpioExtractor",
7
+ exe: "SevenZipExtractor",
8
+ gzip: "GzipExtractor",
9
+ ole: "OleExtractor",
10
+ rpm: "RpmExtractor",
11
+ seven_zip: "SevenZipExtractor",
12
+ tar: "TarExtractor",
13
+ xar: "XarExtractor",
14
+ xz: "XzExtractor",
15
+ zip: "ZipExtractor",
16
+ }.freeze
17
+
18
+ def self.for_magic_type(type)
19
+ name = MAGIC_MAP[type]
20
+ Extractors.const_get(name) if name
21
+ end
22
+
4
23
  def initialize(archive)
5
24
  @archive = archive
6
25
  end
@@ -8,6 +27,31 @@ module Excavate
8
27
  def extract(_target)
9
28
  raise NotImplementedError.new("You must implement this method")
10
29
  end
30
+
31
+ private
32
+
33
+ # Detect inner format of decompressed data and extract
34
+ # or write raw output.
35
+ # Shared by GzipExtractor and XzExtractor.
36
+ def extract_inner(data, target)
37
+ inner_type = FileMagic.detect_bytes(data)
38
+ extractor_class = Extractor.for_magic_type(inner_type) if inner_type
39
+
40
+ if extractor_class
41
+ temp = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}")
42
+ File.binwrite(temp, data)
43
+ extractor_class.new(temp).extract(target)
44
+ else
45
+ write_raw_output(data, target)
46
+ end
47
+ ensure
48
+ FileUtils.rm_f(temp) if temp
49
+ end
50
+
51
+ def write_raw_output(data, target)
52
+ basename = File.basename(@archive, ".*")
53
+ File.binwrite(File.join(target, basename), data)
54
+ end
11
55
  end
12
56
  end
13
57
  end
@@ -6,12 +6,8 @@ module Excavate
6
6
  module Extractors
7
7
  class GzipExtractor < Extractor
8
8
  def extract(target)
9
- basename = File.basename(@archive, ".*")
10
- output_path = File.join(target, basename)
11
-
12
- Zlib::GzipReader.open(@archive) do |gz|
13
- File.write(output_path, gz.read, mode: "wb")
14
- end
9
+ data = Zlib::GzipReader.open(@archive, &:read)
10
+ extract_inner(data, target)
15
11
  end
16
12
  end
17
13
  end
@@ -1,70 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "omnizip"
4
- require "zlib"
5
4
 
6
5
  module Excavate
7
6
  module Extractors
8
- # Extractor for XZ compressed archives (both .xz and .tar.xz formats)
9
- #
10
- # This extractor handles:
11
- # - Pure XZ compressed files (.xz)
12
- # - Compound TAR+XZ archives (.tar.xz)
13
- #
14
- # Uses Omnizip for XZ decompression.
15
7
  class XzExtractor < Extractor
16
8
  def extract(target)
17
- if tar_xz?
18
- extract_tar_xz(target)
19
- else
20
- extract_pure_xz(target)
21
- end
22
- end
23
-
24
- private
25
-
26
- def tar_xz?
27
- @archive.end_with?(".tar.xz", ".txz")
28
- end
29
-
30
- def extract_tar_xz(target)
31
- data = Omnizip::Formats::Xz.decompress(@archive)
32
- data = strip_compression(data)
33
- validate_tar!(data)
34
-
35
- # Write tar file and extract
36
- temp_tar = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}.tar")
37
- File.binwrite(temp_tar, data)
38
-
39
- TarExtractor.new(temp_tar).extract(target)
40
- ensure
41
- File.delete(temp_tar) if temp_tar && File.exist?(temp_tar)
42
- end
43
-
44
- def strip_compression(data)
45
- if FileMagic.detect_bytes(data) == :gzip
46
- return Zlib::GzipReader.new(StringIO.new(data)).read
47
- end
48
-
49
- data
50
- end
51
-
52
- def validate_tar!(data)
53
- inner_type = FileMagic.detect_bytes(data)
54
- return if inner_type == :tar
55
-
56
- inner_type ||= "unknown format"
57
-
58
- raise UnknownArchiveError,
59
- "Expected tar inside #{@archive}, got #{inner_type}"
60
- end
61
-
62
- def extract_pure_xz(target)
63
- # Decompress XZ
64
9
  data = Omnizip::Formats::Xz.decompress(@archive)
65
- basename = File.basename(@archive, ".*")
66
- output_path = File.join(target, basename)
67
- File.binwrite(output_path, data)
10
+ extract_inner(data, target)
68
11
  end
69
12
  end
70
13
  end
@@ -8,6 +8,15 @@ module Excavate
8
8
  [0, "\xFD7zXZ\x00".b, :xz],
9
9
  [0, "\x1F\x8B".b, :gzip],
10
10
  [257, "ustar".b, :tar],
11
+ [0, "7z\xBC\xAF\x27\x1C".b, :seven_zip],
12
+ [0, "PK\x03\x04".b, :zip],
13
+ [0, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b, :ole],
14
+ [0, "xar!".b, :xar],
15
+ [0, "\xED\xAB\xEE\xDB".b, :rpm],
16
+ [0, "070707".b, :cpio],
17
+ [0, "070701".b, :cpio],
18
+ [0, "070702".b, :cpio],
19
+ [0, "MZ".b, :exe],
11
20
  ].freeze
12
21
 
13
22
  MAX_READ = SIGNATURES.map { |o, m, _| o + m.bytesize }.max
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Excavate
4
- VERSION = "1.0.3"
4
+ VERSION = "1.1.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: excavate
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-24 00:00:00.000000000 Z
11
+ date: 2026-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cabriolet