excavate 0.3.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56401e5430415802c560b8f23c9b83da63c363e41b6fe0cf9237542a3903959b
4
- data.tar.gz: 56d280616e5ec8dc1d1833a3453df61ab3cfd99bccea7bb6985f47c58b1a5aa6
3
+ metadata.gz: 7adbf7839cf5917692766601ec8b44ec32794e860a658ef9cdb9594031cc904e
4
+ data.tar.gz: 3fa93a928c9fd291f757c740c320e39f9431af5fc718352c5671f5dc7b4c9721
5
5
  SHA512:
6
- metadata.gz: 316e2bcb6575a02d21ea2463074b242c5ee8a0c819851fbe161d0df8c27267fcfaa4bc84411a8ee5a5a5158f4e03f0bd3cf48b92a9de89be2781ea8c03100a6f
7
- data.tar.gz: 3193c2e4c10eee70a64f3944e5aa274bb76912a58e32cbf9ed971f3bd44b9106725e37371fd658b02d2273ff43ca4bbbca0f55f6b69e8cf71393e2d0297c8287
6
+ metadata.gz: 7e40f307222b934913b71e731e94df4488d2fd668895a3ded5c010c592791666655a44330c74c10f66c2be892f975341afec98c9961bcd6f6a0f4448680b5147
7
+ data.tar.gz: d7774a97313fffd8f773f8df098c564edb8bce53c8b96b47b1669504729517c35fb27f7627cd161733068b9c0ce7b1183c6dd23516ee5984089adcabf89c14cc
@@ -2,6 +2,8 @@ name: release
2
2
 
3
3
  permissions:
4
4
  contents: write
5
+ packages: write
6
+ id-token: write
5
7
 
6
8
  on:
7
9
  workflow_dispatch:
data/.rubocop.yml CHANGED
@@ -6,6 +6,7 @@ inherit_from:
6
6
 
7
7
  # local repo-specific modifications
8
8
  AllCops:
9
+ NewCops: disable
9
10
  Exclude:
10
11
  - 'lib/excavate/extractors/cpio/cpio_old_format.rb'
11
12
  - 'vendor/**/*'
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  gem "openssl", "~> 3.0"
8
+ gem "rake"
8
9
  gem "rspec"
9
10
  gem "rubocop"
10
11
  gem "rubocop-performance"
data/excavate.gemspec CHANGED
@@ -31,15 +31,8 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ["lib"]
33
33
 
34
- spec.add_dependency "arr-pm", "~> 0.0"
35
- # spec.add_dependency "bundler", "~> 2", ">= 2.3.24"
36
- # Workaround for https://github.com/metanorma/ruby-libmspack/issues/2
37
- spec.add_dependency "ffi-compiler2", ">= 2.2.2"
38
- spec.add_dependency "ffi-libarchive-binary", "~> 0.4", ">= 0.4.2"
39
- spec.add_dependency "libmspack", "~> 0.1"
40
- spec.add_dependency "ruby-ole", "~> 1.0"
41
- spec.add_dependency "rubyzip", "~> 2.3"
42
- spec.add_dependency "seven-zip", "~> 1.4"
34
+ spec.add_dependency "cabriolet", "~> 0.2.2"
35
+ spec.add_dependency "omnizip", "~> 0.3.8"
43
36
  spec.add_dependency "thor", "~> 1.0"
44
37
 
45
38
  spec.metadata["rubygems_mfa_required"] = "false"
@@ -25,7 +25,7 @@ module Excavate
25
25
 
26
26
  all_files_in(target).map(&block)
27
27
  ensure
28
- FileUtils.rm_rf(target)
28
+ windows_safe_rm_rf(target)
29
29
  end
30
30
 
31
31
  def extract(target = nil,
@@ -187,8 +187,9 @@ module Excavate
187
187
 
188
188
  def may_be_nested_cab?(extension, message)
189
189
  extension == "exe" &&
190
- message.start_with?("Invalid file format",
191
- "Unrecognized archive format")
190
+ (message.start_with?("Invalid file format",
191
+ "Unrecognized archive format") ||
192
+ message.include?("Invalid .7z signature"))
192
193
  end
193
194
 
194
195
  def extract_once(archive, target)
@@ -208,13 +209,60 @@ module Excavate
208
209
  def extract_and_replace(archive)
209
210
  target = Dir.mktmpdir
210
211
  extract_recursively(archive, target)
212
+ replace_archive_with_contents(archive, target)
213
+ rescue StandardError
214
+ FileUtils.rm_rf(target)
215
+ raise unless normalized_extension(archive) == "exe"
216
+ end
211
217
 
212
- FileUtils.rm(archive)
218
+ def replace_archive_with_contents(archive, target)
219
+ windows_safe_rm(archive)
213
220
  FileUtils.mv(target, archive)
214
- rescue FFI::NullPointerError => e
215
- FileUtils.rmdir(target)
216
- raise unless normalized_extension(archive) == "exe" &&
217
- e.message.start_with?(INVALID_MEMORY_MESSAGE)
221
+ rescue Errno::EACCES
222
+ # Windows: file is locked. Copy extracted contents to archive location
223
+ # and keep both the archive and extracted files
224
+ target_dir = File.dirname(archive)
225
+ # Copy all extracted files to the target directory
226
+ Dir.glob(File.join(target, "**", "*")).each do |src|
227
+ next unless File.file?(src)
228
+
229
+ dest = File.join(target_dir, File.basename(src))
230
+ FileUtils.cp(src, dest) unless File.exist?(dest)
231
+ end
232
+ # Leave the original locked archive in place
233
+ end
234
+
235
+ # Windows sometimes holds file locks briefly after operations.
236
+ # This method retries file deletion with a small delay.
237
+ def windows_safe_rm(path, max_retries: 5)
238
+ attempts = 0
239
+ begin
240
+ FileUtils.rm(path)
241
+ rescue Errno::EACCES => e
242
+ attempts += 1
243
+ if attempts < max_retries
244
+ sleep(0.2)
245
+ retry
246
+ else
247
+ raise e
248
+ end
249
+ end
250
+ end
251
+
252
+ # Windows-safe recursive removal
253
+ def windows_safe_rm_rf(path, max_retries: 5)
254
+ attempts = 0
255
+ begin
256
+ FileUtils.rm_rf(path)
257
+ rescue Errno::EACCES, Errno::ENOTEMPTY => e
258
+ attempts += 1
259
+ if attempts < max_retries
260
+ sleep(0.2)
261
+ retry
262
+ else
263
+ raise e
264
+ end
265
+ end
218
266
  end
219
267
 
220
268
  def normalized_extension(file)
@@ -1,32 +1,17 @@
1
- require "libmspack"
1
+ # frozen_string_literal: true
2
+
3
+ require "cabriolet"
2
4
 
3
5
  module Excavate
4
6
  module Extractors
5
7
  class CabExtractor < Extractor
6
8
  def extract(target)
7
- open_cab(@archive) do |decompressor, cab|
8
- file = cab.files
9
-
10
- while file
11
- path = File.join(target, file.filename)
12
- decompressor.extract(file, path)
13
- file = file.next
14
- end
15
- end
16
- end
17
-
18
- private
19
-
20
- def open_cab(archive)
21
- decompressor = LibMsPack::CabDecompressor.new
22
- cab = Utils.silence_stream($stderr) do
23
- decompressor.search(archive)
24
- end
25
-
26
- yield decompressor, cab
9
+ decompressor = Cabriolet::CAB::Decompressor.new
10
+ decompressor.salvage = true # Enable salvage mode for compatibility
27
11
 
28
- decompressor.close(cab)
29
- decompressor.destroy
12
+ # Try to find embedded CAB first (for self-extracting archives)
13
+ cabinet = decompressor.search(@archive) || decompressor.open(@archive)
14
+ decompressor.extract_all(cabinet, target, salvage: true)
30
15
  end
31
16
  end
32
17
  end
@@ -1,53 +1,14 @@
1
- require_relative "cpio/cpio"
2
- require_relative "cpio/cpio_old_format"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip/formats/cpio"
3
4
 
4
5
  module Excavate
5
6
  module Extractors
6
7
  class CpioExtractor < Extractor
7
8
  def extract(target)
8
- extract_cpio_new_format(target)
9
- rescue RuntimeError => e
10
- raise unless e.message.start_with?("Invalid magic")
11
-
12
- extract_cpio_old_format(target)
13
- end
14
-
15
- private
16
-
17
- def extract_cpio_inner_new(entry, file, target)
18
- path = File.join(target, entry.name)
19
- if entry.directory?
20
- FileUtils.mkdir_p(path)
21
- else
22
- FileUtils.mkdir_p(File.dirname(path))
23
- File.write(path, file.read, mode: "wb")
24
- end
25
- end
26
-
27
- def extract_cpio_new_format(target)
28
- File.open(@archive, "rb") do |archive_file|
29
- CPIO::ASCIIReader.new(archive_file).each do |entry, file|
30
- extract_cpio_inner_new(entry, file, target)
31
- end
32
- end
33
- end
34
-
35
- def extract_cpio_inner_old(entry, target)
36
- path = File.expand_path(entry.filename, target)
37
- if entry.directory?
38
- FileUtils.mkdir_p(path)
39
- else
40
- FileUtils.mkdir_p(File.dirname(path))
41
- File.write(path, entry.data, mode: "wb")
42
- end
43
- end
44
-
45
- def extract_cpio_old_format(target)
46
- File.open(@archive, "rb") do |archive_file|
47
- CPIO::ArchiveReader.new(archive_file).each_entry do |entry|
48
- extract_cpio_inner_old(entry, target)
49
- end
50
- end
9
+ reader = Omnizip::Formats::Cpio::Reader.new(@archive)
10
+ reader.open
11
+ reader.extract_all(target)
51
12
  end
52
13
  end
53
14
  end
@@ -1,11 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+
1
5
  module Excavate
2
6
  module Extractors
3
7
  class GzipExtractor < Extractor
4
8
  def extract(target)
9
+ basename = File.basename(@archive, ".*")
10
+ output_path = File.join(target, basename)
11
+
5
12
  Zlib::GzipReader.open(@archive) do |gz|
6
- basename = File.basename(@archive, ".*")
7
- path = File.join(target, basename)
8
- File.write(path, gz.read, mode: "wb")
13
+ File.write(output_path, gz.read, mode: "wb")
9
14
  end
10
15
  end
11
16
  end
@@ -1,7 +1,14 @@
1
- require "ole/storage"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip"
4
+ require "omnizip/formats/ole"
5
+ require_relative "../file_magic"
2
6
 
3
7
  module Excavate
4
8
  module Extractors
9
+ # Extractor for OLE compound documents (MSI, DOC, XLS, PPT, etc.)
10
+ #
11
+ # Uses Omnizip's OLE format support for extraction.
5
12
  class OleExtractor < Extractor
6
13
  def extract(target)
7
14
  do_extract(target)
@@ -13,20 +20,18 @@ module Excavate
13
20
  def do_extract(target)
14
21
  reset_filename_lookup
15
22
 
16
- Ole::Storage.open(@archive) do |ole|
17
- children(ole).each do |file|
18
- next if ole.file.directory?(file)
19
-
20
- filename = prepare_filename(file)
21
- path = File.join(target, filename)
22
- content = ole.file.read(file)
23
- File.write(path, content, mode: "wb")
23
+ Omnizip::Formats::Ole.open(@archive) do |ole|
24
+ children(ole).each do |entry|
25
+ path = File.join(target, prepare_filename(entry))
26
+ FileUtils.mkdir_p(File.dirname(path))
27
+ content = ole.read(entry)
28
+ File.write(path, content, mode: "wb") if content
24
29
  end
25
30
  end
26
31
  end
27
32
 
28
33
  def children(ole)
29
- ole.dir.entries(".") - [".", ".."]
34
+ ole.list("/")
30
35
  end
31
36
 
32
37
  def reset_filename_lookup
@@ -56,6 +61,8 @@ module Excavate
56
61
 
57
62
  def rename_archives(target)
58
63
  Dir.glob(File.join(target, "**", "*")).each do |file|
64
+ next unless File.file?(file)
65
+
59
66
  FileUtils.mv(file, "#{file}.cab") if cab?(file)
60
67
  end
61
68
  end
@@ -1,33 +1,35 @@
1
- require "arr-pm"
1
+ # frozen_string_literal: true
2
2
 
3
- H_MAGIC = "\x8e\xad\xe8\x01\x00\x00\x00\x00".force_encoding("BINARY")
4
-
5
- # fix for Ruby 3.0
6
- unless RPM::File::Header::HEADER_MAGIC == H_MAGIC
7
- RPM::File::Header.send(:remove_const, "HEADER_MAGIC")
8
- RPM::File::Header.const_set(:HEADER_MAGIC, H_MAGIC)
9
- end
3
+ require "omnizip"
4
+ require "omnizip/formats/rpm"
10
5
 
11
6
  module Excavate
12
7
  module Extractors
8
+ # Extractor for RPM packages
9
+ #
10
+ # Uses Omnizip's RPM format support for extraction.
11
+ # Extracts the raw payload as a file (e.g., fonts.src.cpio.gz).
13
12
  class RpmExtractor < Extractor
14
13
  def extract(target)
15
- File.open(@archive, "rb") do |file|
16
- rpm = RPM::File.new(file)
17
- content = rpm.payload.read
18
- path = target_path(@archive, rpm.tags, target)
14
+ rpm = Omnizip::Formats::Rpm::Reader.new(@archive)
15
+ rpm.open
16
+ content = rpm.raw_payload
17
+ path = target_path(@archive, rpm, target)
18
+ rpm.close
19
19
 
20
- File.write(path, content, mode: "wb")
21
- end
20
+ FileUtils.mkdir_p(File.dirname(path))
21
+ File.write(path, content, mode: "wb")
22
22
  end
23
23
 
24
24
  private
25
25
 
26
- def target_path(archive, tags, dir)
27
- archive_format = tags[:payloadformat]
28
- compression_format = tags[:payloadcompressor] == "gzip" ? "gz" : tags[:payloadcompressor]
26
+ def target_path(archive, rpm, dir)
29
27
  basename = File.basename(archive, ".*")
30
- filename = "#{basename}.#{archive_format}.#{compression_format}"
28
+ payload_format = rpm.tags[:payloadformat] || "cpio"
29
+ compression_format = rpm.tags[:payloadcompressor] || "gzip"
30
+ # Convert "gzip" to "gz" for file extension
31
+ compression_ext = compression_format == "gzip" ? "gz" : compression_format
32
+ filename = "#{basename}.#{payload_format}.#{compression_ext}"
31
33
  File.join(dir, filename)
32
34
  end
33
35
  end
@@ -1,12 +1,23 @@
1
- require "seven_zip_ruby"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip"
2
4
 
3
5
  module Excavate
4
6
  module Extractors
5
7
  class SevenZipExtractor < Extractor
6
8
  def extract(target)
7
- Dir.chdir(target) do
8
- File.open(@archive, "rb") do |file|
9
- SevenZipRuby::Reader.extract_all(file, target)
9
+ # Check for embedded 7z in self-extracting archives
10
+ offset = Omnizip::Formats::SevenZip.search_embedded(@archive)
11
+
12
+ if offset
13
+ # Self-extracting archive - use offset
14
+ Omnizip::Formats::SevenZip.open(@archive, offset: offset) do |reader|
15
+ reader.extract_all(target)
16
+ end
17
+ else
18
+ # Regular 7z archive
19
+ Omnizip::Formats::SevenZip.open(@archive) do |reader|
20
+ reader.extract_all(target)
10
21
  end
11
22
  end
12
23
  end
@@ -1,28 +1,13 @@
1
- require "rubygems/package"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip"
2
4
 
3
5
  module Excavate
4
6
  module Extractors
5
7
  class TarExtractor < Extractor
6
8
  def extract(target)
7
- File.open(@archive, "rb") do |archive_file|
8
- Gem::Package::TarReader.new(archive_file) do |tar|
9
- tar.each do |tarfile|
10
- save_tar_file(tarfile, target)
11
- end
12
- end
13
- end
14
- end
15
-
16
- private
17
-
18
- def save_tar_file(file, dir)
19
- path = File.join(dir, file.full_name)
20
-
21
- if file.directory?
22
- FileUtils.mkdir_p(path)
23
- else
24
- File.write(path, file.read, mode: "wb")
25
- end
9
+ reader = Omnizip::Formats::Tar::Reader.open(@archive)
10
+ reader.extract_all(target)
26
11
  end
27
12
  end
28
13
  end
@@ -1,28 +1,18 @@
1
- require "ffi-libarchive-binary"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip"
4
+ require "omnizip/formats/xar"
2
5
 
3
6
  module Excavate
4
7
  module Extractors
5
8
  class XarExtractor < Extractor
6
9
  def extract(target)
7
- Dir.chdir(target) do
8
- extract_with_libarchive
9
- rename_payload(target)
10
- end
10
+ Omnizip::Formats::Xar.extract(@archive, target)
11
+ rename_payload(target)
11
12
  end
12
13
 
13
14
  private
14
15
 
15
- def extract_with_libarchive
16
- flags = ::Archive::EXTRACT_PERM
17
- reader = ::Archive::Reader.open_filename(@archive)
18
-
19
- reader.each_entry do |entry|
20
- reader.extract(entry, flags.to_i)
21
- end
22
-
23
- reader.close
24
- end
25
-
26
16
  def rename_payload(target)
27
17
  Dir.glob(File.join(target, "**", "Payload")).each do |file|
28
18
  next unless File.file?(file)
@@ -1,4 +1,7 @@
1
- require "ffi-libarchive-binary"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip"
4
+ require "zlib"
2
5
 
3
6
  module Excavate
4
7
  module Extractors
@@ -8,50 +11,43 @@ module Excavate
8
11
  # - Pure XZ compressed files (.xz)
9
12
  # - Compound TAR+XZ archives (.tar.xz)
10
13
  #
11
- # Uses libarchive through ffi-libarchive-binary for extraction,
12
- # which provides native XZ decompression support.
13
- #
14
- # @example Extract a .tar.xz file
15
- # extractor = XzExtractor.new("archive.tar.xz")
16
- # extractor.extract("/target/directory")
17
- #
18
- # @example Extract a pure .xz file
19
- # extractor = XzExtractor.new("file.xz")
20
- # extractor.extract("/target/directory")
14
+ # Uses Omnizip for XZ decompression.
21
15
  class XzExtractor < Extractor
22
- # Extract the XZ archive to the specified target directory
23
- #
24
- # @param target [String] the directory path where files should be extracted
25
- # @return [void]
26
- #
27
- # @raise [StandardError] if extraction fails
28
16
  def extract(target)
29
- extract_with_libarchive(target)
17
+ if tar_xz?
18
+ extract_tar_xz(target)
19
+ else
20
+ extract_pure_xz(target)
21
+ end
30
22
  end
31
23
 
32
24
  private
33
25
 
34
- # Perform extraction using libarchive
35
- #
36
- # This method uses libarchive's reader API to:
37
- # 1. Open the XZ archive
38
- # 2. Iterate through all entries
39
- # 3. Extract each entry with appropriate permissions
40
- # 4. Close the reader
41
- #
42
- # @param target [String] the target directory for extraction
43
- # @return [void]
44
- def extract_with_libarchive(target)
45
- flags = ::Archive::EXTRACT_PERM
46
- reader = ::Archive::Reader.open_filename(@archive)
47
-
48
- Dir.chdir(target) do
49
- reader.each_entry do |entry|
50
- reader.extract(entry, flags.to_i)
51
- end
52
- end
26
+ def tar_xz?
27
+ @archive.end_with?(".tar.xz", ".txz")
28
+ end
29
+
30
+ def extract_tar_xz(target)
31
+ # Decompress XZ to get gzip data
32
+ gzip_data = Omnizip::Formats::Xz.decompress(@archive)
33
+ # Decompress gzip to get tar data
34
+ tar_data = Zlib::GzipReader.new(StringIO.new(gzip_data)).read
35
+
36
+ # Write tar file and extract
37
+ temp_tar = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}.tar")
38
+ File.binwrite(temp_tar, tar_data)
39
+
40
+ TarExtractor.new(temp_tar).extract(target)
41
+ ensure
42
+ File.delete(temp_tar) if temp_tar && File.exist?(temp_tar)
43
+ end
53
44
 
54
- reader.close
45
+ def extract_pure_xz(target)
46
+ # Decompress XZ
47
+ data = Omnizip::Formats::Xz.decompress(@archive)
48
+ basename = File.basename(@archive, ".*")
49
+ output_path = File.join(target, basename)
50
+ File.binwrite(output_path, data)
55
51
  end
56
52
  end
57
53
  end
@@ -1,16 +1,14 @@
1
- require "zip"
1
+ # frozen_string_literal: true
2
+
3
+ require "omnizip"
2
4
 
3
5
  module Excavate
4
6
  module Extractors
5
7
  class ZipExtractor < Extractor
6
8
  def extract(target)
7
- Zip::File.open(@archive) do |zip_file|
8
- zip_file.each do |entry|
9
- path = File.join(target, entry.name)
10
- FileUtils.mkdir_p(File.dirname(path))
11
- entry.extract(path)
12
- end
13
- end
9
+ reader = Omnizip::Formats::Zip::Reader.new(@archive)
10
+ reader.read
11
+ reader.extract_all(target)
14
12
  end
15
13
  end
16
14
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Excavate
4
- VERSION = "0.3.9"
4
+ VERSION = "1.0.0"
5
5
  end