excavate 1.0.2 → 1.1.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed5cec17e4c8c985ae34d044fa170ccbecdc3bd783a0cd0916454a8c567149d7
4
- data.tar.gz: 750dcde7f88158d2e107763a5fc733c4a72ac1030b53e91c6fd7e2697b167686
3
+ metadata.gz: 533124c47fcd3ac27122c10f3fccf7c585f74684dccefd5988f812a2e2804392
4
+ data.tar.gz: 1d70e420f988127059dc364cd26c7e7c3e34b5bf618a5e7b5d1e5b28005d2d53
5
5
  SHA512:
6
- metadata.gz: b62877409f75d68041b7b973ee364356e6e47b09bd714ebd4430f0df627643213bd8ff2dc3ca40d4b8d141c2fda3ff6b9b686f4720fc8997424b169effff169e
7
- data.tar.gz: e3b0dee5b195329ecbc6cc85ffc5fc64c7b4c736481ac3c964b0353828c59e0176b71ad3c70ba53b7503a01419f5e6f205e28fd3531fe3584449ad8b39a66082
6
+ metadata.gz: 64ebd2cc16fee28e756db9cbacc317b3a70dd12e944e51c3c0f6994541fd674f0b89d8e06fff141167e5b3ae839477278bc958243c1dd77283f36ece4c63d1a6
7
+ data.tar.gz: 015bb8fb81644b1dda590157e9184b624f62415edbc5b94fafcad8df6090a75d8ce02df218c8f54726b904a18afe7bba8a7d78d2dd5e597e40341585bda76619
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-03-17 11:57:02 UTC using RuboCop version 1.85.1.
3
+ # on 2026-03-19 00:32:08 UTC using RuboCop version 1.85.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,15 +11,7 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'excavate.gemspec'
13
13
 
14
- # Offense count: 1
15
- # This cop supports safe autocorrection (--autocorrect).
16
- # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
17
- # SupportedStylesAlignWith: start_of_line, relative_to_receiver
18
- Layout/IndentationWidth:
19
- Exclude:
20
- - 'spec/excavate/archive_spec.rb'
21
-
22
- # Offense count: 7
14
+ # Offense count: 21
23
15
  # This cop supports safe autocorrection (--autocorrect).
24
16
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
25
17
  # URISchemes: http, https
@@ -30,6 +22,8 @@ Layout/LineLength:
30
22
  - 'lib/excavate/extractors/rpm_extractor.rb'
31
23
  - 'lib/excavate/utils.rb'
32
24
  - 'spec/excavate/archive_spec.rb'
25
+ - 'test_archives/memory_test.rb'
26
+ - 'test_msi_memory.rb'
33
27
 
34
28
  # Offense count: 1
35
29
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
@@ -37,10 +31,66 @@ Lint/DuplicateBranch:
37
31
  Exclude:
38
32
  - 'lib/excavate/utils.rb'
39
33
 
34
+ # Offense count: 2
35
+ # This cop supports safe autocorrection (--autocorrect).
36
+ Lint/ScriptPermission:
37
+ Exclude:
38
+ - 'test_archives/memory_test.rb'
39
+ - 'test_msi_memory.rb'
40
+
41
+ # Offense count: 1
42
+ # This cop supports safe autocorrection (--autocorrect).
43
+ # Configuration parameters: IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
44
+ Lint/UnusedBlockArgument:
45
+ Exclude:
46
+ - 'test_msi_memory.rb'
47
+
40
48
  # Offense count: 4
49
+ # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
50
+ Metrics/AbcSize:
51
+ Exclude:
52
+ - 'test_archives/memory_test.rb'
53
+ - 'test_msi_memory.rb'
54
+
55
+ # Offense count: 2
56
+ # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
57
+ Metrics/CyclomaticComplexity:
58
+ Exclude:
59
+ - 'test_archives/memory_test.rb'
60
+ - 'test_msi_memory.rb'
61
+
62
+ # Offense count: 11
41
63
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
42
64
  Metrics/MethodLength:
43
- Max: 12
65
+ Max: 82
66
+
67
+ # Offense count: 2
68
+ # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
69
+ Metrics/PerceivedComplexity:
70
+ Exclude:
71
+ - 'test_archives/memory_test.rb'
72
+ - 'test_msi_memory.rb'
73
+
74
+ # Offense count: 2
75
+ # Configuration parameters: Mode, AllowedMethods, AllowedPatterns, AllowBangMethods, WaywardPredicates.
76
+ # AllowedMethods: call
77
+ # WaywardPredicates: infinite?, nonzero?
78
+ Naming/PredicateMethod:
79
+ Exclude:
80
+ - 'test_archives/memory_test.rb'
81
+ - 'test_msi_memory.rb'
82
+
83
+ # Offense count: 1
84
+ # This cop supports safe autocorrection (--autocorrect).
85
+ Performance/RegexpMatch:
86
+ Exclude:
87
+ - 'test_archives/memory_test.rb'
88
+
89
+ # Offense count: 1
90
+ # This cop supports unsafe autocorrection (--autocorrect-all).
91
+ Performance/StringInclude:
92
+ Exclude:
93
+ - 'test_archives/memory_test.rb'
44
94
 
45
95
  # Offense count: 46
46
96
  # Configuration parameters: Prefixes, AllowedPatterns.
@@ -65,18 +115,25 @@ RSpec/MultipleExpectations:
65
115
  RSpec/NestedGroups:
66
116
  Max: 4
67
117
 
68
- # Offense count: 2
118
+ # Offense count: 8
69
119
  # This cop supports safe autocorrection (--autocorrect).
70
- # Configuration parameters: AllowOnlyRestArgument, UseAnonymousForwarding, RedundantRestArgumentNames, RedundantKeywordRestArgumentNames, RedundantBlockArgumentNames.
71
- # RedundantRestArgumentNames: args, arguments
72
- # RedundantKeywordRestArgumentNames: kwargs, options, opts
73
- # RedundantBlockArgumentNames: blk, block, proc
74
- Style/ArgumentsForwarding:
120
+ # Configuration parameters: EnforcedStyle.
121
+ # SupportedStyles: trailing_conditional, ternary
122
+ Style/EmptyStringInsideInterpolation:
75
123
  Exclude:
76
- - 'lib/excavate/archive.rb'
124
+ - 'test_archives/memory_test.rb'
125
+ - 'test_msi_memory.rb'
77
126
 
78
127
  # Offense count: 2
79
128
  # This cop supports unsafe autocorrection (--autocorrect-all).
80
129
  Style/IdenticalConditionalBranches:
81
130
  Exclude:
82
131
  - 'lib/excavate/utils.rb'
132
+
133
+ # Offense count: 1
134
+ # This cop supports safe autocorrection (--autocorrect).
135
+ # Configuration parameters: EnforcedStyle.
136
+ # SupportedStyles: implicit, explicit
137
+ Style/RescueStandardError:
138
+ Exclude:
139
+ - 'test_msi_memory.rb'
data/excavate.gemspec CHANGED
@@ -2,8 +2,6 @@
2
2
 
3
3
  require_relative "lib/excavate/version"
4
4
 
5
- # rubocop:disable Metrics/BlockLength
6
-
7
5
  Gem::Specification.new do |spec|
8
6
  spec.name = "excavate"
9
7
  spec.version = Excavate::VERSION
@@ -32,11 +30,9 @@ Gem::Specification.new do |spec|
32
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
33
31
  spec.require_paths = ["lib"]
34
32
 
35
- spec.add_dependency "cabriolet", "~> 0.2.2"
36
- spec.add_dependency "omnizip", "~> 0.3.8"
33
+ spec.add_dependency "cabriolet", "~> 0.2.4"
34
+ spec.add_dependency "omnizip", "~> 0.3.9"
37
35
  spec.add_dependency "thor", "~> 1.0"
38
36
 
39
- spec.metadata["rubygems_mfa_required"] = "false"
37
+ spec.metadata["rubygems_mfa_required"] = "true"
40
38
  end
41
-
42
- # rubocop:enable Metrics/BlockLength
@@ -1,19 +1,5 @@
1
1
  module Excavate
2
2
  class Archive
3
- INVALID_MEMORY_MESSAGE =
4
- "invalid memory read at address=0x0000000000000000".freeze
5
-
6
- TYPES = { "cab" => Extractors::CabExtractor,
7
- "cpio" => Extractors::CpioExtractor,
8
- "exe" => Extractors::SevenZipExtractor,
9
- "gz" => Extractors::GzipExtractor,
10
- "msi" => Extractors::OleExtractor,
11
- "pkg" => Extractors::XarExtractor,
12
- "rpm" => Extractors::RpmExtractor,
13
- "tar" => Extractors::TarExtractor,
14
- "xz" => Extractors::XzExtractor,
15
- "zip" => Extractors::ZipExtractor }.freeze
16
-
17
3
  def initialize(archive)
18
4
  @archive = archive
19
5
  end
@@ -109,6 +95,8 @@ module Excavate
109
95
  extract_all(tmp, recursive_packages: recursive_packages)
110
96
  found_files = find_by_filter(tmp, filter)
111
97
  copy_files(found_files, target || Dir.pwd)
98
+ ensure
99
+ FileUtils.rm_rf(tmp)
112
100
  end
113
101
 
114
102
  def find_by_filter(source, filter)
@@ -191,38 +179,38 @@ module Excavate
191
179
  FileUtils.cp(archive, target)
192
180
  end
193
181
 
194
- def may_be_nested_cab?(extension, message)
195
- extension == "exe" &&
196
- (message.start_with?("Invalid file format",
197
- "Unrecognized archive format") ||
198
- message.include?("Invalid .7z signature"))
199
- end
200
-
201
182
  def extract_once(archive, target)
202
- extension = normalized_extension(archive)
203
- extractor_class = TYPES[extension]
183
+ type = FileMagic.detect(archive)
184
+ extractor_class = Extractors::Extractor.for_magic_type(type)
204
185
  unless extractor_class
205
186
  raise(UnknownArchiveError, "Could not unarchive `#{archive}`.")
206
187
  end
207
188
 
208
189
  extractor_class.new(archive).extract(target)
209
190
  rescue StandardError => e
210
- raise unless may_be_nested_cab?(extension, e.message)
191
+ raise unless type == :exe && may_be_nested_cab?(e.message)
211
192
 
212
193
  Extractors::CabExtractor.new(archive).extract(target)
213
194
  end
214
195
 
196
+ def may_be_nested_cab?(message)
197
+ message.start_with?("Invalid file format",
198
+ "Unrecognized archive format") ||
199
+ message.include?("Invalid .7z signature")
200
+ end
201
+
215
202
  def extract_and_replace(archive)
216
203
  target = Dir.mktmpdir
217
204
  extract_recursively(archive, target)
218
205
  replace_archive_with_contents(archive, target)
219
206
  rescue StandardError
220
- FileUtils.rm_rf(target)
221
207
  # During recursive extraction of nested archives, silently skip
222
208
  # any that fail (e.g. .msi files that aren't real OLE, .cab files
223
209
  # with incompatible format, .exe files with unsupported compression).
224
- # Only re-raise for file types we don't recognize as archives.
225
- raise unless TYPES.key?(normalized_extension(archive))
210
+ # Only re-raise if the file is not a recognized archive format.
211
+ raise unless File.exist?(archive) && archive?(archive)
212
+ ensure
213
+ FileUtils.rm_rf(target)
226
214
  end
227
215
 
228
216
  def replace_archive_with_contents(archive, target)
@@ -275,22 +263,6 @@ module Excavate
275
263
  end
276
264
  end
277
265
 
278
- def normalized_extension(file)
279
- fetch_extension(file).downcase
280
- end
281
-
282
- def fetch_extension(file)
283
- File.extname(filename(file)).sub(/^\./, "")
284
- end
285
-
286
- def filename(file)
287
- if file.respond_to?(:original_filename)
288
- file.original_filename
289
- else
290
- File.basename(file)
291
- end
292
- end
293
-
294
266
  def all_files_in(dir)
295
267
  Dir.glob(File.join(dir, "**", "*"))
296
268
  end
@@ -298,10 +270,8 @@ module Excavate
298
270
  def archive?(file)
299
271
  return false unless File.file?(file)
300
272
 
301
- ext = normalized_extension(file)
302
- return false if ext == "gz" && FileMagic.detect(file) != :gzip
303
-
304
- TYPES.key?(ext)
273
+ type = FileMagic.detect(file)
274
+ !type.nil? && !Extractors::Extractor.for_magic_type(type).nil?
305
275
  end
306
276
  end
307
277
  end
@@ -1,6 +1,25 @@
1
1
  module Excavate
2
2
  module Extractors
3
3
  class Extractor
4
+ MAGIC_MAP = {
5
+ cab: "CabExtractor",
6
+ cpio: "CpioExtractor",
7
+ exe: "SevenZipExtractor",
8
+ gzip: "GzipExtractor",
9
+ ole: "OleExtractor",
10
+ rpm: "RpmExtractor",
11
+ seven_zip: "SevenZipExtractor",
12
+ tar: "TarExtractor",
13
+ xar: "XarExtractor",
14
+ xz: "XzExtractor",
15
+ zip: "ZipExtractor",
16
+ }.freeze
17
+
18
+ def self.for_magic_type(type)
19
+ name = MAGIC_MAP[type]
20
+ Extractors.const_get(name) if name
21
+ end
22
+
4
23
  def initialize(archive)
5
24
  @archive = archive
6
25
  end
@@ -8,6 +27,31 @@ module Excavate
8
27
  def extract(_target)
9
28
  raise NotImplementedError.new("You must implement this method")
10
29
  end
30
+
31
+ private
32
+
33
+ # Detect inner format of decompressed data and extract
34
+ # or write raw output.
35
+ # Shared by GzipExtractor and XzExtractor.
36
+ def extract_inner(data, target)
37
+ inner_type = FileMagic.detect_bytes(data)
38
+ extractor_class = Extractor.for_magic_type(inner_type) if inner_type
39
+
40
+ if extractor_class
41
+ temp = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}")
42
+ File.binwrite(temp, data)
43
+ extractor_class.new(temp).extract(target)
44
+ else
45
+ write_raw_output(data, target)
46
+ end
47
+ ensure
48
+ FileUtils.rm_f(temp) if temp
49
+ end
50
+
51
+ def write_raw_output(data, target)
52
+ basename = File.basename(@archive, ".*")
53
+ File.binwrite(File.join(target, basename), data)
54
+ end
11
55
  end
12
56
  end
13
57
  end
@@ -6,12 +6,8 @@ module Excavate
6
6
  module Extractors
7
7
  class GzipExtractor < Extractor
8
8
  def extract(target)
9
- basename = File.basename(@archive, ".*")
10
- output_path = File.join(target, basename)
11
-
12
- Zlib::GzipReader.open(@archive) do |gz|
13
- File.write(output_path, gz.read, mode: "wb")
14
- end
9
+ data = Zlib::GzipReader.open(@archive, &:read)
10
+ extract_inner(data, target)
15
11
  end
16
12
  end
17
13
  end
@@ -15,10 +15,11 @@ module Excavate
15
15
  rpm.open
16
16
  content = rpm.raw_payload
17
17
  path = target_path(@archive, rpm, target)
18
- rpm.close
19
18
 
20
19
  FileUtils.mkdir_p(File.dirname(path))
21
20
  File.write(path, content, mode: "wb")
21
+ ensure
22
+ rpm&.close
22
23
  end
23
24
 
24
25
  private
@@ -1,70 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "omnizip"
4
- require "zlib"
5
4
 
6
5
  module Excavate
7
6
  module Extractors
8
- # Extractor for XZ compressed archives (both .xz and .tar.xz formats)
9
- #
10
- # This extractor handles:
11
- # - Pure XZ compressed files (.xz)
12
- # - Compound TAR+XZ archives (.tar.xz)
13
- #
14
- # Uses Omnizip for XZ decompression.
15
7
  class XzExtractor < Extractor
16
8
  def extract(target)
17
- if tar_xz?
18
- extract_tar_xz(target)
19
- else
20
- extract_pure_xz(target)
21
- end
22
- end
23
-
24
- private
25
-
26
- def tar_xz?
27
- @archive.end_with?(".tar.xz", ".txz")
28
- end
29
-
30
- def extract_tar_xz(target)
31
- data = Omnizip::Formats::Xz.decompress(@archive)
32
- data = strip_compression(data)
33
- validate_tar!(data)
34
-
35
- # Write tar file and extract
36
- temp_tar = File.join(target, ".temp_#{Time.now.to_i}_#{rand(1000)}.tar")
37
- File.binwrite(temp_tar, data)
38
-
39
- TarExtractor.new(temp_tar).extract(target)
40
- ensure
41
- File.delete(temp_tar) if temp_tar && File.exist?(temp_tar)
42
- end
43
-
44
- def strip_compression(data)
45
- if FileMagic.detect_bytes(data) == :gzip
46
- return Zlib::GzipReader.new(StringIO.new(data)).read
47
- end
48
-
49
- data
50
- end
51
-
52
- def validate_tar!(data)
53
- inner_type = FileMagic.detect_bytes(data)
54
- return if inner_type == :tar
55
-
56
- inner_type ||= "unknown format"
57
-
58
- raise UnknownArchiveError,
59
- "Expected tar inside #{@archive}, got #{inner_type}"
60
- end
61
-
62
- def extract_pure_xz(target)
63
- # Decompress XZ
64
9
  data = Omnizip::Formats::Xz.decompress(@archive)
65
- basename = File.basename(@archive, ".*")
66
- output_path = File.join(target, basename)
67
- File.binwrite(output_path, data)
10
+ extract_inner(data, target)
68
11
  end
69
12
  end
70
13
  end
@@ -8,6 +8,15 @@ module Excavate
8
8
  [0, "\xFD7zXZ\x00".b, :xz],
9
9
  [0, "\x1F\x8B".b, :gzip],
10
10
  [257, "ustar".b, :tar],
11
+ [0, "7z\xBC\xAF\x27\x1C".b, :seven_zip],
12
+ [0, "PK\x03\x04".b, :zip],
13
+ [0, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b, :ole],
14
+ [0, "xar!".b, :xar],
15
+ [0, "\xED\xAB\xEE\xDB".b, :rpm],
16
+ [0, "070707".b, :cpio],
17
+ [0, "070701".b, :cpio],
18
+ [0, "070702".b, :cpio],
19
+ [0, "MZ".b, :exe],
11
20
  ].freeze
12
21
 
13
22
  MAX_READ = SIGNATURES.map { |o, m, _| o + m.bytesize }.max
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Excavate
4
- VERSION = "1.0.2"
4
+ VERSION = "1.1.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: excavate
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-17 00:00:00.000000000 Z
11
+ date: 2026-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cabriolet
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.2.2
19
+ version: 0.2.4
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.2.2
26
+ version: 0.2.4
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: omnizip
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.3.8
33
+ version: 0.3.9
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.3.8
40
+ version: 0.3.9
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: thor
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -175,7 +175,7 @@ metadata:
175
175
  source_code_uri: https://github.com/omnizip/excavate
176
176
  changelog_uri: https://github.com/omnizip/excavite/releases
177
177
  documentation_uri: https://omnizip.github.io/excavate
178
- rubygems_mfa_required: 'false'
178
+ rubygems_mfa_required: 'true'
179
179
  post_install_message:
180
180
  rdoc_options: []
181
181
  require_paths: