excavate 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34a72a8ae6eb56395e68cf385d8714d9e6c2aa82245353c2c27bb5cb163d6e2a
4
- data.tar.gz: afd30ada69cad61e75e291281f25bed671c3445c9d625db4593a2ee2ea64bf02
3
+ metadata.gz: d9ea70e2de46706b2324d98253d9012d175a706fc010478a0ccb1b6af00fc02e
4
+ data.tar.gz: 51c3c8e44013e17201114f4315b4314d9f44dc645c41d3f8627f81353b0269b1
5
5
  SHA512:
6
- metadata.gz: 345129ddad35de7619aef4b1538ae0fec8b88a64baa45fca4a3e8578067026019614bef3c08ebbd81ef916ef8ba58966cde4cd89cc0afdb3337e2198e09fdafc
7
- data.tar.gz: f829b60851340bf8ff5c881a9e0b9fea7daafdfdceac303e499fba08f2be076df5ffb4f4c0a85f0d072d862d3443298eeddc338df0326b3dbf5f582204685fc8
6
+ metadata.gz: 4f1c1c62794e14c5272cb8c9881dd50a8be3992c446086c0a5f58cecf51100458f962ddaeec1d2d329a0d3e03cff328bb287a9183ab5315534ba203fb1758544
7
+ data.tar.gz: 79a3aac7b28480c4a1675b68877e21e192d311099f348aaebc84ae157d966e6bb56c2dffe53c25aeea331a08286aa7b34e8e92fd015cb8588514da8340092d85
@@ -0,0 +1,35 @@
1
+ name: metanorma
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
11
+ runs-on: ${{ matrix.os }}
12
+ continue-on-error: ${{ matrix.experimental }}
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ ruby: [ '2.5', '2.6', '2.7', '3.0' ]
17
+ os: [ ubuntu-latest, windows-latest, macos-latest ]
18
+ experimental: [ true ]
19
+ steps:
20
+ - uses: actions/checkout@master
21
+ with:
22
+ repository: metanorma/metanorma
23
+
24
+ - uses: actions/checkout@master
25
+ with:
26
+ path: "excavate"
27
+
28
+ - run: 'echo ''gem "excavate", path: "./excavate"'' > Gemfile.devel'
29
+
30
+ - uses: ruby/setup-ruby@v1
31
+ with:
32
+ ruby-version: ${{ matrix.ruby }}
33
+ bundler-cache: true
34
+
35
+ - run: bundle exec rake
@@ -2,7 +2,7 @@ name: rspec
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ master ]
5
+ branches: [ master, main ]
6
6
  pull_request:
7
7
 
8
8
  jobs:
data/Gemfile CHANGED
@@ -5,9 +5,7 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  gem "gem-release"
8
-
9
8
  gem "rake", "~> 13.0"
10
-
11
9
  gem "rspec", "~> 3.2"
12
10
  gem "rubocop", "0.75.0"
13
11
  gem "rubocop-performance"
data/README.adoc CHANGED
@@ -61,6 +61,13 @@ end
61
61
  $ excavate --recursive path/to/archive.cab
62
62
  ----
63
63
 
64
+ `excavate` also supports recursive extraction of a directory containing archives:
65
+
66
+ [source,sh]
67
+ ----
68
+ $ excavate --recursive path/to/dir_with_archives
69
+ ----
70
+
64
71
  If you'd like to skip extraction of nested archives, just use:
65
72
 
66
73
  [source,sh]
@@ -68,6 +75,39 @@ If you'd like to skip extraction of nested archives, just use:
68
75
  $ excavate path/to/archive.cab
69
76
  ----
70
77
 
78
+ To extract a particular file or files, specify them as the last arguments:
79
+
80
+ [source,sh]
81
+ ----
82
+ $ excavate --recursive archive.cab file1 dir/file2
83
+ ----
84
+
85
+ `excavate` also supports extracting files from nested archives:
86
+
87
+ [source,sh]
88
+ ----
89
+ $ excavate --recursive archive.cab dir/nested.zip/file
90
+ ----
91
+
92
+ Files can also be selected with a glob filter:
93
+
94
+ [source,sh]
95
+ ----
96
+ $ excavate archive.cab --filter "**/specialfile*.txt"
97
+ ----
98
+
99
+ == Dependencies
100
+
101
+ `excavate` depends on
102
+ https://github.com/fontist/ffi-libarchive-binary[ffi-libarchive-binary] which
103
+ has the following requirements:
104
+
105
+ * zlib
106
+ * Expat
107
+ * OpenSSL (for Linux only)
108
+
109
+ These dependencies are generally present on all systems.
110
+
71
111
 
72
112
  == Development
73
113
 
@@ -1,5 +1,8 @@
1
1
  module Excavate
2
2
  class Archive
3
+ INVALID_MEMORY_MESSAGE =
4
+ "invalid memory read at address=0x0000000000000000".freeze
5
+
3
6
  TYPES = { "cab" => Extractors::CabExtractor,
4
7
  "cpio" => Extractors::CpioExtractor,
5
8
  "exe" => Extractors::SevenZipExtractor,
@@ -14,9 +17,10 @@ module Excavate
14
17
  @archive = archive
15
18
  end
16
19
 
17
- def files(recursive_packages: false)
20
+ def files(recursive_packages: false, files: [], filter: nil)
18
21
  target = Dir.mktmpdir
19
- extract(target, recursive_packages: recursive_packages)
22
+ extract(target, recursive_packages: recursive_packages,
23
+ files: files, filter: filter)
20
24
 
21
25
  all_files_in(target).map do |file|
22
26
  yield file
@@ -25,10 +29,105 @@ module Excavate
25
29
  FileUtils.rm_rf(target)
26
30
  end
27
31
 
28
- def extract(target = nil, recursive_packages: false)
32
+ def extract(target = nil,
33
+ recursive_packages: false,
34
+ files: [],
35
+ filter: nil)
36
+ if files.size.positive?
37
+ extract_particular_files(target, files,
38
+ recursive_packages: recursive_packages)
39
+ elsif filter
40
+ extract_by_filter(target, filter,
41
+ recursive_packages: recursive_packages)
42
+ else
43
+ extract_all(target, recursive_packages: recursive_packages)
44
+ end
45
+ end
46
+
47
+ private
48
+
49
+ def extract_particular_files(target, files, recursive_packages: false)
50
+ tmp = Dir.mktmpdir
51
+ extract_all(tmp, recursive_packages: recursive_packages)
52
+ found_files = find_files(tmp, files)
53
+ copy_files(found_files, target || Dir.pwd)
54
+ ensure
55
+ FileUtils.rm_rf(tmp)
56
+ end
57
+
58
+ def copy_files(files, target)
59
+ files.map do |file|
60
+ FileUtils.mkdir_p(target)
61
+ target_path = File.join(target, File.basename(file))
62
+ ensure_not_exist(target_path)
63
+
64
+ FileUtils.cp(file, target_path)
65
+
66
+ target_path
67
+ end
68
+ end
69
+
70
+ def ensure_not_exist(path)
71
+ if File.exist?(path)
72
+ type = File.directory?(path) ? "directory" : "file"
73
+ raise(TargetExistsError,
74
+ "Target #{type} `#{File.basename(path)}` already exists.")
75
+ end
76
+ end
77
+
78
+ def find_files(source, files)
79
+ all_files = all_files_in(source)
80
+
81
+ files.map do |target_file|
82
+ found_file = all_files.find do |source_file|
83
+ file_matches?(source_file, target_file, source)
84
+ end
85
+
86
+ unless found_file
87
+ raise(TargetNotFoundError, "File `#{target_file}` not found.")
88
+ end
89
+
90
+ found_file
91
+ end
92
+ end
93
+
94
+ def file_matches?(source_file, target_file, source_dir)
95
+ base_path(source_file, source_dir) == target_file
96
+ end
97
+
98
+ def base_path(path, prefix)
99
+ path.sub(prefix, "").sub(/^\//, "").sub(/^\\/, "")
100
+ end
101
+
102
+ def extract_by_filter(target, filter, recursive_packages: false)
103
+ tmp = Dir.mktmpdir
104
+ extract_all(tmp, recursive_packages: recursive_packages)
105
+ found_files = find_by_filter(tmp, filter)
106
+ copy_files(found_files, target || Dir.pwd)
107
+ end
108
+
109
+ def find_by_filter(source, filter)
110
+ all_files = all_files_in(source)
111
+
112
+ found_files = all_files.select do |source_file|
113
+ file_matches_filter?(source_file, filter, source)
114
+ end
115
+
116
+ if found_files.empty?
117
+ raise(TargetNotFoundError, "Filter `#{filter}` matched no file.")
118
+ end
119
+
120
+ found_files
121
+ end
122
+
123
+ def file_matches_filter?(source_file, filter, source_dir)
124
+ File.fnmatch?(filter, base_path(source_file, source_dir))
125
+ end
126
+
127
+ def extract_all(target, recursive_packages: false)
29
128
  source = File.expand_path(@archive)
30
129
  target ||= default_target(source)
31
- raise(TargetNotEmptyError, "Target directory `#{File.basename(target)}` is not empty.") unless Dir.empty?(target)
130
+ ensure_empty(target)
32
131
 
33
132
  if recursive_packages
34
133
  extract_recursively(source, target)
@@ -39,11 +138,16 @@ module Excavate
39
138
  target
40
139
  end
41
140
 
42
- private
141
+ def ensure_empty(path)
142
+ unless Dir.empty?(path)
143
+ raise(TargetNotEmptyError,
144
+ "Target directory `#{File.basename(path)}` is not empty.")
145
+ end
146
+ end
43
147
 
44
148
  def default_target(source)
45
149
  target = File.expand_path(File.basename(source, ".*"))
46
- raise(TargetExistsError, "Target directory `#{File.basename(target)}` already exists.") if File.exist?(target)
150
+ ensure_not_exist(target)
47
151
 
48
152
  FileUtils.mkdir(target)
49
153
 
@@ -51,7 +155,11 @@ module Excavate
51
155
  end
52
156
 
53
157
  def extract_recursively(archive, target)
54
- extract_once(archive, target)
158
+ if File.directory?(archive)
159
+ duplicate_dir(archive, target)
160
+ else
161
+ extract_once(archive, target)
162
+ end
55
163
 
56
164
  all_files_in(target).each do |file|
57
165
  next unless archive?(file)
@@ -60,14 +168,25 @@ module Excavate
60
168
  end
61
169
  end
62
170
 
171
+ def duplicate_dir(source, target)
172
+ Dir.chdir(source) do
173
+ (Dir.entries(".") - [".", ".."]).each do |entry|
174
+ FileUtils.cp_r(entry, target)
175
+ end
176
+ end
177
+ end
178
+
63
179
  def extract_once(archive, target)
64
180
  extension = normalized_extension(archive)
65
181
  extractor_class = TYPES[extension]
66
- raise(UnknownArchiveError, "Could not unarchive `#{archive}`.") unless extractor_class
182
+ unless extractor_class
183
+ raise(UnknownArchiveError, "Could not unarchive `#{archive}`.")
184
+ end
67
185
 
68
186
  extractor_class.new(archive).extract(target)
69
187
  rescue StandardError => e
70
- raise unless extension == "exe" && e.message.start_with?("Invalid file format")
188
+ raise unless extension == "exe" &&
189
+ e.message.start_with?("Invalid file format")
71
190
 
72
191
  Extractors::CabExtractor.new(archive).extract(target)
73
192
  end
@@ -81,7 +200,7 @@ module Excavate
81
200
  rescue FFI::NullPointerError => e
82
201
  FileUtils.rmdir(target)
83
202
  raise unless normalized_extension(archive) == "exe" &&
84
- e.message.start_with?("invalid memory read at address=0x0000000000000000")
203
+ e.message.start_with?(INVALID_MEMORY_MESSAGE)
85
204
  end
86
205
 
87
206
  def normalized_extension(file)
data/lib/excavate/cli.rb CHANGED
@@ -8,6 +8,13 @@ module Excavate
8
8
  STATUS_UNKNOWN_ERROR = 1
9
9
  STATUS_TARGET_EXISTS = 2
10
10
  STATUS_TARGET_NOT_EMPTY = 3
11
+ STATUS_TARGET_NOT_FOUND = 4
12
+
13
+ ERROR_TO_STATUS = {
14
+ TargetExistsError => STATUS_TARGET_EXISTS,
15
+ TargetNotEmptyError => STATUS_TARGET_NOT_EMPTY,
16
+ TargetNotFoundError => STATUS_TARGET_NOT_FOUND,
17
+ }.freeze
11
18
 
12
19
  def self.exit_on_failure?
13
20
  false
@@ -23,15 +30,22 @@ module Excavate
23
30
  super(args, config)
24
31
  end
25
32
 
26
- desc "extract ARCHIVE", "Extract ARCHIVE to a new directory"
27
- option :recursive, aliases: :r, type: :boolean, default: false, desc: "Also extract all nested archives."
28
- def extract(archive)
29
- target = Excavate::Archive.new(archive).extract(recursive_packages: options[:recursive])
30
- success("Successfully extracted to #{File.basename(target)}/")
31
- rescue TargetExistsError => e
32
- error(e.message, STATUS_TARGET_EXISTS)
33
- rescue TargetNotEmptyError => e
34
- error(e.message, STATUS_TARGET_NOT_EMPTY)
33
+ desc "extract ARCHIVE [FILE...]",
34
+ "Extract FILE or all files from ARCHIVE to a new directory"
35
+ option :recursive, aliases: :r, type: :boolean, default: false,
36
+ desc: "Also extract all nested archives."
37
+ option :filter, type: :string,
38
+ desc: "Filter by pattern (supports **, *, ?, etc)"
39
+ def extract(archive, *files)
40
+ target = Excavate::Archive.new(archive).extract(
41
+ recursive_packages: options[:recursive],
42
+ files: files,
43
+ filter: options[:filter],
44
+ )
45
+
46
+ success("Successfully extracted to #{format_paths(target)}")
47
+ rescue Error => e
48
+ handle_error(e)
35
49
  end
36
50
  default_task :extract
37
51
 
@@ -42,9 +56,24 @@ module Excavate
42
56
  STATUS_SUCCESS
43
57
  end
44
58
 
59
+ def handle_error(exception)
60
+ status = ERROR_TO_STATUS[exception.class]
61
+ raise exception unless status
62
+
63
+ error(exception.message, status)
64
+ end
65
+
45
66
  def error(message, status)
46
67
  say(message, :red)
47
68
  status
48
69
  end
70
+
71
+ def format_paths(path_or_paths)
72
+ paths = Array(path_or_paths).map do |x|
73
+ File.directory?(x) ? "#{File.basename(x)}/" : File.basename(x)
74
+ end
75
+
76
+ paths.join(", ")
77
+ end
49
78
  end
50
79
  end
@@ -15,6 +15,8 @@ module Excavate
15
15
 
16
16
  Ole::Storage.open(@archive) do |ole|
17
17
  children(ole).each do |file|
18
+ next if ole.file.directory?(file)
19
+
18
20
  filename = prepare_filename(file)
19
21
  path = File.join(target, filename)
20
22
  content = ole.file.read(file)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Excavate
4
- VERSION = "0.2.2"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/excavate.rb CHANGED
@@ -8,7 +8,12 @@ require_relative "excavate/utils"
8
8
 
9
9
  module Excavate
10
10
  class Error < StandardError; end
11
- class UnknownArchiveError < Error; end
11
+
12
12
  class TargetExistsError < Error; end
13
+
13
14
  class TargetNotEmptyError < Error; end
15
+
16
+ class TargetNotFoundError < Error; end
17
+
18
+ class UnknownArchiveError < Error; end
14
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: excavate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-31 00:00:00.000000000 Z
11
+ date: 2021-11-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arr-pm
@@ -116,6 +116,7 @@ executables:
116
116
  extensions: []
117
117
  extra_rdoc_files: []
118
118
  files:
119
+ - ".github/workflows/metanorma.yml"
119
120
  - ".github/workflows/release.yml"
120
121
  - ".github/workflows/rspec.yml"
121
122
  - ".gitignore"
@@ -168,7 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
169
  - !ruby/object:Gem::Version
169
170
  version: '0'
170
171
  requirements: []
171
- rubygems_version: 3.0.3
172
+ rubygems_version: 3.0.3.1
172
173
  signing_key:
173
174
  specification_version: 4
174
175
  summary: Extract nested archives with a single command.