archive_io 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/README.md +57 -0
- data/Rakefile +2 -0
- data/archive_io.gemspec +28 -0
- data/bin/console +10 -0
- data/lib/archive_io.rb +10 -0
- data/lib/archive_io/archive_reader.rb +70 -0
- data/lib/archive_io/file_reader.rb +23 -0
- data/lib/archive_io/header.rb +21 -0
- data/lib/archive_io/lib_archive.rb +35 -0
- data/lib/archive_io/version.rb +3 -0
- data/lib/archive_io/wildcard_pattern.rb +12 -0
- metadata +143 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 85be778c44e05a1acce8d7da11e8b8d59e3959f7
|
4
|
+
data.tar.gz: eeda3b05ca916422564bed68c861f2c28c3f75d9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 93ebdc5b74f7aee6183b7645b71a2985cbc43e7946b7a1467032f420a20a1e708ed35b1bade51ca2459bbb80a9d395859eb568be2d2ff6065109004c97667b56
|
7
|
+
data.tar.gz: bf0cc25bc5054e8cbe70bd5d35c41b03e3a9c82202c5edf4d197e920187755120781492421ccec98ab035afae7e9f9ac63f7f18282bfee1af510b9fbd67014dc
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# ArchiveIo
|
2
|
+
|
3
|
+
A library that traverses an archive file (using [libarchive](http://www.libarchive.org/) under the hood) and yields an IO-like object for each file entry inside it, for further streamed processing.
|
4
|
+
|
5
|
+
**Note:** [libarchive](http://www.libarchive.org/) has to be pre-installed and available on the host system.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'archive_io'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install archive_io
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
Simple usage:
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
archive = ArchiveIO.open("archive.7z")
|
29
|
+
archive.each do |cursor|
|
30
|
+
puts cursor.pathname # prints out pathname inside archive
|
31
|
+
puts cursor.read(10) # prints out beginning of each file
|
32
|
+
end
|
33
|
+
archive.close
|
34
|
+
```
|
35
|
+
|
36
|
+
This library comes in handy when you want to process huge XML files by reading them straight from the archive, without uncompressing it first. It works nicely together with `Nokogiri::XML::Reader` and can be used as follows:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
archive = ArchiveIO.open("archive.7z")
|
40
|
+
archive.select("*.xml") do |cursor|
|
41
|
+
Nokogiri::XML::Reader(cursor).each do |xml_node|
|
42
|
+
# your custom xml processing logic goes here
|
43
|
+
end
|
44
|
+
end
|
45
|
+
archive.close
|
46
|
+
```
|
47
|
+
|
48
|
+
## Development
|
49
|
+
|
50
|
+
After checking out the repo, run `bundle install` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
51
|
+
|
52
|
+
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
53
|
+
|
54
|
+
## Contributing
|
55
|
+
|
56
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/AMekss/archive_io.
|
57
|
+
|
data/Rakefile
ADDED
data/archive_io.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'archive_io/version'
|
5
|
+
|
6
|
+
# Gem specification for archive_io: identity, packaged files and dependencies.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "archive_io"
  gem.version = ArchiveIO::VERSION
  gem.authors = ["Artūrs Mekšs"]
  gem.email   = ["arturs.mekss@gmail.com"]

  # Descriptive texts shown on the registry
  gem.summary     = "Library which can traverse archived file and yields io like object on each file in it."
  gem.description = "Library which can traverse archived file (using `libarchive` under the hood) and yield io like object on each file entry inside it for further processing."
  gem.homepage    = "https://github.com/AMekss/archive_io"

  # Package every git-tracked file except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "ffi", "~> 1.9"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.11"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.5"
  gem.add_development_dependency "pry"
  gem.add_development_dependency "nokogiri"
end
|
data/bin/console
ADDED
data/lib/archive_io.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require "archive_io/lib_archive"
|
2
|
+
require "archive_io/header"
|
3
|
+
require "archive_io/file_reader"
|
4
|
+
require "archive_io/wildcard_pattern"
|
5
|
+
|
6
|
+
module ArchiveIO
|
7
|
+
# Walks the entries of an archive through libarchive and yields a FileReader
# for every regular file found in it.
class ArchiveReader
  # Maps the status code of `archive_read_next_header` to a callable that
  # turns the received entry pointer into the value `next_header` returns.
  # Any other status code is treated as an error by `header_handler`.
  HEADER_HANDLING_STRATEGIES = {
    LibArchive::OK => ->(pointer) { Header.new(pointer) },
    LibArchive::EOF => ->(_) { nil },
  }.freeze

  # Builds the GC finalizer for a reader. The returned proc closes over the
  # handle holder only - never over the reader itself - because a finalizer
  # that references its own object keeps that object alive forever. A proc
  # is used so the object id passed in by the GC is silently accepted.
  FINALIZER = lambda do |handle|
    proc do
      LibArchive.archive_read_finish(handle[:archive]) if handle[:archive]
      handle[:archive] = nil
    end
  end
  private_constant :FINALIZER

  # Creates a reader with no archive attached; call #open before iterating.
  def initialize
    # The native handle lives in a holder shared with the finalizer, so the
    # finalizer always sees the handle that is current at collection time.
    @handle = { archive: nil }
    ObjectSpace.define_finalizer(self, FINALIZER.call(@handle))
  end

  # Opens +filename+ for reading, closing any previously opened archive.
  #
  # @param filename [String] path of the archive file
  # @raise [LibArchive::Error] when libarchive reports a failure; the
  #   partially initialised handle is released before the error propagates
  def open(filename)
    close

    @handle[:archive] = LibArchive.archive_read_new
    @filename = filename

    assert_operation_valid(LibArchive.archive_read_support_compression_all(archive))
    assert_operation_valid(LibArchive.archive_read_support_format_all(archive))
    assert_operation_valid(LibArchive.archive_read_open_filename(archive, filename, 10240))
  rescue LibArchive::Error
    # The error message has already been captured, so the handle can go.
    close
    raise
  end

  # Yields a FileReader for each regular file entry of the archive.
  #
  # @yieldparam file [FileReader] reader positioned on the current entry
  # @raise [LibArchive::NoArchiveError] when no archive is open
  # @raise [LibArchive::Error] when reading a header fails
  def each
    raise LibArchive::NoArchiveError, "Archive is not open, call open with filename on archive before this operation" unless archive

    begin
      while (header = next_header)
        yield FileReader.new(header.pathname, archive) if header.file?
      end
    ensure
      # Reopen the file so the next call starts from the first entry, even
      # when the block left early (break / exception). Skipped when the
      # archive was closed in the meantime.
      open(@filename) if @filename
    end
  end

  # Like #each, but yields only the files whose pathname matches +pattern+.
  #
  # @param pattern [String] wildcard pattern handed to WildcardPattern
  def select(pattern)
    wildcard = WildcardPattern.new(pattern)

    each do |file|
      yield file if wildcard.match?(file.pathname)
    end
  end

  # Releases the native archive handle (if any) and forgets the filename.
  def close
    LibArchive.archive_read_finish(archive) if archive
  ensure
    @handle[:archive] = nil
    @filename = nil
  end

  private

  # @return [FFI::Pointer, nil] native handle of the currently open archive
  def archive
    @handle[:archive]
  end

  # Picks the strategy for a header status code; unknown codes raise.
  def header_handler(code)
    HEADER_HANDLING_STRATEGIES.fetch(code) { raise LibArchive::Error, archive }
  end

  # Advances to the next entry.
  #
  # @return [Header, nil] the next header, or nil at the end of the archive
  def next_header
    # archive_read_next_header hands the entry back through an out-parameter
    # (a pointer to a pointer), so a pointer-sized slot is all that is needed;
    # allocating a whole entry here would be leaked on every call.
    entry_slot = FFI::MemoryPointer.new(:pointer)
    status = LibArchive.archive_read_next_header(archive, entry_slot)
    header_handler(status).call(entry_slot.read_pointer)
  end

  # Raises unless libarchive reported success for the given status code.
  def assert_operation_valid(operation_result_code)
    raise LibArchive::Error, archive if operation_result_code != LibArchive::OK
  end
end
|
70
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "archive_io/lib_archive"
|
2
|
+
|
3
|
+
module ArchiveIO
|
4
|
+
# IO-like, read-only view on the archive entry the reader is currently
# positioned on.
class FileReader
  # @return [String] pathname of the entry inside the archive
  attr_reader :pathname

  # @param pathname [String] pathname of the entry inside the archive
  # @param archive [FFI::Pointer] native handle of the open archive
  def initialize(pathname, archive)
    @pathname = pathname
    @archive = archive
  end

  # Reads up to +size+ bytes of the entry's data.
  #
  # @param size [Integer] maximum number of bytes to read
  # @return [String, nil] the bytes read, or nil at the end of the entry or
  #   when libarchive reports an error (mirrors IO#read with a length)
  def read(size)
    return String.new if size.zero?

    buffer = FFI::MemoryPointer.new(:char, size)
    bytes_read = LibArchive.archive_read_data(@archive, buffer, size)

    # 0 means end of entry. Error codes are negative; when the binding
    # declares an unsigned return type they surface as values far above
    # +size+, so anything outside 1..size is treated as "nothing to return".
    return nil unless bytes_read.between?(1, size)

    # Copy exactly the bytes that were read; a NUL-terminated string read
    # would truncate binary data at the first zero byte.
    buffer.get_bytes(0, bytes_read)
  end

  # Does nothing; present only to support an IO-like interface.
  def close
  end
end
|
22
|
+
private_constant :FileReader
|
23
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "archive_io/lib_archive"
|
2
|
+
|
3
|
+
module ArchiveIO
|
4
|
+
# Thin wrapper around a native archive entry pointer, exposing the entry's
# pathname and whether it is a regular file.
class Header
  S_IFMT  = 0o170000 # bit mask selecting the file-type bits of a mode
  S_IFREG = 0o100000 # file-type bits of a regular file

  # @param pointer [FFI::Pointer] pointer to the native archive entry
  def initialize(pointer)
    @pointer = pointer
  end

  # @return [Boolean] true when the entry's file type is "regular file"
  def file?
    type_bits = LibArchive.archive_entry_filetype(@pointer) & S_IFMT
    type_bits == S_IFREG
  end

  # @return [String] pathname of the entry as reported by libarchive
  def pathname
    LibArchive.archive_entry_pathname(@pointer)
  end
end
|
20
|
+
private_constant :Header
|
21
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "ffi"
|
2
|
+
|
3
|
+
module ArchiveIO
|
4
|
+
# FFI bindings for the subset of libarchive used by this gem.
module LibArchive
  extend FFI::Library

  # Status codes compared against the results of the calls bound below.
  OK = 0
  EOF = 1

  # Alternatives tried in order. "libarchive.so.13" is the runtime soname of
  # libarchive 3, present on hosts without the development symlink.
  ffi_lib ["archive", "libarchive.so.2", "libarchive.so.13"]

  # NOTE(review): libarchive 3 lists archive_read_support_filter_all and
  # archive_read_free as the successors of archive_read_support_compression_all
  # and archive_read_finish - confirm before targeting newer major versions.
  attach_function :archive_version_string, [], :string
  attach_function :archive_read_open_filename, [:pointer, :string, :size_t], :int
  attach_function :archive_read_support_compression_all, [:pointer], :int
  attach_function :archive_read_support_format_all, [:pointer], :int
  attach_function :archive_error_string, [:pointer], :string
  attach_function :archive_read_new, [], :pointer
  attach_function :archive_read_finish, [:pointer], :int
  # Returns a 64-bit offset; :int would truncate positions past 2 GiB.
  attach_function :archive_read_header_position, [:pointer], :int64
  # Second argument is an out-parameter receiving the entry pointer.
  attach_function :archive_read_next_header, [:pointer, :pointer], :int
  # Returns a signed size: bytes read, 0 at end of entry, negative on error.
  # Declaring it unsigned would hide the error codes from callers.
  attach_function :archive_read_data, [:pointer, :pointer, :size_t], :ssize_t

  # entry
  attach_function :archive_entry_new, [], :pointer
  attach_function :archive_entry_filetype, [:pointer], :mode_t
  attach_function :archive_entry_pathname, [:pointer], :string

  # Raised when an operation is attempted without an open archive.
  class NoArchiveError < StandardError; end

  # Raised when libarchive reports a failure; the message is the error
  # string libarchive holds for the given archive handle.
  class Error < StandardError
    # @param archive [FFI::Pointer] native handle the failure occurred on
    def initialize(archive)
      # to_s keeps the message an empty string when no error text is set.
      super(LibArchive.archive_error_string(archive).to_s)
    end
  end
end
|
35
|
+
end
|
metadata
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: archive_io
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Artūrs Mekšs
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-08-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.9'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.11'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.11'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.5'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: nokogiri
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Library which can traverse archived file (using `libarchive` under the
|
98
|
+
hood) and yield io like object on each file entry inside it for further processing.
|
99
|
+
email:
|
100
|
+
- arturs.mekss@gmail.com
|
101
|
+
executables: []
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- Gemfile
|
108
|
+
- README.md
|
109
|
+
- Rakefile
|
110
|
+
- archive_io.gemspec
|
111
|
+
- bin/console
|
112
|
+
- lib/archive_io.rb
|
113
|
+
- lib/archive_io/archive_reader.rb
|
114
|
+
- lib/archive_io/file_reader.rb
|
115
|
+
- lib/archive_io/header.rb
|
116
|
+
- lib/archive_io/lib_archive.rb
|
117
|
+
- lib/archive_io/version.rb
|
118
|
+
- lib/archive_io/wildcard_pattern.rb
|
119
|
+
homepage: https://github.com/AMekss/archive_io
|
120
|
+
licenses: []
|
121
|
+
metadata: {}
|
122
|
+
post_install_message:
|
123
|
+
rdoc_options: []
|
124
|
+
require_paths:
|
125
|
+
- lib
|
126
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
requirements: []
|
137
|
+
rubyforge_project:
|
138
|
+
rubygems_version: 2.5.1
|
139
|
+
signing_key:
|
140
|
+
specification_version: 4
|
141
|
+
summary: Library which can traverse archived file and yields io like object on each
|
142
|
+
file in it.
|
143
|
+
test_files: []
|