archive_io 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 85be778c44e05a1acce8d7da11e8b8d59e3959f7
4
+ data.tar.gz: eeda3b05ca916422564bed68c861f2c28c3f75d9
5
+ SHA512:
6
+ metadata.gz: 93ebdc5b74f7aee6183b7645b71a2985cbc43e7946b7a1467032f420a20a1e708ed35b1bade51ca2459bbb80a9d395859eb568be2d2ff6065109004c97667b56
7
+ data.tar.gz: bf0cc25bc5054e8cbe70bd5d35c41b03e3a9c82202c5edf4d197e920187755120781492421ccec98ab035afae7e9f9ac63f7f18282bfee1af510b9fbd67014dc
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /spec/examples.txt
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in archive_io.gemspec
4
+ gemspec
@@ -0,0 +1,57 @@
1
+ # ArchiveIo
2
+
3
+ A library that traverses an archive file (using [libarchive](http://www.libarchive.org/) under the hood) and yields an IO-like object for each file entry inside it, so that entries can be processed as streams.
4
+
5
+ **Note:** [libarchive](http://www.libarchive.org/) must be pre-installed and available on the host system.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'archive_io'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install archive_io
22
+
23
+ ## Usage
24
+
25
+ Simple usage:
26
+
27
+ ```ruby
28
+ archive = ArchiveIO.open("archive.7z")
29
+ archive.each do |cursor|
30
+ puts cursor.pathname # prints out pathname inside archive
31
+ puts cursor.read(10) # prints out beginning of each file
32
+ end
33
+ archive.close
34
+ ```
35
+
36
+ This library comes in handy when you want to process huge XML files by reading them straight from the archive, without uncompressing it first. It works nicely together with `Nokogiri::XML::Reader` and can be used as follows:
37
+
38
+ ```ruby
39
+ archive = ArchiveIO.open("archive.7z")
40
+ archive.select("*.xml") do |cursor|
41
+ Nokogiri::XML::Reader(cursor).each do |xml_node|
42
+ # your custom xml processing logic goes here
43
+ end
44
+ end
45
+ archive.close
46
+ ```
47
+
48
+ ## Development
49
+
50
+ After checking out the repo, run `bundle install` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
51
+
52
+ To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
53
+
54
+ ## Contributing
55
+
56
+ Bug reports and pull requests are welcome on GitHub at https://github.com/AMekss/archive_io.
57
+
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"

# The default task depends on :spec, so that task has to exist; otherwise a
# plain `rake` aborts because it does not know how to build :spec.
# RSpec is only a development dependency, so fall back to a task that explains
# what is missing when it cannot be loaded.
begin
  require "rspec/core/rake_task"
  RSpec::Core::RakeTask.new(:spec)
rescue LoadError
  task :spec do
    abort "RSpec is not installed; run `bundle install` first"
  end
end

task :default => :spec
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Gem specification for archive_io. The version is read from
# lib/archive_io/version.rb, so lib/ is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'archive_io/version'

Gem::Specification.new do |spec|
  spec.name    = "archive_io"
  spec.version = ArchiveIO::VERSION
  spec.authors = ["Artūrs Mekšs"]
  spec.email   = ["arturs.mekss@gmail.com"]

  spec.summary     = "Library which can traverse archived file and yields io like object on each file in it."
  spec.description = "Library which can traverse archived file (using `libarchive` under the hood) and yield io like object on each file entry inside it for further processing."
  spec.homepage    = "https://github.com/AMekss/archive_io"

  # Package every file tracked by git except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "ffi", "~> 1.9"

  # Development dependencies with a pinned version range ...
  { "bundler" => "~> 1.11", "rake" => "~> 10.0", "rspec" => "~> 3.5" }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end

  # ... and those accepted at any version.
  %w[pry nokogiri].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "archive_io"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ require "pry"
10
+ Pry.start
@@ -0,0 +1,10 @@
1
+ require "archive_io/version"
2
+ require "archive_io/archive_reader"
3
+
4
module ArchiveIO
  # Opens the archive stored at +filename+.
  #
  # Without a block the opened reader is returned and the caller is
  # responsible for calling +close+ on it.
  #
  # With a block the reader is yielded, closed when the block finishes (also
  # when it raises), and the value of the block is returned.
  #
  # If opening fails, the reader is closed before the error is re-raised so
  # that the handle it acquired is not left dangling.
  def self.open(filename)
    reader = ArchiveIO::ArchiveReader.new

    begin
      reader.open(filename)
    rescue StandardError
      reader.close
      raise
    end

    return reader unless block_given?

    begin
      yield reader
    ensure
      reader.close
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require "archive_io/lib_archive"
2
+ require "archive_io/header"
3
+ require "archive_io/file_reader"
4
+ require "archive_io/wildcard_pattern"
5
+
6
module ArchiveIO
  # Sequential reader over the entries of an archive file, backed by
  # libarchive through the LibArchive FFI bindings.
  #
  # The native handle is wrapped in an FFI::AutoPointer, so it is released
  # through +archive_read_finish+ either explicitly by #close or when the
  # pointer object is garbage collected.
  class ArchiveReader
    # Maps the status returned by +archive_read_next_header+ to a callable
    # that turns the entry pointer into a Header (OK) or into nil (EOF).
    # Any other status is treated as an error by #header_handler.
    HEADER_HANDLING_STRATEGIES = {
      LibArchive::OK => ->(pointer) { Header.new(pointer) },
      LibArchive::EOF => ->(_) { nil },
    }.freeze

    # Block size, in bytes, passed to +archive_read_open_filename+.
    BLOCK_SIZE = 10240

    def initialize
      @archive = nil
      @filename = nil
      @consumed = false
    end

    # Opens +filename+ for reading, closing any archive opened before.
    #
    # @param filename [String] path of the archive file
    # @raise [LibArchive::Error] when libarchive reports a failure
    def open(filename)
      close

      @archive = FFI::AutoPointer.new(
        LibArchive.archive_read_new,
        LibArchive.method(:archive_read_finish)
      )
      @filename = filename
      @consumed = false

      assert_operation_valid(LibArchive.archive_read_support_compression_all(archive))
      assert_operation_valid(LibArchive.archive_read_support_format_all(archive))
      assert_operation_valid(LibArchive.archive_read_open_filename(archive, filename, BLOCK_SIZE))
    end

    # Yields a FileReader for every regular-file entry of the archive, in
    # archive order. Returns an Enumerator when no block is given.
    #
    # @raise [LibArchive::NoArchiveError] when no archive is open
    # @raise [LibArchive::Error] when reading an entry header fails
    def each
      raise LibArchive::NoArchiveError, "Archive is not open, call open with filename on archive before this operation" unless archive
      return enum_for(:each) unless block_given?

      # libarchive reads forward only. If an earlier traversal already moved
      # the read position (even one cut short by `break` or an exception),
      # reopen the file so that every call starts at the first entry.
      open(@filename) if @consumed
      @consumed = true

      while (header = next_header)
        yield FileReader.new(header.pathname, archive) if header.file?
      end

      nil
    end

    # Like #each, but yields only the entries whose pathname matches the
    # wildcard +pattern+ (see WildcardPattern). Returns an Enumerator when no
    # block is given.
    def select(pattern)
      return enum_for(:select, pattern) unless block_given?

      wildcard = WildcardPattern.new(pattern)

      each do |file|
        yield file if wildcard.match?(file.pathname)
      end
    end

    # Releases the native archive handle, if any. Safe to call repeatedly.
    def close
      archive.free if archive
    ensure
      @archive = nil
      @filename = nil
      @consumed = false
    end

    private

    attr_reader :archive

    # Returns the callable registered for +code+, raising LibArchive::Error
    # for any status that has no registered strategy.
    def header_handler(code)
      HEADER_HANDLING_STRATEGIES.fetch(code) { raise LibArchive::Error, archive }
    end

    # Advances to the next entry and returns its Header, or nil at the end of
    # the archive.
    def next_header
      # archive_read_next_header takes a `struct archive_entry **`: libarchive
      # stores the address of the entry in this slot, so a pointer-sized
      # buffer is all that has to be allocated here.
      entry_slot = FFI::MemoryPointer.new(:pointer)
      status = LibArchive.archive_read_next_header(archive, entry_slot)
      header_handler(status).call(entry_slot.read_pointer)
    end

    # Raises LibArchive::Error unless +operation_result_code+ is LibArchive::OK.
    def assert_operation_valid(operation_result_code)
      raise LibArchive::Error, archive if operation_result_code != LibArchive::OK
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require "archive_io/lib_archive"
2
+
3
module ArchiveIO
  # Minimal IO-like reader over the data of the archive entry that the given
  # archive handle is currently positioned on.
  class FileReader
    # Number of bytes requested per call when reading an entry to its end.
    READ_ALL_CHUNK_SIZE = 16 * 1024

    attr_reader :pathname

    # @param pathname [String] pathname of the entry inside the archive
    # @param archive handle passed through to LibArchive.archive_read_data
    def initialize(pathname, archive)
      @pathname = pathname
      @archive = archive
    end

    # Reads entry data, following the conventions of IO#read.
    #
    # @param size [Integer, nil] maximum number of bytes to read; when nil the
    #   rest of the entry is read
    # @return [String, nil] the bytes read. With a positive +size+, nil is
    #   returned at the end of the entry or when the read fails; with no
    #   +size+ an empty string is returned at the end of the entry.
    # @raise [ArgumentError] when +size+ is negative
    def read(size = nil)
      return read_to_end if size.nil?
      raise ArgumentError, "negative length #{size} given" if size < 0
      return String.new if size.zero?

      read_chunk(size)
    end

    def close
      # does nothing, purpose of having it is to support IO like interface
    end

    private

    # Reads up to +size+ bytes; returns nil at the end of the entry or on error.
    def read_chunk(size)
      buffer = FFI::MemoryPointer.new(:char, size)
      count = LibArchive.archive_read_data(@archive, buffer, size)

      # A valid count lies in 1..size. Zero means end of entry and a negative
      # value is an error status. If the binding declares an unsigned return
      # type, an error status shows up as a number larger than +size+ instead.
      return nil if count <= 0 || count > size

      # Copy exactly +count+ bytes: entry data may contain NUL bytes, so it
      # must not be read as a NUL-terminated C string.
      buffer.get_bytes(0, count)
    end

    # Reads everything up to the end of the entry into one binary string.
    def read_to_end
      content = String.new
      while (chunk = read_chunk(READ_ALL_CHUNK_SIZE))
        content << chunk
      end
      content
    end
  end
  private_constant :FileReader
end
@@ -0,0 +1,21 @@
1
+ require "archive_io/lib_archive"
2
+
3
module ArchiveIO
  # Read-only view of an archive entry, queried through the LibArchive
  # bindings using the entry pointer it was created with.
  class Header
    # Mask applied to the entry's filetype value before comparing it.
    S_IFMT = 0170000
    # Value of the masked filetype for a regular file.
    S_IFREG = 0100000

    # @param pointer entry pointer handed to the LibArchive entry functions
    def initialize(pointer)
      @pointer = pointer
    end

    # @return the pathname reported by libarchive for this entry
    def pathname
      LibArchive.archive_entry_pathname(@pointer)
    end

    # @return [Boolean] true when the entry is a regular file
    def file?
      type_bits = LibArchive.archive_entry_filetype(@pointer) & S_IFMT
      type_bits == S_IFREG
    end
  end
  private_constant :Header
end
@@ -0,0 +1,35 @@
1
+ require "ffi"
2
+
3
module ArchiveIO
  # FFI bindings for the subset of the libarchive C API used by this gem.
  module LibArchive
    extend FFI::Library

    # Status codes returned by libarchive calls.
    OK = 0
    EOF = 1

    # Candidates are tried in order: the generic library name first, then the
    # versioned sonames (libarchive.so.13 is the libarchive 3.x soname) for
    # hosts that only ship the runtime package.
    ffi_lib ["archive", "libarchive.so.13", "libarchive.so.2"]

    attach_function :archive_version_string, [], :string
    attach_function :archive_read_open_filename, [:pointer, :string, :size_t], :int
    attach_function :archive_read_support_compression_all, [:pointer], :int
    attach_function :archive_read_support_format_all, [:pointer], :int
    attach_function :archive_error_string, [:pointer], :string
    attach_function :archive_read_new, [], :pointer
    attach_function :archive_read_finish, [:pointer], :int
    # The header position is a 64-bit offset in the C API.
    attach_function :archive_read_header_position, [:pointer], :int64
    attach_function :archive_read_next_header, [:pointer, :pointer], :int
    # Returns a signed count: the number of bytes read, 0 at the end of the
    # entry, or a negative status on error. Binding it as unsigned would make
    # errors indistinguishable from huge byte counts.
    attach_function :archive_read_data, [:pointer, :pointer, :size_t], :ssize_t

    # entry
    attach_function :archive_entry_new, [], :pointer
    attach_function :archive_entry_filetype, [:pointer], :mode_t
    attach_function :archive_entry_pathname, [:pointer], :string

    # Raised when an operation is attempted while no archive is open.
    class NoArchiveError < StandardError; end

    # Raised when libarchive reports a failure; the message is the error
    # string libarchive holds for the given archive handle.
    class Error < StandardError
      # @param archive handle whose last error string becomes the message
      def initialize(archive)
        message = LibArchive.archive_error_string(archive)
        # libarchive may have no error text recorded; avoid an empty message.
        super(message.nil? || message.empty? ? "unknown libarchive error" : message)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ArchiveIO
  # Version of the gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,12 @@
1
module ArchiveIO
  # Case-insensitive wildcard matcher in which `*` stands for any run of
  # characters (including none) and every other character matches literally.
  class WildcardPattern
    # @param expression [String] wildcard expression, e.g. "*.xml"
    def initialize(expression)
      escaped = Regexp.escape(expression).gsub('\*', '.*?')
      # \A and \z anchor the whole string; ^ and $ would anchor single lines
      # and let "a.xml\nb.txt" pass for "*.xml". MULTILINE lets `*` span
      # newline characters, which can occur in pathnames.
      @regex = Regexp.new("\\A#{escaped}\\z", Regexp::IGNORECASE | Regexp::MULTILINE)
    end

    # @param str [String, nil] candidate, typically an entry pathname
    # @return [Boolean] true when the whole of +str+ matches the expression
    def match?(str)
      !!(str =~ @regex)
    end
  end
end
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: archive_io
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Artūrs Mekšs
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-08-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.11'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.11'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Library which can traverse archived file (using `libarchive` under the
98
+ hood) and yield io like object on each file entry inside it for further processing.
99
+ email:
100
+ - arturs.mekss@gmail.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".rspec"
107
+ - Gemfile
108
+ - README.md
109
+ - Rakefile
110
+ - archive_io.gemspec
111
+ - bin/console
112
+ - lib/archive_io.rb
113
+ - lib/archive_io/archive_reader.rb
114
+ - lib/archive_io/file_reader.rb
115
+ - lib/archive_io/header.rb
116
+ - lib/archive_io/lib_archive.rb
117
+ - lib/archive_io/version.rb
118
+ - lib/archive_io/wildcard_pattern.rb
119
+ homepage: https://github.com/AMekss/archive_io
120
+ licenses: []
121
+ metadata: {}
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ required_rubygems_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ requirements: []
137
+ rubyforge_project:
138
+ rubygems_version: 2.5.1
139
+ signing_key:
140
+ specification_version: 4
141
+ summary: Library which can traverse archived file and yields io like object on each
142
+ file in it.
143
+ test_files: []