archive_io 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 85be778c44e05a1acce8d7da11e8b8d59e3959f7
4
+ data.tar.gz: eeda3b05ca916422564bed68c861f2c28c3f75d9
5
+ SHA512:
6
+ metadata.gz: 93ebdc5b74f7aee6183b7645b71a2985cbc43e7946b7a1467032f420a20a1e708ed35b1bade51ca2459bbb80a9d395859eb568be2d2ff6065109004c97667b56
7
+ data.tar.gz: bf0cc25bc5054e8cbe70bd5d35c41b03e3a9c82202c5edf4d197e920187755120781492421ccec98ab035afae7e9f9ac63f7f18282bfee1af510b9fbd67014dc
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /spec/examples.txt
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in archive_io.gemspec
4
+ gemspec
@@ -0,0 +1,57 @@
1
+ # ArchiveIo
2
+
3
+ A library that traverses an archive file (using [libarchive](http://www.libarchive.org/) under the hood) and yields an IO-like object for each file entry inside it, so that entries can be processed further as streams.
4
+
5
+ **Note:** [libarchive](http://www.libarchive.org/) has to be pre-installed and available on the host system.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'archive_io'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install archive_io
22
+
23
+ ## Usage
24
+
25
+ Simple usage:
26
+
27
+ ```ruby
28
+ archive = ArchiveIO.open("archive.7z")
29
+ archive.each do |cursor|
30
+ puts cursor.pathname # prints out pathname inside archive
31
+ puts cursor.read(10) # prints out beginning of each file
32
+ end
33
+ archive.close
34
+ ```
35
+
36
+ This library comes in handy when you want to process huge XML files by reading them straight from the archive, without uncompressing it first. It works nicely together with `Nokogiri::XML::Reader` and can be used as follows:
37
+
38
+ ```ruby
39
+ archive = ArchiveIO.open("archive.7z")
40
+ archive.select("*.xml") do |cursor|
41
+ Nokogiri::XML::Reader(cursor).each do |xml_node|
42
+ # your custom xml processing logic goes here
43
+ end
44
+ end
45
+ archive.close
46
+ ```
47
+
48
+ ## Development
49
+
50
+ After checking out the repo, run `bundle install` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
51
+
52
+ To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
53
+
54
+ ## Contributing
55
+
56
+ Bug reports and pull requests are welcome on GitHub at https://github.com/AMekss/archive_io.
57
+
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Define the `spec` task that the default task depends on. bundler/gem_tasks
# does not provide one, so without this a bare `rake` aborts because no task
# named `spec` exists.
RSpec::Core::RakeTask.new(:spec)

task :default => :spec
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'archive_io/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "archive_io"
  gem.version  = ArchiveIO::VERSION
  gem.authors  = ["Artūrs Mekšs"]
  gem.email    = ["arturs.mekss@gmail.com"]
  gem.homepage = "https://github.com/AMekss/archive_io"

  gem.summary     = %q{Library which can traverse archived file and yields io like object on each file in it.}
  gem.description = %q{Library which can traverse archived file (using `libarchive` under the hood) and yield io like object on each file entry inside it for further processing.}

  # Package every git-tracked file except the test suites.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "ffi", "~> 1.9"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.11"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.5"
  gem.add_development_dependency "pry"
  gem.add_development_dependency "nokogiri"
end
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "archive_io"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ require "pry"
10
+ Pry.start
@@ -0,0 +1,10 @@
1
+ require "archive_io/version"
2
+ require "archive_io/archive_reader"
3
+
4
# Top-level namespace and entry point of the gem.
module ArchiveIO
  # Opens the archive at +filename+ with a new ArchiveReader.
  #
  # Without a block the opened reader is returned and the caller is
  # responsible for calling +close+ on it. With a block the reader is yielded
  # and closed when the block exits (normally or through an exception); the
  # block's value is returned.
  #
  # If opening fails, the reader is closed before the error is re-raised so a
  # half-opened archive is not left behind.
  #
  # @param filename [String] path of the archive to open
  # @return [ArchiveIO::ArchiveReader] when no block is given
  # @return [Object] the block's value when a block is given
  def self.open(filename)
    reader = ArchiveIO::ArchiveReader.new

    begin
      reader.open(filename)
    rescue StandardError
      reader.close
      raise
    end

    return reader unless block_given?

    begin
      yield reader
    ensure
      reader.close
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require "archive_io/lib_archive"
2
+ require "archive_io/header"
3
+ require "archive_io/file_reader"
4
+ require "archive_io/wildcard_pattern"
5
+
6
module ArchiveIO
  # Sequential reader over the file entries of an archive, backed by the
  # LibArchive FFI bindings.
  #
  # Call +open+ with a filename, iterate with +each+ or +select+, and call
  # +close+ when done.
  class ArchiveReader
    # Maps the result code of archive_read_next_header to a callable that
    # turns the raw entry pointer into the value +next_header+ returns:
    # a Header for a successfully read entry, nil at the end of the archive.
    # Any other code is treated as an error by +header_handler+.
    HEADER_HANDLING_STRATEGIES = {
      LibArchive::OK  => ->(pointer) { Header.new(pointer) },
      LibArchive::EOF => ->(_) { nil },
    }.freeze

    # Block size passed to archive_read_open_filename.
    BLOCK_SIZE = 10240

    # Holds the native archive pointer separately from the reader, so the
    # finalizer can release it without referencing the reader itself.
    class Handle
      attr_accessor :pointer

      def initialize
        @pointer = nil
      end

      # Releases the native archive, if any, and forgets the pointer.
      # Safe to call repeatedly.
      def release
        stale = @pointer
        @pointer = nil
        LibArchive::archive_read_finish(stale) if stale
      end

      # Finalizer proc for ObjectSpace.define_finalizer. Finalizers are called
      # with the object id of the collected object, hence the ignored
      # parameter. The proc closes over this handle only.
      def finalizer
        handle = self
        proc { |_object_id| handle.release }
      end
    end
    private_constant :Handle

    def initialize
      # The previous finalizer was built from method(:close): it kept a
      # reference to the reader (so the reader could never be collected) and
      # accepted no arguments although finalizers receive the object id.
      @handle = Handle.new
      ObjectSpace.define_finalizer(self, @handle.finalizer)
    end

    # Opens +filename+ for reading, closing any previously opened archive
    # first. If any libarchive setup step fails, the reader is closed again
    # before the error is re-raised.
    #
    # @param filename [String] path of the archive
    # @raise [LibArchive::Error] when a libarchive call does not return OK
    def open(filename)
      close

      @handle.pointer = LibArchive::archive_read_new
      @filename = filename

      assert_operation_valid(LibArchive::archive_read_support_compression_all(archive))
      assert_operation_valid(LibArchive::archive_read_support_format_all(archive))
      assert_operation_valid(LibArchive::archive_read_open_filename(archive, filename, BLOCK_SIZE))
    rescue StandardError
      close
      raise
    end

    # Yields a FileReader for every regular-file entry, in archive order.
    # After the last entry the archive is reopened so the next call starts
    # from the first entry again.
    #
    # @raise [LibArchive::NoArchiveError] when no archive is open
    # @raise [LibArchive::Error] when reading a header fails
    def each
      raise LibArchive::NoArchiveError, "Archive is not open, call open with filename on archive before this operation" unless archive

      while (header = next_header)
        yield FileReader.new(header.pathname, archive) if header.file?
      end

      open(@filename) # reopen file for the next call
    end

    # Like +each+, but yields only entries whose pathname matches the
    # wildcard +pattern+ (see WildcardPattern).
    def select(pattern)
      wildcard = WildcardPattern.new(pattern)

      each do |file|
        yield file if wildcard.match?(file.pathname)
      end
    end

    # Releases the native archive (if one is open) and forgets the filename.
    # Calling it on an already closed reader is a no-op.
    def close
      @handle.release
    ensure
      @filename = nil
    end

    private

    # Native archive pointer, or nil when no archive is open.
    def archive
      @handle.pointer
    end

    # Returns the strategy for a next-header result code, raising for any
    # code other than OK/EOF.
    def header_handler(code)
      HEADER_HANDLING_STRATEGIES.fetch(code) { raise LibArchive::Error, archive }
    end

    # Advances to the next entry and returns its Header, or nil at the end of
    # the archive.
    def next_header
      # archive_read_next_header takes a `struct archive_entry **` and stores
      # the entry pointer there, so only pointer-sized scratch memory is
      # needed. Allocating a full entry with archive_entry_new (as before)
      # leaked one entry per call.
      entry_ref = FFI::MemoryPointer.new(:pointer)
      code = LibArchive::archive_read_next_header(archive, entry_ref)
      header_handler(code).call(entry_ref.read_pointer)
    end

    # Raises LibArchive::Error unless the result code is OK.
    def assert_operation_valid(operation_result_code)
      raise LibArchive::Error, archive if operation_result_code != LibArchive::OK
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require "archive_io/lib_archive"
2
+
3
module ArchiveIO
  # Minimal IO-like object for reading the data of the archive entry that the
  # given archive handle is currently positioned at.
  class FileReader
    attr_reader :pathname

    # @param pathname [String] pathname of the entry inside the archive
    # @param archive [Object] archive handle passed to LibArchive.archive_read_data
    def initialize(pathname, archive)
      @pathname = pathname
      @archive = archive
    end

    # Reads up to +size+ bytes of the current entry.
    #
    # Returns exactly the bytes that were read, so data containing NUL bytes
    # is no longer cut off at the first NUL. Returns nil at the end of the
    # entry, as IO#read does for a positive length, so the usual
    # `while (chunk = reader.read(n))` loop terminates. Also returns nil when
    # libarchive reports an error.
    #
    # @param size [Integer] maximum number of bytes to read
    # @return [String, nil]
    def read(size)
      buffer = FFI::MemoryPointer.new(:char, size)
      count = LibArchive::archive_read_data(@archive, buffer, size)

      # count <= 0 covers end of entry and negative error codes. count > size
      # covers error codes that arrive as huge positive numbers when the
      # binding declares the return type as unsigned.
      return nil if count <= 0 || count > size

      buffer.get_bytes(0, count)
    end

    def close
      # does nothing, purpose of having it is to support IO like interface
    end
  end
  private_constant :FileReader
end
@@ -0,0 +1,21 @@
1
+ require "archive_io/lib_archive"
2
+
3
module ArchiveIO
  # Read-only view of an archive entry header, backed by an entry pointer
  # that is handed to the LibArchive bindings.
  class Header
    S_IFMT = 0170000  # mask applied to the entry's file type value
    S_IFREG = 0100000 # regular file

    # @param pointer [Object] entry pointer passed on to LibArchive calls
    def initialize(pointer)
      @entry = pointer
    end

    # @return [Boolean] true when the masked file type equals S_IFREG
    def file?
      type_bits = LibArchive.archive_entry_filetype(@entry) & S_IFMT
      type_bits == S_IFREG
    end

    # @return [String] pathname reported by LibArchive for this entry
    def pathname
      LibArchive.archive_entry_pathname(@entry)
    end
  end
  private_constant :Header
end
@@ -0,0 +1,35 @@
1
+ require "ffi"
2
+
3
module ArchiveIO
  # FFI bindings for the subset of the libarchive C API used by this gem.
  module LibArchive
    extend FFI::Library

    # Result codes compared against the return values of libarchive calls.
    OK = 0
    EOF = 1

    ffi_lib ["archive", "libarchive.so.2"]

    attach_function :archive_version_string, [], :string
    attach_function :archive_read_open_filename, [:pointer, :string, :size_t], :int
    attach_function :archive_read_support_compression_all, [:pointer], :int
    attach_function :archive_read_support_format_all, [:pointer], :int
    attach_function :archive_error_string, [:pointer], :string
    attach_function :archive_read_new, [], :pointer
    attach_function :archive_read_finish, [:pointer], :int
    # The header position is a 64-bit offset; :int would truncate it.
    attach_function :archive_read_header_position, [:pointer], :int64
    attach_function :archive_read_next_header, [:pointer, :pointer], :int
    # Returns the number of bytes read, 0 at the end of the entry, or a
    # negative error code. The return type must therefore be signed: declared
    # as :size_t, errors showed up as huge positive counts and `< 0` checks
    # in callers could never fire.
    attach_function :archive_read_data, [:pointer, :pointer, :size_t], :ssize_t

    # entry
    attach_function :archive_entry_new, [], :pointer
    attach_function :archive_entry_filetype, [:pointer], :mode_t
    attach_function :archive_entry_pathname, [:pointer], :string

    # Raised when an operation requires an open archive but none is open.
    class NoArchiveError < StandardError; end

    # Raised when a libarchive call fails; the message is the archive's
    # current error string (empty when libarchive provides none).
    class Error < StandardError
      # @param archive [FFI::Pointer] archive handle to read the error string from
      def initialize(archive)
        super "#{LibArchive::archive_error_string(archive)}"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ArchiveIO
  # Version string of this release of the gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,12 @@
1
module ArchiveIO
  # Case-insensitive wildcard matcher: `*` stands for any run of characters
  # (including none), every other character is matched literally.
  class WildcardPattern
    # @param expression [String] wildcard expression, e.g. "*.xml"
    def initialize(expression)
      escaped = Regexp.escape(expression).gsub('\*', '.*?')
      # \A and \z anchor the whole string. ^ and $ anchor each line, which let
      # "feed.xml\nnotes.txt" match "*.xml" through its first line alone.
      # MULTILINE lets `*` span newlines, so a name that contains a newline
      # still matches when the name as a whole fits the pattern.
      @regex = Regexp.new("\\A#{escaped}\\z", Regexp::IGNORECASE | Regexp::MULTILINE)
    end

    # @param str [String] candidate string, e.g. an entry pathname
    # @return [Boolean] true when the entire string matches the expression
    def match?(str)
      !!(str =~ @regex)
    end
  end
end
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: archive_io
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Artūrs Mekšs
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-08-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.11'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.11'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Library which can traverse archived file (using `libarchive` under the
98
+ hood) and yield io like object on each file entry inside it for further processing.
99
+ email:
100
+ - arturs.mekss@gmail.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".rspec"
107
+ - Gemfile
108
+ - README.md
109
+ - Rakefile
110
+ - archive_io.gemspec
111
+ - bin/console
112
+ - lib/archive_io.rb
113
+ - lib/archive_io/archive_reader.rb
114
+ - lib/archive_io/file_reader.rb
115
+ - lib/archive_io/header.rb
116
+ - lib/archive_io/lib_archive.rb
117
+ - lib/archive_io/version.rb
118
+ - lib/archive_io/wildcard_pattern.rb
119
+ homepage: https://github.com/AMekss/archive_io
120
+ licenses: []
121
+ metadata: {}
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ required_rubygems_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ requirements: []
137
+ rubyforge_project:
138
+ rubygems_version: 2.5.1
139
+ signing_key:
140
+ specification_version: 4
141
+ summary: Library which can traverse archived file and yields io like object on each
142
+ file in it.
143
+ test_files: []