xsv 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9a277b15ef0d08bd690fad89ee2fde1d0e884d62aacd4f6ee1dc61863939ed17
4
+ data.tar.gz: 7f2b8583392bef4741108c843700e1f8f52dec66fc4ec3f046c5725e0ebd63ff
5
+ SHA512:
6
+ metadata.gz: 0a5d416c0c18876aef6473c8b6f181048ec04f892edd4eff6e8d02728bbb6fcf4251048abca8d80f6af342d4e266ba6167f0b5046b31b95c70866c0abe273dd4
7
+ data.tar.gz: 5d331bb4501e262fbd17391b949ba28678224fb738601c79c514f1eae9274286c38f75ab7b824e235d12fae6f20c8e11df5b002638191af783ec9b45ef2cc535
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.6.3
7
+ before_install: gem install bundler -v 1.17.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in xsv.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ xsv (0.1.0)
5
+ nokogiri (~> 1.10.8)
6
+ rubyzip (~> 2.2.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ mini_portile2 (2.4.0)
12
+ minitest (5.14.0)
13
+ nokogiri (1.10.8)
14
+ mini_portile2 (~> 2.4.0)
15
+ rake (10.5.0)
16
+ rubyzip (2.2.0)
17
+
18
+ PLATFORMS
19
+ ruby
20
+
21
+ DEPENDENCIES
22
+ bundler (~> 1.17)
23
+ minitest (~> 5.0)
24
+ rake (~> 10.0)
25
+ xsv!
26
+
27
+ BUNDLED WITH
28
+ 1.17.2
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Martijn Storck
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # Xsv .xlsx reader
2
+
3
+ Xsv is a very basic parser for Excel files in the .xlsx format that strives to
4
+ provide feature parity with common CSV readers and nothing more. This should
5
+ allow for fast parsing of large worksheets with minimal RAM and CPU consumption.
6
+
7
+ Xsv stands for 'Excel Separated Values' because Excel just gets in the way.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ ```ruby
14
+ gem 'xsv'
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install xsv
24
+
25
+ ## Usage
26
+
27
+ ```ruby
28
+ x = Xsv::File.new("sheet.xlsx")
29
+
30
+ x.sheets[0].each_row(read_headers: true) do |row|
31
+ row # => { "header1" => "value1", "header2" => "value2" }
32
+ end
33
+
34
+ x.sheets[0].each_row do |row|
35
+ row # => ["header1", "header2"]
36
+ end
37
+ ```
38
+
39
+ ## Development
40
+
41
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
42
+
43
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
44
+
45
+ ## Contributing
46
+
47
+ Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
48
+
49
+ ## License
50
+
51
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList["test/**/*_test.rb"]
8
+ end
9
+
10
+ task :default => :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "xsv"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/xsv/file.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'nokogiri'
2
+ require 'zip'
3
+
4
+ module Xsv
5
+ class File
6
+
7
+ attr_reader :sheets, :shared_strings
8
+
9
+ def initialize(file)
10
+ @zip = Zip::File.open(file)
11
+ @sheets = []
12
+ fetch_shared_strings
13
+ fetch_sheets
14
+ end
15
+
16
+ def inspect
17
+ "#<#{self.class.name}:#{self.object_id}>"
18
+ end
19
+
20
+ private
21
+
22
+ def fetch_shared_strings
23
+ stream = @zip.glob("xl/sharedStrings.xml").first.get_input_stream
24
+ xml = Nokogiri::XML(stream)
25
+ expected_count = xml.css("sst").first["uniqueCount"].to_i
26
+ @shared_strings = xml.css("sst si t").map(&:inner_text)
27
+
28
+ if @shared_strings.count != expected_count
29
+ raise Xsv::Error, "Mismatch in shared strings count! #{expected_count} <> #{@shared_strings.count}"
30
+ end
31
+
32
+ stream.close
33
+ end
34
+
35
+ def fetch_sheets
36
+ @zip.glob("xl/worksheets/sheet*.xml").sort do |entry|
37
+ entry.name.scan(/\d+/).first.to_i
38
+ end.each do |entry|
39
+ @sheets << Xsv::Sheet.new(self, Nokogiri::XML(entry.get_input_stream))
40
+ end
41
+ end
42
+ end
43
+ end
data/lib/xsv/sheet.rb ADDED
@@ -0,0 +1,58 @@
1
+ module Xsv
2
+ class Sheet
3
+ attr_reader :xml
4
+
5
+ def initialize(sheet, xml)
6
+ @sheet = sheet
7
+ @xml = xml
8
+ @headers = []
9
+ end
10
+
11
+ def each_row(read_headers: false)
12
+ if read_headers
13
+ @headers = parse_row(@xml.css("sheetData row").first)
14
+ end
15
+
16
+ @xml.css("sheetData row").each do |row_xml|
17
+ yield(parse_row(row_xml))
18
+ end
19
+
20
+ true
21
+ end
22
+
23
+ private
24
+
25
+ def parse_row(xml)
26
+ if @headers.any?
27
+ row = {}
28
+ else
29
+ row = []
30
+ end
31
+
32
+ xml.css("c").each_with_index do |c_xml, i|
33
+ next if @headers.any? && i == 0
34
+
35
+ value = case c_xml["t"]
36
+ when "s"
37
+ @sheet.shared_strings[c_xml.css("v").inner_text.to_i]
38
+ when "str"
39
+ c_xml.css("v").inner_text
40
+ when "e" # N/A
41
+ nil
42
+ when nil
43
+ c_xml.css("v").inner_text.to_i
44
+ else
45
+ raise Xsv::Error, "Encountered unknown column type #{c_xml["t"]}"
46
+ end
47
+
48
+ if @headers.any?
49
+ row[@headers[i]] = value
50
+ else
51
+ row << value
52
+ end
53
+ end
54
+
55
+ row
56
+ end
57
+ end
58
+ end
data/lib/xsv/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Xsv
2
+ VERSION = "0.1.0"
3
+ end
data/lib/xsv.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "xsv/file"
2
+ require "xsv/sheet"
3
+ require "xsv/version"
4
+
5
+ module Xsv
6
+ class Error < StandardError; end
7
+ end
data/xsv.gemspec ADDED
@@ -0,0 +1,40 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "xsv/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "xsv"
8
+ spec.version = Xsv::VERSION
9
+ spec.authors = ["Martijn Storck"]
10
+ spec.email = ["martijn@storck.io"]
11
+
12
+ spec.summary = "Minimal xlsx parser that provides nothing a CSV parser wouldn't"
13
+ spec.homepage = "https://github.com/martijn/xsv"
14
+ spec.license = "MIT"
15
+
16
+ if spec.respond_to?(:metadata)
17
+ spec.metadata["homepage_uri"] = spec.homepage
18
+ spec.metadata["source_code_uri"] = "https://github.com/martijn/xsv"
19
+ spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
20
+ else
21
+ raise "RubyGems 2.0 or newer is required to protect against " \
22
+ "public gem pushes."
23
+ end
24
+
25
+ # Specify which files should be added to the gem when it is released.
26
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
27
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
28
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
29
+ end
30
+ spec.bindir = "exe"
31
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
+ spec.require_paths = ["lib"]
33
+
34
+ spec.add_dependency "rubyzip", "~> 2.2.0"
35
+ spec.add_dependency "nokogiri", "~> 1.10.8"
36
+
37
+ spec.add_development_dependency "bundler", "~> 1.17"
38
+ spec.add_development_dependency "rake", "~> 10.0"
39
+ spec.add_development_dependency "minitest", "~> 5.0"
40
+ end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xsv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Martijn Storck
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-02-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rubyzip
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 2.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 2.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.8
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.8
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.17'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.17'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '5.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '5.0'
83
+ description:
84
+ email:
85
+ - martijn@storck.io
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".travis.yml"
92
+ - Gemfile
93
+ - Gemfile.lock
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - bin/console
98
+ - bin/setup
99
+ - lib/xsv.rb
100
+ - lib/xsv/file.rb
101
+ - lib/xsv/sheet.rb
102
+ - lib/xsv/version.rb
103
+ - xsv.gemspec
104
+ homepage: https://github.com/martijn/xsv
105
+ licenses:
106
+ - MIT
107
+ metadata:
108
+ homepage_uri: https://github.com/martijn/xsv
109
+ source_code_uri: https://github.com/martijn/xsv
110
+ changelog_uri: https://github.com/martijn/xsv/CHANGELOG.md
111
+ post_install_message:
112
+ rdoc_options: []
113
+ require_paths:
114
+ - lib
115
+ required_ruby_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ required_rubygems_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ requirements: []
126
+ rubygems_version: 3.0.3
127
+ signing_key:
128
+ specification_version: 4
129
+ summary: Minimal xlsx parser that provides nothing a CSV parser wouldn't
130
+ test_files: []