xsv 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fde081e219b3d4ae14aaa9d29de699e67df263ff0d8bfa1ffc10362099dc8eb
4
- data.tar.gz: f2cd6e1ae40a06bcac62368a26f9cf09157d5394bbb7f9ab34473a90be729d37
3
+ metadata.gz: 14550b4494b57e9363e5b189222e242177e9c1328377d5458bb475231a6726b7
4
+ data.tar.gz: bddb0f2b94dcd67563f8d1b5525c6dffae2f3718fd08d4800e9e09ac21c57d59
5
5
  SHA512:
6
- metadata.gz: 24a1070748166f3d18fffced8b310825efa72c121b4226ce588686c18eb07f80a8c328848b7f1c0915b4c002ea656916db263441dd2a442a60dd828e4ba8ccde
7
- data.tar.gz: 01530b8f31bc60aafbeb36a563faff5958bf98577ae3a85e527e217c0c988a5b5d4ef7e614811cdd590402312835b7f60c50c0a94c4a77bc0543729e41c6e826
6
+ metadata.gz: 38a39e5b4484214a1a530ddb5f5add47d56c3cff4df1b20e519bc4712e693c1ab6cca6270779f3dd24c7e019835f7b98daccc16f42fc5f905a66fe02623323c0
7
+ data.tar.gz: b60b28263fcf0352f85b254a61866463dca7ac7e077314ff1e3584ccfd6b90ccd03d7edf8e33f3f63441db17fa15f2ed7139cb4f55ae7c912a555ea5ff008e39
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # Xsv Changelog
2
+
3
+ ## 0.3.3
4
+
5
+ Initial version with a changelog and reasonably complete YARD documentation.
data/Gemfile.lock CHANGED
@@ -10,7 +10,7 @@ GEM
10
10
  specs:
11
11
  minitest (5.14.0)
12
12
  ox (2.13.2)
13
- rake (10.5.0)
13
+ rake (13.0.1)
14
14
  rubyzip (2.2.0)
15
15
 
16
16
  PLATFORMS
@@ -19,7 +19,7 @@ PLATFORMS
19
19
  DEPENDENCIES
20
20
  bundler (~> 1.17)
21
21
  minitest (~> 5.0)
22
- rake (~> 10.0)
22
+ rake (~> 13.0)
23
23
  xsv!
24
24
 
25
25
  BUNDLED WITH
data/Rakefile CHANGED
@@ -2,9 +2,16 @@ require "bundler/gem_tasks"
2
2
  require "rake/testtask"
3
3
 
4
4
  Rake::TestTask.new(:test) do |t|
5
- t.libs << "test"
6
5
  t.libs << "lib"
6
+ t.libs << "test"
7
7
  t.test_files = FileList["test/**/*_test.rb"]
8
8
  end
9
9
 
10
- task :default => :test
10
+ Rake::TestTask.new(:bench) do |t|
11
+ t.libs << "lib"
12
+ t.libs << "test"
13
+ t.test_files = FileList["test/**/*_benchmark.rb"]
14
+ end
15
+
16
+ task :default => [:test, :bench]
17
+
data/lib/xsv/helpers.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
3
  module Helpers
4
+ # The default OOXML Spreadsheet number formats according to the ECMA standard
5
+ # User formats are appended from index 174 onward
4
6
  BUILT_IN_NUMBER_FORMATS = {
5
7
  1 => "0",
6
8
  2 => "0.00",
@@ -38,13 +40,15 @@ module Xsv
38
40
  MINUTE = 60.freeze
39
41
  HOUR = 3600.freeze
40
42
  A_CODEPOINT = 'A'.ord.freeze
43
+ # The epoch for all dates in OOXML Spreadsheet documents
41
44
  EPOCH = Date.new(1899, 12, 30).freeze
42
45
 
43
- # Return the index number for the given Excel column name
46
+ # Return the index number for the given Excel column name (e.g. "A1" => 0)
44
47
  def column_index(col)
45
- col = col[/^[A-Z]+/]
46
-
47
- col.each_codepoint.reduce(0) { |sum, n| sum * 26 + (n - A_CODEPOINT + 1) } - 1
48
+ col.each_codepoint.reduce(0) do |sum, n|
49
+ break sum - 1 if n < A_CODEPOINT # reached a number
50
+ sum * 26 + (n - A_CODEPOINT + 1)
51
+ end
48
52
  end
49
53
 
50
54
  # Return a Date for the given Excel date value
@@ -73,6 +77,7 @@ module Xsv
73
77
  "%02d:%02d" % [hours, minutes]
74
78
  end
75
79
 
80
+ # Returns a time including a date as a {Time} object
76
81
  def parse_datetime(number)
77
82
  date_base = number.truncate
78
83
  time = parse_date(date_base).to_time
@@ -85,6 +90,7 @@ module Xsv
85
90
  time + hours * HOUR + minutes.round * MINUTE
86
91
  end
87
92
 
93
+ # Returns a number as either Integer or Float
88
94
  def parse_number(string)
89
95
  if string.include? "."
90
96
  string.to_f
@@ -93,6 +99,7 @@ module Xsv
93
99
  end
94
100
  end
95
101
 
102
+ # Apply date or time number formats, if applicable
96
103
  def parse_number_format(number, format)
97
104
  number = parse_number(number) if number.is_a?(String)
98
105
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
+ # Interpret the sharedStrings.xml file from the workbook
4
+ # This is used internally when opening a sheet.
3
5
  class SharedStringsParser < Ox::Sax
4
6
  def self.parse(io)
5
7
  strings = []
data/lib/xsv/sheet.rb CHANGED
@@ -1,14 +1,34 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
+ # Sheet represents a single worksheet from a workbook and is normally accessed through {Workbook#sheets}
4
+ #
5
+ # Xsv is designed for worksheets with a single table of data, optionally with a header row. Because sheet implements
6
+ # {Enumerable} the rows in the worksheet can be iterated over using methods such as `#each` and `#map`
7
+ #
8
+ # By default Sheet will return rows as arrays. But by calling the {#parse_headers!} method the first row of the sheet
9
+ # will be parsed and Sheet will switch to hash mode, returning each row as a hash with the values from the first
10
+ # row as keys.
11
+ #
12
+ # If the sheet contains leading data before the first row of data or the header row, this can be skipped by setting the
13
+ # {row_skip} attribute.
3
14
  class Sheet
4
15
  include Enumerable
5
16
  include Xsv::Helpers
6
17
 
18
+ # Returns the current mode. Call {#parse_headers!} to switch to `:hash` mode
19
+ # @return [Symbol] `:hash` or `:array`
7
20
  attr_reader :mode
8
21
 
9
- # Set a number of rows to skip at the top of the sheet (header row offset)
22
+ # Set a number of rows to skip at the top of the sheet (header row offset).
23
+ # For hash mode, do not skip the header row as this will be automatically
24
+ # skipped.
10
25
  attr_accessor :row_skip
11
26
 
27
+ # Create a new instance of Sheet. This is used internally by the {Workbook}.
28
+ # There is no need to create Sheets from application code.
29
+ #
30
+ # @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
31
+ # @param io [IO] A handle to an open worksheet XML file
12
32
  def initialize(workbook, io)
13
33
  @workbook = workbook
14
34
  @io = io
@@ -19,11 +39,12 @@ module Xsv
19
39
  @last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
20
40
  end
21
41
 
42
+ # @return [String]
22
43
  def inspect
23
44
  "#<#{self.class.name}:#{self.object_id}>"
24
45
  end
25
46
 
26
- # Iterate over rows
47
+ # Iterate over rows, returning either hashes or arrays based on the current mode.
27
48
  def each_row
28
49
  @io.rewind
29
50
 
@@ -38,7 +59,8 @@ module Xsv
38
59
 
39
60
  alias each each_row
40
61
 
41
- # Get row by number, starting at 0
62
+ # Get row by number, starting at 0. Returns either a hash or an array based on the current mode.
63
+ # If the specified index is out of bounds an empty row is returned.
42
64
  def [](number)
43
65
  each_with_index do |row, i|
44
66
  return row if i == number
@@ -49,6 +71,7 @@ module Xsv
49
71
 
50
72
  # Load headers in the top row of the worksheet. After parsing of headers
51
73
  # all methods return hashes instead of arrays
74
+ # @return [true]
52
75
  def parse_headers!
53
76
  @headers = parse_headers
54
77
  @mode = :hash
@@ -56,6 +79,7 @@ module Xsv
56
79
  true
57
80
  end
58
81
 
82
+ # Return the headers of the sheet as an array
59
83
  def headers
60
84
  if @headers.any?
61
85
  @headers
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- # SheetBoundsHandler scans a sheet looking for the outer bounds of the content within
3
+ # SheetBoundsHandler scans a sheet looking for the outer bounds of the content within.
4
+ # This is used internally when opening a sheet to deal with worksheets that do not
5
+ # have a correct dimension tag.
4
6
  class SheetBoundsHandler < Ox::Sax
5
7
  include Xsv::Helpers
6
8
 
@@ -1,9 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
+ # This is the core worksheet parser, implemented as an Ox::Sax handler. This is
4
+ # used internally to enumerate rows.
3
5
  class SheetRowsHandler < Ox::Sax
4
6
  include Xsv::Helpers
5
7
 
6
8
  def format_cell
9
+ return nil if @current_value.empty?
10
+
7
11
  case @current_cell[:t]
8
12
  when "s"
9
13
  @workbook.shared_strings[@current_value.to_i]
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
3
  # StylesHandler interprets the relevant parts of styles.xml
4
+ # This is used internally when opening a sheet.
4
5
  class StylesHandler < Ox::Sax
5
6
  def self.get_styles(io, numFmts)
6
7
  @xfs = nil
data/lib/xsv/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- VERSION = "0.3.2"
3
+ VERSION = "0.3.3"
4
4
  end
data/lib/xsv/workbook.rb CHANGED
@@ -2,11 +2,18 @@
2
2
  require 'zip'
3
3
 
4
4
  module Xsv
5
+ # An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
6
+ # multiple Sheets that are available in the array that's accessible through {#sheets}
5
7
  class Workbook
6
8
 
7
- attr_reader :sheets, :shared_strings, :xfs, :numFmts, :trim_empty_rows
9
+ # Access the Sheet objects contained in the workbook
10
+ # @return [Array<Sheet>]
11
+ attr_reader :sheets
8
12
 
9
- # Open the workbook of the given filename, string or buffer
13
+ attr_reader :shared_strings, :xfs, :numFmts, :trim_empty_rows
14
+
15
+ # Open the workbook of the given filename, string or buffer. For additional
16
+ # options see {#initialize}
10
17
  def self.open(data, **kws)
11
18
  if data.is_a?(IO)
12
19
  @workbook = self.new(Zip::File.open_buffer(data), **kws)
@@ -17,7 +24,8 @@ module Xsv
17
24
  end
18
25
  end
19
26
 
20
- # Open a workbook from an instance of Zip::File
27
+ # Open a workbook from an instance of {Zip::File}. Generally it's recommended
28
+ # to use the {.open} method instead of the constructor.
21
29
  #
22
30
  # Options:
23
31
  #
@@ -36,10 +44,23 @@ module Xsv
36
44
  fetch_sheets
37
45
  end
38
46
 
47
+ # @return [String]
39
48
  def inspect
40
49
  "#<#{self.class.name}:#{self.object_id}>"
41
50
  end
42
51
 
52
+ # Close the handle to the workbook file and leave all resources for the GC to collect
53
+ # @return [true]
54
+ def close
55
+ @zip.close
56
+ @sheets = nil
57
+ @xfs = nil
58
+ @numFmts = nil
59
+ @shared_strings = nil
60
+
61
+ true
62
+ end
63
+
43
64
  private
44
65
 
45
66
  def fetch_shared_strings
data/lib/xsv.rb CHANGED
@@ -11,6 +11,10 @@ require "xsv/styles_handler"
11
11
  require "xsv/version"
12
12
  require "xsv/workbook"
13
13
 
14
+ # XSV is a fast, lightweight parser for Office Open XML spreadsheet files
15
+ # (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
+ # sense that it provides nothing a CSV reader wouldn't, meaning it only
17
+ # deals with minimal formatting and cannot create or modify documents.
14
18
  module Xsv
15
19
  class Error < StandardError; end
16
20
  # An AssertionFailed error indicates an unexpected condition, meaning a bug
data/xsv.gemspec CHANGED
@@ -9,7 +9,13 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Martijn Storck"]
10
10
  spec.email = ["martijn@storck.io"]
11
11
 
12
- spec.summary = "Minimal xlsx parser that provides nothing a CSV parser wouldn't"
12
+ spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
13
+ spec.description = <<-EOF
14
+ XSV is a fast, lightweight parser for Office Open XML spreadsheet files
15
+ (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
+ sense that it provides nothing a CSV reader wouldn't, meaning it only
17
+ deals with minimal formatting and cannot create or modify documents.
18
+ EOF
13
19
  spec.homepage = "https://github.com/martijn/xsv"
14
20
  spec.license = "MIT"
15
21
 
@@ -37,6 +43,6 @@ Gem::Specification.new do |spec|
37
43
  spec.add_dependency "ox", "~> 2.13"
38
44
 
39
45
  spec.add_development_dependency "bundler", "~> 1.17"
40
- spec.add_development_dependency "rake", "~> 10.0"
46
+ spec.add_development_dependency "rake", "~> 13.0"
41
47
  spec.add_development_dependency "minitest", "~> 5.0"
42
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-23 00:00:00.000000000 Z
11
+ date: 2020-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '10.0'
61
+ version: '13.0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '10.0'
68
+ version: '13.0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: minitest
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -80,7 +80,11 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '5.0'
83
- description:
83
+ description: |2
84
+ XSV is a fast, lightweight parser for Office Open XML spreadsheet files
85
+ (commonly known as Excel or .xlsx files). It strives to be minimal in the
86
+ sense that it provides nothing a CSV reader wouldn't, meaning it only
87
+ deals with minimal formatting and cannot create or modify documents.
84
88
  email:
85
89
  - martijn@storck.io
86
90
  executables: []
@@ -89,6 +93,7 @@ extra_rdoc_files: []
89
93
  files:
90
94
  - ".gitignore"
91
95
  - ".travis.yml"
96
+ - CHANGELOG.md
92
97
  - Gemfile
93
98
  - Gemfile.lock
94
99
  - LICENSE.txt
@@ -132,5 +137,5 @@ requirements: []
132
137
  rubygems_version: 3.1.2
133
138
  signing_key:
134
139
  specification_version: 4
135
- summary: Minimal xlsx parser that provides nothing a CSV parser wouldn't
140
+ summary: A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't
136
141
  test_files: []