xsv 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fde081e219b3d4ae14aaa9d29de699e67df263ff0d8bfa1ffc10362099dc8eb
4
- data.tar.gz: f2cd6e1ae40a06bcac62368a26f9cf09157d5394bbb7f9ab34473a90be729d37
3
+ metadata.gz: 14550b4494b57e9363e5b189222e242177e9c1328377d5458bb475231a6726b7
4
+ data.tar.gz: bddb0f2b94dcd67563f8d1b5525c6dffae2f3718fd08d4800e9e09ac21c57d59
5
5
  SHA512:
6
- metadata.gz: 24a1070748166f3d18fffced8b310825efa72c121b4226ce588686c18eb07f80a8c328848b7f1c0915b4c002ea656916db263441dd2a442a60dd828e4ba8ccde
7
- data.tar.gz: 01530b8f31bc60aafbeb36a563faff5958bf98577ae3a85e527e217c0c988a5b5d4ef7e614811cdd590402312835b7f60c50c0a94c4a77bc0543729e41c6e826
6
+ metadata.gz: 38a39e5b4484214a1a530ddb5f5add47d56c3cff4df1b20e519bc4712e693c1ab6cca6270779f3dd24c7e019835f7b98daccc16f42fc5f905a66fe02623323c0
7
+ data.tar.gz: b60b28263fcf0352f85b254a61866463dca7ac7e077314ff1e3584ccfd6b90ccd03d7edf8e33f3f63441db17fa15f2ed7139cb4f55ae7c912a555ea5ff008e39
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # Xsv Changelog
2
+
3
+ ## 0.3.3
4
+
5
+ Initial version with a changelog and reasonably complete YARD documentation.
data/Gemfile.lock CHANGED
@@ -10,7 +10,7 @@ GEM
10
10
  specs:
11
11
  minitest (5.14.0)
12
12
  ox (2.13.2)
13
- rake (10.5.0)
13
+ rake (13.0.1)
14
14
  rubyzip (2.2.0)
15
15
 
16
16
  PLATFORMS
@@ -19,7 +19,7 @@ PLATFORMS
19
19
  DEPENDENCIES
20
20
  bundler (~> 1.17)
21
21
  minitest (~> 5.0)
22
- rake (~> 10.0)
22
+ rake (~> 13.0)
23
23
  xsv!
24
24
 
25
25
  BUNDLED WITH
data/Rakefile CHANGED
@@ -2,9 +2,16 @@ require "bundler/gem_tasks"
2
2
  require "rake/testtask"
3
3
 
4
4
  Rake::TestTask.new(:test) do |t|
5
- t.libs << "test"
6
5
  t.libs << "lib"
6
+ t.libs << "test"
7
7
  t.test_files = FileList["test/**/*_test.rb"]
8
8
  end
9
9
 
10
- task :default => :test
10
+ Rake::TestTask.new(:bench) do |t|
11
+ t.libs << "lib"
12
+ t.libs << "test"
13
+ t.test_files = FileList["test/**/*_benchmark.rb"]
14
+ end
15
+
16
+ task :default => [:test, :bench]
17
+
data/lib/xsv/helpers.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
3
  module Helpers
4
+ # The default OOXML Spreadsheet number formats according to the ECMA standard
5
+ # User formats are appended from index 174 onward
4
6
  BUILT_IN_NUMBER_FORMATS = {
5
7
  1 => "0",
6
8
  2 => "0.00",
@@ -38,13 +40,15 @@ module Xsv
38
40
  MINUTE = 60.freeze
39
41
  HOUR = 3600.freeze
40
42
  A_CODEPOINT = 'A'.ord.freeze
43
+ # The epoch for all dates in OOXML Spreadsheet documents
41
44
  EPOCH = Date.new(1899, 12, 30).freeze
42
45
 
43
- # Return the index number for the given Excel column name
46
+ # Return the index number for the given Excel column name (e.g. "A1" => 0)
44
47
  def column_index(col)
45
- col = col[/^[A-Z]+/]
46
-
47
- col.each_codepoint.reduce(0) { |sum, n| sum * 26 + (n - A_CODEPOINT + 1) } - 1
48
+ col.each_codepoint.reduce(0) do |sum, n|
49
+ break sum - 1 if n < A_CODEPOINT # reached a number
50
+ sum * 26 + (n - A_CODEPOINT + 1)
51
+ end
48
52
  end
49
53
 
50
54
  # Return a Date for the given Excel date value
@@ -73,6 +77,7 @@ module Xsv
73
77
  "%02d:%02d" % [hours, minutes]
74
78
  end
75
79
 
80
+ # Returns a time including a date as a {Time} object
76
81
  def parse_datetime(number)
77
82
  date_base = number.truncate
78
83
  time = parse_date(date_base).to_time
@@ -85,6 +90,7 @@ module Xsv
85
90
  time + hours * HOUR + minutes.round * MINUTE
86
91
  end
87
92
 
93
+ # Returns a number as either Integer or Float
88
94
  def parse_number(string)
89
95
  if string.include? "."
90
96
  string.to_f
@@ -93,6 +99,7 @@ module Xsv
93
99
  end
94
100
  end
95
101
 
102
+ # Apply date or time number formats, if applicable
96
103
  def parse_number_format(number, format)
97
104
  number = parse_number(number) if number.is_a?(String)
98
105
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
+ # Interpret the sharedStrings.xml file from the workbook
4
+ # This is used internally when opening a sheet.
3
5
  class SharedStringsParser < Ox::Sax
4
6
  def self.parse(io)
5
7
  strings = []
data/lib/xsv/sheet.rb CHANGED
@@ -1,14 +1,34 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
+ # Sheet represents a single worksheet from a workbook and is normally accessed through {Workbook#sheets}
4
+ #
5
+ # Xsv is designed for worksheets with a single table of data, optionally with a header row. Because sheet implements
6
+ # {Enumerable} the rows in the worksheet can be iterated over using methods such as `#each` and `#map`
7
+ #
8
+ # By default Sheet will return rows as arrays. But by calling the {#parse_headers!} method the first row of the sheet
9
+ # will be parsed and Sheet will switch to hash mode, returning each row as a hash with the values from the first
10
+ # row as keys.
11
+ #
12
+ # If the sheet contains leading data before the first row of data or the header row, this can be skipped by setting the
13
+ # {#row_skip} attribute.
3
14
  class Sheet
4
15
  include Enumerable
5
16
  include Xsv::Helpers
6
17
 
18
+ # Returns the current mode. Call {#parse_headers!} to switch to `:hash` mode
19
+ # @return [Symbol] `:hash` or `:array`
7
20
  attr_reader :mode
8
21
 
9
- # Set a number of rows to skip at the top of the sheet (header row offset)
22
+ # Set a number of rows to skip at the top of the sheet (header row offset).
23
+ # For hash mode, do not skip the header row as this will be automatically
24
+ # skipped.
10
25
  attr_accessor :row_skip
11
26
 
27
+ # Create a new instance of Sheet. This is used internally by the {Workbook}.
28
+ # There is no need to create Sheets from application code.
29
+ #
30
+ # @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
31
+ # @param io [IO] A handle to an open worksheet XML file
12
32
  def initialize(workbook, io)
13
33
  @workbook = workbook
14
34
  @io = io
@@ -19,11 +39,12 @@ module Xsv
19
39
  @last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
20
40
  end
21
41
 
42
+ # @return [String]
22
43
  def inspect
23
44
  "#<#{self.class.name}:#{self.object_id}>"
24
45
  end
25
46
 
26
- # Iterate over rows
47
+ # Iterate over rows, returning either hashes or arrays based on the current mode.
27
48
  def each_row
28
49
  @io.rewind
29
50
 
@@ -38,7 +59,8 @@ module Xsv
38
59
 
39
60
  alias each each_row
40
61
 
41
- # Get row by number, starting at 0
62
+ # Get row by number, starting at 0. Returns either a hash or an array based on the current mode.
63
+ # If the specified index is out of bounds an empty row is returned.
42
64
  def [](number)
43
65
  each_with_index do |row, i|
44
66
  return row if i == number
@@ -49,6 +71,7 @@ module Xsv
49
71
 
50
72
  # Load headers in the top row of the worksheet. After parsing of headers
51
73
  # all methods return hashes instead of arrays
74
+ # @return [true]
52
75
  def parse_headers!
53
76
  @headers = parse_headers
54
77
  @mode = :hash
@@ -56,6 +79,7 @@ module Xsv
56
79
  true
57
80
  end
58
81
 
82
+ # Return the headers of the sheet as an array
59
83
  def headers
60
84
  if @headers.any?
61
85
  @headers
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- # SheetBoundsHandler scans a sheet looking for the outer bounds of the content within
3
+ # SheetBoundsHandler scans a sheet looking for the outer bounds of the content within.
4
+ # This is used internally when opening a sheet to deal with worksheets that do not
5
+ # have a correct dimension tag.
4
6
  class SheetBoundsHandler < Ox::Sax
5
7
  include Xsv::Helpers
6
8
 
@@ -1,9 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
+ # This is the core worksheet parser, implemented as an Ox::Sax handler. This is
4
+ # used internally to enumerate rows.
3
5
  class SheetRowsHandler < Ox::Sax
4
6
  include Xsv::Helpers
5
7
 
6
8
  def format_cell
9
+ return nil if @current_value.empty?
10
+
7
11
  case @current_cell[:t]
8
12
  when "s"
9
13
  @workbook.shared_strings[@current_value.to_i]
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
3
  # StylesHandler interprets the relevant parts of styles.xml
4
+ # This is used internally when opening a sheet.
4
5
  class StylesHandler < Ox::Sax
5
6
  def self.get_styles(io, numFmts)
6
7
  @xfs = nil
data/lib/xsv/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- VERSION = "0.3.2"
3
+ VERSION = "0.3.3"
4
4
  end
data/lib/xsv/workbook.rb CHANGED
@@ -2,11 +2,18 @@
2
2
  require 'zip'
3
3
 
4
4
  module Xsv
5
+ # An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
6
+ # multiple Sheets that are available in the array that's accessible through {#sheets}
5
7
  class Workbook
6
8
 
7
- attr_reader :sheets, :shared_strings, :xfs, :numFmts, :trim_empty_rows
9
+ # Access the Sheet objects contained in the workbook
10
+ # @return [Array<Sheet>]
11
+ attr_reader :sheets
8
12
 
9
- # Open the workbook of the given filename, string or buffer
13
+ attr_reader :shared_strings, :xfs, :numFmts, :trim_empty_rows
14
+
15
+ # Open the workbook of the given filename, string or buffer. For additional
16
+ # options see {#initialize}
10
17
  def self.open(data, **kws)
11
18
  if data.is_a?(IO)
12
19
  @workbook = self.new(Zip::File.open_buffer(data), **kws)
@@ -17,7 +24,8 @@ module Xsv
17
24
  end
18
25
  end
19
26
 
20
- # Open a workbook from an instance of Zip::File
27
+ # Open a workbook from an instance of {Zip::File}. Generally it's recommended
28
+ # to use the {.open} method instead of the constructor.
21
29
  #
22
30
  # Options:
23
31
  #
@@ -36,10 +44,23 @@ module Xsv
36
44
  fetch_sheets
37
45
  end
38
46
 
47
+ # @return [String]
39
48
  def inspect
40
49
  "#<#{self.class.name}:#{self.object_id}>"
41
50
  end
42
51
 
52
+ # Close the handle to the workbook file and leave all resources for the GC to collect
53
+ # @return [true]
54
+ def close
55
+ @zip.close
56
+ @sheets = nil
57
+ @xfs = nil
58
+ @numFmts = nil
59
+ @shared_strings = nil
60
+
61
+ true
62
+ end
63
+
43
64
  private
44
65
 
45
66
  def fetch_shared_strings
data/lib/xsv.rb CHANGED
@@ -11,6 +11,10 @@ require "xsv/styles_handler"
11
11
  require "xsv/version"
12
12
  require "xsv/workbook"
13
13
 
14
+ # XSV is a fast, lightweight parser for Office Open XML spreadsheet files
15
+ # (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
+ # sense that it provides nothing a CSV reader wouldn't, meaning it only
17
+ # deals with minimal formatting and cannot create or modify documents.
14
18
  module Xsv
15
19
  class Error < StandardError; end
16
20
  # An AssertionFailed error indicates an unexpected condition, meaning a bug
data/xsv.gemspec CHANGED
@@ -9,7 +9,13 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Martijn Storck"]
10
10
  spec.email = ["martijn@storck.io"]
11
11
 
12
- spec.summary = "Minimal xlsx parser that provides nothing a CSV parser wouldn't"
12
+ spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
13
+ spec.description = <<-EOF
14
+ XSV is a fast, lightweight parser for Office Open XML spreadsheet files
15
+ (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
+ sense that it provides nothing a CSV reader wouldn't, meaning it only
17
+ deals with minimal formatting and cannot create or modify documents.
18
+ EOF
13
19
  spec.homepage = "https://github.com/martijn/xsv"
14
20
  spec.license = "MIT"
15
21
 
@@ -37,6 +43,6 @@ Gem::Specification.new do |spec|
37
43
  spec.add_dependency "ox", "~> 2.13"
38
44
 
39
45
  spec.add_development_dependency "bundler", "~> 1.17"
40
- spec.add_development_dependency "rake", "~> 10.0"
46
+ spec.add_development_dependency "rake", "~> 13.0"
41
47
  spec.add_development_dependency "minitest", "~> 5.0"
42
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-23 00:00:00.000000000 Z
11
+ date: 2020-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '10.0'
61
+ version: '13.0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '10.0'
68
+ version: '13.0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: minitest
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -80,7 +80,11 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '5.0'
83
- description:
83
+ description: |2
84
+ XSV is a fast, lightweight parser for Office Open XML spreadsheet files
85
+ (commonly known as Excel or .xlsx files). It strives to be minimal in the
86
+ sense that it provides nothing a CSV reader wouldn't, meaning it only
87
+ deals with minimal formatting and cannot create or modify documents.
84
88
  email:
85
89
  - martijn@storck.io
86
90
  executables: []
@@ -89,6 +93,7 @@ extra_rdoc_files: []
89
93
  files:
90
94
  - ".gitignore"
91
95
  - ".travis.yml"
96
+ - CHANGELOG.md
92
97
  - Gemfile
93
98
  - Gemfile.lock
94
99
  - LICENSE.txt
@@ -132,5 +137,5 @@ requirements: []
132
137
  rubygems_version: 3.1.2
133
138
  signing_key:
134
139
  specification_version: 4
135
- summary: Minimal xlsx parser that provides nothing a CSV parser wouldn't
140
+ summary: A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't
136
141
  test_files: []