xsv 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ade681d66101849ddf0ecd1cb0211d309153f604cbe0fc8ed3254c7930265e0a
4
- data.tar.gz: e9c0d24538cfe42affb2690f2f00a5535277013898c781aa668eecbd3329df6b
3
+ metadata.gz: '0755959285e8f4e588fc8f72f45c48904bc0b840c1abc7b250faf6bad978e7f0'
4
+ data.tar.gz: 482143461be2e72994e8d9758d1a971e87355acdd16cb027a5631956b7898927
5
5
  SHA512:
6
- metadata.gz: fb21f559a6a3d8fd20192fab8c0543fbf430f49fc563e5c98e584258bb72cd0faa8b0cf8ce9792035d5db46f40a8db082eb2407c6a132e085d74cc69200c3e2a
7
- data.tar.gz: 87c1ffca0e26ffdaf9a91bae58d188bffedae87508c3a91d33fb06c1fbb6c121f40dfce12af3dd5080254613ae47a5481cab996f9a6284517967fd71893ede94
6
+ metadata.gz: a9a48303c59d254233e12994562a341854caffde500f78e5357edebfd16dca12cf7b9b39af6c3c9e1536491f1467456c0b8295bfebf4fddda0f315ab4fbe0875
7
+ data.tar.gz: db9fe14a1c829ca66d2d1daa59da9bab181c5b4ba17c89ebc0703369165310ad4390e8713effc329d0ad6ce9f932a822e368b7eec6ae1e3876b4ed27d4bc0969
@@ -9,9 +9,9 @@ name: Ruby
9
9
 
10
10
  on:
11
11
  push:
12
- branches: [ master ]
12
+ branches: [ main ]
13
13
  pull_request:
14
- branches: [ master ]
14
+ branches: [ main ]
15
15
 
16
16
  jobs:
17
17
  test:
@@ -19,7 +19,7 @@ jobs:
19
19
  runs-on: ubuntu-latest
20
20
  strategy:
21
21
  matrix:
22
- ruby-version: ['2.6', '2.7', '3.0', 'jruby', 'truffleruby']
22
+ ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
23
23
 
24
24
  steps:
25
25
  - uses: actions/checkout@v2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Xsv Changelog
2
2
 
3
+ ## 1.0.6 2022-01-07
4
+
5
+ - Code cleanup, small performance improvements
6
+
3
7
  ## 1.0.5 2022-01-05
4
8
 
5
9
  - Raise exception if given an empty buffer when opening workbook (thanks @kevin-j-m)
data/benchmark.rb ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/inline'
4
+
5
+ gemfile do
6
+ source "https://rubygems.org"
7
+
8
+ gemspec
9
+ gem "benchmark-memory"
10
+ gem "benchmark-perf"
11
+ end
12
+
13
+ def bench_perf(sheet)
14
+ result = Benchmark::Perf.cpu(repeat: 5) do
15
+ sheet.each do |row|
16
+ row.each do |cell|
17
+ cell
18
+ end
19
+ end
20
+ end
21
+
22
+ puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
23
+ end
24
+
25
+ def bench_mem(sheet)
26
+ Benchmark.memory do |bm|
27
+ bm.report do
28
+ sheet.each do |row|
29
+ row.each do |cell|
30
+ cell
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ file = File.read("test/files/10k-sheet.xlsx")
38
+
39
+ workbook = Xsv::Workbook.open(file)
40
+
41
+ puts "--- ARRAY MODE ---"
42
+
43
+ bench_perf(workbook.sheets[0])
44
+ bench_mem(workbook.sheets[0])
45
+
46
+ puts "\n--- HASH MODE ---"
47
+
48
+ workbook.sheets[0].parse_headers!
49
+
50
+ bench_perf(workbook.sheets[0])
51
+ bench_mem(workbook.sheets[0])
@@ -5,6 +5,9 @@ module Xsv
5
5
  ATTR_REGEX = /((\S+)="(.*?)")/m
6
6
 
7
7
  def parse(io)
8
+ responds_to_end_element = respond_to?(:end_element)
9
+ responds_to_characters = respond_to?(:characters)
10
+
8
11
  state = :look_start
9
12
  if io.is_a?(String)
10
13
  pbuf = io.dup
@@ -32,7 +35,7 @@ module Xsv
32
35
  if (o = pbuf.index("<"))
33
36
  chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
34
37
 
35
- if respond_to?(:characters) && !chars.empty?
38
+ if responds_to_characters && !chars.empty?
36
39
  if chars.index("&")
37
40
  chars.gsub!("&amp;", "&")
38
41
  chars.gsub!("&apos;", "'")
@@ -65,12 +68,12 @@ module Xsv
65
68
  end
66
69
 
67
70
  if tag_name.start_with?("/")
68
- end_element(tag_name[1..-1]) if respond_to?(:end_element)
71
+ end_element(tag_name[1..-1]) if responds_to_end_element
69
72
  elsif args.nil?
70
73
  start_element(tag_name, nil)
71
74
  else
72
75
  start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
73
- end_element(tag_name) if args.end_with?("/") && respond_to?(:end_element)
76
+ end_element(tag_name) if responds_to_end_element && args.end_with?("/")
74
77
  end
75
78
 
76
79
  state = :look_start
@@ -30,8 +30,8 @@ module Xsv
30
30
  @state = nil
31
31
  @cell = nil
32
32
  @row = nil
33
- @maxRow = 0
34
- @maxColumn = 0
33
+ @max_row = 0
34
+ @max_column = 0
35
35
  @trim_empty_rows = trim_empty_rows
36
36
  end
37
37
 
@@ -42,28 +42,28 @@ module Xsv
42
42
  @cell = attrs[:r]
43
43
  when "v"
44
44
  col = column_index(@cell)
45
- @maxColumn = col if col > @maxColumn
46
- @maxRow = @row if @row > @maxRow
45
+ @max_column = col if col > @max_column
46
+ @max_row = @row if @row > @max_row
47
47
  when "row"
48
48
  @state = name
49
49
  @row = attrs[:r].to_i
50
50
  when "dimension"
51
51
  @state = name
52
52
 
53
- _firstCell, lastCell = attrs[:ref].split(":")
53
+ _first_cell, last_cell = attrs[:ref].split(":")
54
54
 
55
- if lastCell
56
- @maxColumn = column_index(lastCell)
55
+ if last_cell
56
+ @max_column = column_index(last_cell)
57
57
  unless @trim_empty_rows
58
- @maxRow = lastCell[/\d+$/].to_i
59
- @block.call(@maxRow, @maxColumn)
58
+ @max_row = last_cell[/\d+$/].to_i
59
+ @block.call(@max_row, @max_column)
60
60
  end
61
61
  end
62
62
  end
63
63
  end
64
64
 
65
65
  def end_element(name)
66
- @block.call(@maxRow, @maxColumn) if name == "sheetData"
66
+ @block.call(@max_row, @max_column) if name == "sheetData"
67
67
  end
68
68
  end
69
69
  end
@@ -14,11 +14,11 @@ module Xsv
14
14
  @last_row = last_row - @row_skip
15
15
  @block = block
16
16
 
17
- @state = nil
17
+ @store_characters = false
18
18
 
19
19
  @row_index = 0
20
20
  @current_row = {}
21
- @current_row_attrs = {}
21
+ @current_row_number = 0
22
22
  @current_cell = {}
23
23
  @current_value = +""
24
24
 
@@ -28,44 +28,36 @@ module Xsv
28
28
  def start_element(name, attrs)
29
29
  case name
30
30
  when "c"
31
- @state = name
32
31
  @current_cell = attrs
33
32
  @current_value.clear
34
- when "v", "is"
35
- @state = name
33
+ when "v", "is", "t"
34
+ @store_characters = true
36
35
  when "row"
37
- @state = name
38
36
  @current_row = @empty_row.dup
39
- @current_row_attrs = attrs
40
- when "t"
41
- @state = nil unless @state == "is"
42
- else
43
- @state = nil
37
+ @current_row_number = attrs[:r].to_i
44
38
  end
45
39
  end
46
40
 
47
41
  def characters(value)
48
- @current_value << value if @state == "v" || @state == "is"
42
+ @current_value << value if @store_characters
49
43
  end
50
44
 
51
45
  def end_element(name)
52
46
  case name
53
- when "v"
54
- @state = nil
47
+ when "v", "is", "t"
48
+ @store_characters = false
55
49
  when "c"
56
50
  col_index = column_index(@current_cell[:r])
57
51
 
58
- case @mode
59
- when :array
52
+ if @mode == :array
60
53
  @current_row[col_index] = format_cell
61
- when :hash
54
+ else
62
55
  @current_row[@headers[col_index]] = format_cell
63
56
  end
64
57
  when "row"
65
- real_row_number = @current_row_attrs[:r].to_i
66
- adjusted_row_number = real_row_number - @row_skip
58
+ return if @current_row_number <= @row_skip
67
59
 
68
- return if real_row_number <= @row_skip
60
+ adjusted_row_number = @current_row_number - @row_skip
69
61
 
70
62
  @row_index += 1
71
63
 
@@ -98,10 +90,7 @@ module Xsv
98
90
  nil
99
91
  when nil, "n"
100
92
  if @current_cell[:s]
101
- style = @workbook.xfs[@current_cell[:s].to_i]
102
- numFmt = @workbook.numFmts[style[:numFmtId].to_i]
103
-
104
- parse_number_format(@current_value, numFmt)
93
+ parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
105
94
  else
106
95
  parse_number(@current_value)
107
96
  end
@@ -5,21 +5,21 @@ module Xsv
5
5
  # This is used internally when opening a sheet.
6
6
  class StylesHandler < SaxParser
7
7
  def self.get_styles(io)
8
- handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, numFmts|
8
+ handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
9
9
  @xfs = xfs
10
- @numFmts = numFmts
10
+ @num_fmts = num_fmts
11
11
  end
12
12
 
13
13
  handler.parse(io)
14
14
 
15
- [@xfs, @numFmts]
15
+ [@xfs, @num_fmts]
16
16
  end
17
17
 
18
- def initialize(numFmts, &block)
18
+ def initialize(num_fmts, &block)
19
19
  @block = block
20
20
  @state = nil
21
21
  @xfs = []
22
- @numFmts = numFmts
22
+ @num_fmts = num_fmts
23
23
  end
24
24
 
25
25
  def start_element(name, attrs)
@@ -27,16 +27,16 @@ module Xsv
27
27
  when "cellXfs"
28
28
  @state = "cellXfs"
29
29
  when "xf"
30
- @xfs << attrs if @state == "cellXfs"
30
+ @xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
31
31
  when "numFmt"
32
- @numFmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
32
+ @num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
33
33
  end
34
34
  end
35
35
 
36
36
  def end_element(name)
37
37
  case name
38
38
  when "styleSheet"
39
- @block.call(@xfs, @numFmts)
39
+ @block.call(@xfs, @num_fmts)
40
40
  when "cellXfs"
41
41
  @state = nil
42
42
  end
data/lib/xsv/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xsv
4
- VERSION = "1.0.5"
4
+ VERSION = "1.0.6"
5
5
  end
data/lib/xsv/workbook.rb CHANGED
@@ -10,7 +10,7 @@ module Xsv
10
10
  # @return [Array<Sheet>]
11
11
  attr_reader :sheets
12
12
 
13
- attr_reader :shared_strings, :xfs, :numFmts, :trim_empty_rows
13
+ attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
14
14
 
15
15
  # Open the workbook of the given filename, string or buffer. For additional
16
16
  # options see {.initialize}
@@ -49,7 +49,7 @@ module Xsv
49
49
  @trim_empty_rows = trim_empty_rows
50
50
 
51
51
  @sheets = []
52
- @xfs, @numFmts = fetch_styles
52
+ @xfs, @num_fmts = fetch_styles
53
53
  @sheet_ids = fetch_sheet_ids
54
54
  @relationships = fetch_relationships
55
55
  @shared_strings = fetch_shared_strings
@@ -68,7 +68,7 @@ module Xsv
68
68
  @zip = nil
69
69
  @sheets = nil
70
70
  @xfs = nil
71
- @numFmts = nil
71
+ @num_fmts = nil
72
72
  @relationships = nil
73
73
  @shared_strings = nil
74
74
  @sheet_ids = nil
@@ -83,6 +83,11 @@ module Xsv
83
83
  @sheets.select { |s| s.name == name }
84
84
  end
85
85
 
86
+ # Get number format for given style index
87
+ def get_num_fmt(style)
88
+ @num_fmts[@xfs[style][:numFmtId]]
89
+ end
90
+
86
91
  private
87
92
 
88
93
  def fetch_shared_strings
data/xsv.gemspec CHANGED
@@ -43,5 +43,5 @@ Gem::Specification.new do |spec|
43
43
  spec.add_development_dependency "bundler", "< 3"
44
44
  spec.add_development_dependency "rake", "~> 13.0"
45
45
  spec.add_development_dependency "minitest", "~> 5.14.2"
46
- spec.add_development_dependency "standardrb", "~> 1.0"
46
+ spec.add_development_dependency "standard", "~> 1.6.0"
47
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-05 00:00:00.000000000 Z
11
+ date: 2022-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -73,19 +73,19 @@ dependencies:
73
73
  - !ruby/object:Gem::Version
74
74
  version: 5.14.2
75
75
  - !ruby/object:Gem::Dependency
76
- name: standardrb
76
+ name: standard
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
79
  - - "~>"
80
80
  - !ruby/object:Gem::Version
81
- version: '1.0'
81
+ version: 1.6.0
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
- version: '1.0'
88
+ version: 1.6.0
89
89
  description: |2
90
90
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
91
91
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
@@ -105,6 +105,7 @@ files:
105
105
  - LICENSE.txt
106
106
  - README.md
107
107
  - Rakefile
108
+ - benchmark.rb
108
109
  - bin/console
109
110
  - bin/setup
110
111
  - lib/xsv.rb