xsv 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ade681d66101849ddf0ecd1cb0211d309153f604cbe0fc8ed3254c7930265e0a
4
- data.tar.gz: e9c0d24538cfe42affb2690f2f00a5535277013898c781aa668eecbd3329df6b
3
+ metadata.gz: '0755959285e8f4e588fc8f72f45c48904bc0b840c1abc7b250faf6bad978e7f0'
4
+ data.tar.gz: 482143461be2e72994e8d9758d1a971e87355acdd16cb027a5631956b7898927
5
5
  SHA512:
6
- metadata.gz: fb21f559a6a3d8fd20192fab8c0543fbf430f49fc563e5c98e584258bb72cd0faa8b0cf8ce9792035d5db46f40a8db082eb2407c6a132e085d74cc69200c3e2a
7
- data.tar.gz: 87c1ffca0e26ffdaf9a91bae58d188bffedae87508c3a91d33fb06c1fbb6c121f40dfce12af3dd5080254613ae47a5481cab996f9a6284517967fd71893ede94
6
+ metadata.gz: a9a48303c59d254233e12994562a341854caffde500f78e5357edebfd16dca12cf7b9b39af6c3c9e1536491f1467456c0b8295bfebf4fddda0f315ab4fbe0875
7
+ data.tar.gz: db9fe14a1c829ca66d2d1daa59da9bab181c5b4ba17c89ebc0703369165310ad4390e8713effc329d0ad6ce9f932a822e368b7eec6ae1e3876b4ed27d4bc0969
@@ -9,9 +9,9 @@ name: Ruby
9
9
 
10
10
  on:
11
11
  push:
12
- branches: [ master ]
12
+ branches: [ main ]
13
13
  pull_request:
14
- branches: [ master ]
14
+ branches: [ main ]
15
15
 
16
16
  jobs:
17
17
  test:
@@ -19,7 +19,7 @@ jobs:
19
19
  runs-on: ubuntu-latest
20
20
  strategy:
21
21
  matrix:
22
- ruby-version: ['2.6', '2.7', '3.0', 'jruby', 'truffleruby']
22
+ ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
23
23
 
24
24
  steps:
25
25
  - uses: actions/checkout@v2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Xsv Changelog
2
2
 
3
+ ## 1.0.6 2022-01-07
4
+
5
+ - Code cleanup, small performance improvements
6
+
3
7
  ## 1.0.5 2022-01-05
4
8
 
5
9
  - Raise exception if given an empty buffer when opening workbook (thanks @kevin-j-m)
data/benchmark.rb ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/inline'
4
+
5
+ gemfile do
6
+ source "https://rubygems.org"
7
+
8
+ gemspec
9
+ gem "benchmark-memory"
10
+ gem "benchmark-perf"
11
+ end
12
+
13
+ def bench_perf(sheet)
14
+ result = Benchmark::Perf.cpu(repeat: 5) do
15
+ sheet.each do |row|
16
+ row.each do |cell|
17
+ cell
18
+ end
19
+ end
20
+ end
21
+
22
+ puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
23
+ end
24
+
25
+ def bench_mem(sheet)
26
+ Benchmark.memory do |bm|
27
+ bm.report do
28
+ sheet.each do |row|
29
+ row.each do |cell|
30
+ cell
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ file = File.read("test/files/10k-sheet.xlsx")
38
+
39
+ workbook = Xsv::Workbook.open(file)
40
+
41
+ puts "--- ARRAY MODE ---"
42
+
43
+ bench_perf(workbook.sheets[0])
44
+ bench_mem(workbook.sheets[0])
45
+
46
+ puts "\n--- HASH MODE ---"
47
+
48
+ workbook.sheets[0].parse_headers!
49
+
50
+ bench_perf(workbook.sheets[0])
51
+ bench_mem(workbook.sheets[0])
@@ -5,6 +5,9 @@ module Xsv
5
5
  ATTR_REGEX = /((\S+)="(.*?)")/m
6
6
 
7
7
  def parse(io)
8
+ responds_to_end_element = respond_to?(:end_element)
9
+ responds_to_characters = respond_to?(:characters)
10
+
8
11
  state = :look_start
9
12
  if io.is_a?(String)
10
13
  pbuf = io.dup
@@ -32,7 +35,7 @@ module Xsv
32
35
  if (o = pbuf.index("<"))
33
36
  chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
34
37
 
35
- if respond_to?(:characters) && !chars.empty?
38
+ if responds_to_characters && !chars.empty?
36
39
  if chars.index("&")
37
40
  chars.gsub!("&amp;", "&")
38
41
  chars.gsub!("&apos;", "'")
@@ -65,12 +68,12 @@ module Xsv
65
68
  end
66
69
 
67
70
  if tag_name.start_with?("/")
68
- end_element(tag_name[1..-1]) if respond_to?(:end_element)
71
+ end_element(tag_name[1..-1]) if responds_to_end_element
69
72
  elsif args.nil?
70
73
  start_element(tag_name, nil)
71
74
  else
72
75
  start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
73
- end_element(tag_name) if args.end_with?("/") && respond_to?(:end_element)
76
+ end_element(tag_name) if responds_to_end_element && args.end_with?("/")
74
77
  end
75
78
 
76
79
  state = :look_start
@@ -30,8 +30,8 @@ module Xsv
30
30
  @state = nil
31
31
  @cell = nil
32
32
  @row = nil
33
- @maxRow = 0
34
- @maxColumn = 0
33
+ @max_row = 0
34
+ @max_column = 0
35
35
  @trim_empty_rows = trim_empty_rows
36
36
  end
37
37
 
@@ -42,28 +42,28 @@ module Xsv
42
42
  @cell = attrs[:r]
43
43
  when "v"
44
44
  col = column_index(@cell)
45
- @maxColumn = col if col > @maxColumn
46
- @maxRow = @row if @row > @maxRow
45
+ @max_column = col if col > @max_column
46
+ @max_row = @row if @row > @max_row
47
47
  when "row"
48
48
  @state = name
49
49
  @row = attrs[:r].to_i
50
50
  when "dimension"
51
51
  @state = name
52
52
 
53
- _firstCell, lastCell = attrs[:ref].split(":")
53
+ _first_cell, last_cell = attrs[:ref].split(":")
54
54
 
55
- if lastCell
56
- @maxColumn = column_index(lastCell)
55
+ if last_cell
56
+ @max_column = column_index(last_cell)
57
57
  unless @trim_empty_rows
58
- @maxRow = lastCell[/\d+$/].to_i
59
- @block.call(@maxRow, @maxColumn)
58
+ @max_row = last_cell[/\d+$/].to_i
59
+ @block.call(@max_row, @max_column)
60
60
  end
61
61
  end
62
62
  end
63
63
  end
64
64
 
65
65
  def end_element(name)
66
- @block.call(@maxRow, @maxColumn) if name == "sheetData"
66
+ @block.call(@max_row, @max_column) if name == "sheetData"
67
67
  end
68
68
  end
69
69
  end
@@ -14,11 +14,11 @@ module Xsv
14
14
  @last_row = last_row - @row_skip
15
15
  @block = block
16
16
 
17
- @state = nil
17
+ @store_characters = false
18
18
 
19
19
  @row_index = 0
20
20
  @current_row = {}
21
- @current_row_attrs = {}
21
+ @current_row_number = 0
22
22
  @current_cell = {}
23
23
  @current_value = +""
24
24
 
@@ -28,44 +28,36 @@ module Xsv
28
28
  def start_element(name, attrs)
29
29
  case name
30
30
  when "c"
31
- @state = name
32
31
  @current_cell = attrs
33
32
  @current_value.clear
34
- when "v", "is"
35
- @state = name
33
+ when "v", "is", "t"
34
+ @store_characters = true
36
35
  when "row"
37
- @state = name
38
36
  @current_row = @empty_row.dup
39
- @current_row_attrs = attrs
40
- when "t"
41
- @state = nil unless @state == "is"
42
- else
43
- @state = nil
37
+ @current_row_number = attrs[:r].to_i
44
38
  end
45
39
  end
46
40
 
47
41
  def characters(value)
48
- @current_value << value if @state == "v" || @state == "is"
42
+ @current_value << value if @store_characters
49
43
  end
50
44
 
51
45
  def end_element(name)
52
46
  case name
53
- when "v"
54
- @state = nil
47
+ when "v", "is", "t"
48
+ @store_characters = false
55
49
  when "c"
56
50
  col_index = column_index(@current_cell[:r])
57
51
 
58
- case @mode
59
- when :array
52
+ if @mode == :array
60
53
  @current_row[col_index] = format_cell
61
- when :hash
54
+ else
62
55
  @current_row[@headers[col_index]] = format_cell
63
56
  end
64
57
  when "row"
65
- real_row_number = @current_row_attrs[:r].to_i
66
- adjusted_row_number = real_row_number - @row_skip
58
+ return if @current_row_number <= @row_skip
67
59
 
68
- return if real_row_number <= @row_skip
60
+ adjusted_row_number = @current_row_number - @row_skip
69
61
 
70
62
  @row_index += 1
71
63
 
@@ -98,10 +90,7 @@ module Xsv
98
90
  nil
99
91
  when nil, "n"
100
92
  if @current_cell[:s]
101
- style = @workbook.xfs[@current_cell[:s].to_i]
102
- numFmt = @workbook.numFmts[style[:numFmtId].to_i]
103
-
104
- parse_number_format(@current_value, numFmt)
93
+ parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
105
94
  else
106
95
  parse_number(@current_value)
107
96
  end
@@ -5,21 +5,21 @@ module Xsv
5
5
  # This is used internally when opening a sheet.
6
6
  class StylesHandler < SaxParser
7
7
  def self.get_styles(io)
8
- handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, numFmts|
8
+ handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
9
9
  @xfs = xfs
10
- @numFmts = numFmts
10
+ @num_fmts = num_fmts
11
11
  end
12
12
 
13
13
  handler.parse(io)
14
14
 
15
- [@xfs, @numFmts]
15
+ [@xfs, @num_fmts]
16
16
  end
17
17
 
18
- def initialize(numFmts, &block)
18
+ def initialize(num_fmts, &block)
19
19
  @block = block
20
20
  @state = nil
21
21
  @xfs = []
22
- @numFmts = numFmts
22
+ @num_fmts = num_fmts
23
23
  end
24
24
 
25
25
  def start_element(name, attrs)
@@ -27,16 +27,16 @@ module Xsv
27
27
  when "cellXfs"
28
28
  @state = "cellXfs"
29
29
  when "xf"
30
- @xfs << attrs if @state == "cellXfs"
30
+ @xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
31
31
  when "numFmt"
32
- @numFmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
32
+ @num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
33
33
  end
34
34
  end
35
35
 
36
36
  def end_element(name)
37
37
  case name
38
38
  when "styleSheet"
39
- @block.call(@xfs, @numFmts)
39
+ @block.call(@xfs, @num_fmts)
40
40
  when "cellXfs"
41
41
  @state = nil
42
42
  end
data/lib/xsv/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xsv
4
- VERSION = "1.0.5"
4
+ VERSION = "1.0.6"
5
5
  end
data/lib/xsv/workbook.rb CHANGED
@@ -10,7 +10,7 @@ module Xsv
10
10
  # @return [Array<Sheet>]
11
11
  attr_reader :sheets
12
12
 
13
- attr_reader :shared_strings, :xfs, :numFmts, :trim_empty_rows
13
+ attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
14
14
 
15
15
  # Open the workbook of the given filename, string or buffer. For additional
16
16
  # options see {.initialize}
@@ -49,7 +49,7 @@ module Xsv
49
49
  @trim_empty_rows = trim_empty_rows
50
50
 
51
51
  @sheets = []
52
- @xfs, @numFmts = fetch_styles
52
+ @xfs, @num_fmts = fetch_styles
53
53
  @sheet_ids = fetch_sheet_ids
54
54
  @relationships = fetch_relationships
55
55
  @shared_strings = fetch_shared_strings
@@ -68,7 +68,7 @@ module Xsv
68
68
  @zip = nil
69
69
  @sheets = nil
70
70
  @xfs = nil
71
- @numFmts = nil
71
+ @num_fmts = nil
72
72
  @relationships = nil
73
73
  @shared_strings = nil
74
74
  @sheet_ids = nil
@@ -83,6 +83,11 @@ module Xsv
83
83
  @sheets.select { |s| s.name == name }
84
84
  end
85
85
 
86
+ # Get number format for given style index
87
+ def get_num_fmt(style)
88
+ @num_fmts[@xfs[style][:numFmtId]]
89
+ end
90
+
86
91
  private
87
92
 
88
93
  def fetch_shared_strings
data/xsv.gemspec CHANGED
@@ -43,5 +43,5 @@ Gem::Specification.new do |spec|
43
43
  spec.add_development_dependency "bundler", "< 3"
44
44
  spec.add_development_dependency "rake", "~> 13.0"
45
45
  spec.add_development_dependency "minitest", "~> 5.14.2"
46
- spec.add_development_dependency "standardrb", "~> 1.0"
46
+ spec.add_development_dependency "standard", "~> 1.6.0"
47
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-05 00:00:00.000000000 Z
11
+ date: 2022-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -73,19 +73,19 @@ dependencies:
73
73
  - !ruby/object:Gem::Version
74
74
  version: 5.14.2
75
75
  - !ruby/object:Gem::Dependency
76
- name: standardrb
76
+ name: standard
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
79
  - - "~>"
80
80
  - !ruby/object:Gem::Version
81
- version: '1.0'
81
+ version: 1.6.0
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
- version: '1.0'
88
+ version: 1.6.0
89
89
  description: |2
90
90
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
91
91
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
@@ -105,6 +105,7 @@ files:
105
105
  - LICENSE.txt
106
106
  - README.md
107
107
  - Rakefile
108
+ - benchmark.rb
108
109
  - bin/console
109
110
  - bin/setup
110
111
  - lib/xsv.rb