xsv 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f753b006d8a3c230447e56b70824c7df929c5b7bd074ec82cf82acdf971073c
4
- data.tar.gz: bb457169f14d259526443ff2afa47477dceb13edf13d99f089e2f843f9fa258f
3
+ metadata.gz: 6c27d848984359d8f492658ca2634981f6f05b66c75711ccce305f794ff04530
4
+ data.tar.gz: 69cb487a9c42dd09b980f3d1170f39f39f0176109bbdc332c529eab4fbc2a298
5
5
  SHA512:
6
- metadata.gz: e3686372d118d3d3ca17c403e34479ab00168f0ba5b2057b624cf0b90ed4ee28caf7f9237201c5f6649a7435adc2b77e29751eeec90a71ec577acfcbca2ff0b2
7
- data.tar.gz: '094d7b47e71c7ad96b1a477e672c2615203f814cc475cc6194772e8e048713d36e58780c9dbe17a2239a4a63869eea3cde8cbcc20a7dd07816cde9519ff6a2e7'
6
+ metadata.gz: 8001b27aec710a21ce6860bf3b7a9f37dad27ba4cef797df533df8092c3a61a3851836c8504bca3a0bff8f57dafe4ae2ea296ea7a554a9558b3d807ca9f0b62e
7
+ data.tar.gz: d397dc3daaccda23e5160fd6b71342fc69c57fed98276da65ef0691908afb751310c3c8f8d6ebc7b6b7b90b4e604a57e635919b7456ae64694703a7c7cf1aab4
@@ -19,7 +19,7 @@ jobs:
19
19
  runs-on: ubuntu-latest
20
20
  strategy:
21
21
  matrix:
22
- ruby-version: ['2.7', '3.0', '3.1', '3.2', '3.3', 'jruby', 'truffleruby']
22
+ ruby-version: ['3.2', '3.3', '3.4', '4.0', 'jruby', 'truffleruby']
23
23
 
24
24
  steps:
25
25
  - uses: actions/checkout@v3
data/CHANGELOG.md CHANGED
@@ -1,9 +1,24 @@
1
1
  # Xsv Changelog
2
2
 
3
- ## 1.3.1 2023-05-06
3
+ ## 1.4.0 2026-01-29
4
4
 
5
- - Fix issue #56 with multiple nil headers
6
- - Ignore colums with a `nil` header in hash mode
5
+ - Ruby 2.7, 3.0, and 3.1 are no longer supported. Xsv is now compatible with Ruby 3.2 through 4.0, latest JRuby, and latest TruffleRuby
6
+ - Add compatibility with Rubyzip 3
7
+ - Fix UTF-8 encoding issues when parsing XML with multi-byte characters
8
+ - Handle incomplete UTF-8 sequences at chunk boundaries in the streaming XML parser
9
+ - Fix parsing of rows without the `r` attribute (thanks @romanbsd)
10
+ - Performance: avoid calling `unescapeHTML` unless there are entities in the text
11
+ - Fix typos in CHANGELOG (thanks @jdufresne)
12
+
13
+ ## 1.3.2 2024-12-25
14
+
15
+ - Xsv is now compatible with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
16
+ - Sheet#each_row returns Enumerator when no block is given (thanks @myabc)
17
+
18
+ ## 1.3.1 2024-05-06
19
+
20
+ - Fix issue #56 with multiple nil headers
21
+ - Ignore columns with a `nil` header in hash mode
7
22
 
8
23
  ## 1.3.0 2023-12-16
9
24
 
data/README.md CHANGED
@@ -34,8 +34,8 @@ Or install it yourself as:
34
34
 
35
35
  $ gem install xsv
36
36
 
37
- Xsv targets ruby >= 2.7 and has a just single dependency, `rubyzip`. It has been
38
- tested successfully with MRI, JRuby, and TruffleRuby. It has no native extensions
37
+ Xsv targets Ruby >= 3.2 and has just a single dependency, `rubyzip`. It has been
38
+ tested successfully with MRI (including Ruby 4.0), JRuby, and TruffleRuby. It has no native extensions
39
39
  and is designed to be thread-safe.
40
40
 
41
41
  ## Usage
data/lib/xsv/helpers.rb CHANGED
@@ -74,7 +74,7 @@ module Xsv
74
74
  # Compensate for rounding errors
75
75
  if minutes >= 60
76
76
  hours += (minutes / 60)
77
- minutes = minutes % 60
77
+ minutes %= 60
78
78
  end
79
79
 
80
80
  format("%02d:%02d", hours, minutes)
@@ -4,7 +4,48 @@ require "cgi"
4
4
 
5
5
  module Xsv
6
6
  class SaxParser
7
- ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/mn
7
+ ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/m
8
+
9
+ # Returns the number of bytes to trim from the end of a UTF-8 string
10
+ # to avoid splitting a multi-byte character. Returns 0 if the string
11
+ # ends with a complete character.
12
+ def self.incomplete_utf8_tail_size(bytes)
13
+ return 0 if bytes.empty?
14
+
15
+ # Check up to 3 bytes from the end (max UTF-8 char is 4 bytes)
16
+ check_length = [bytes.bytesize, 3].min
17
+ tail = bytes.byteslice(-check_length, check_length)
18
+
19
+ tail.each_byte.with_index.reverse_each do |byte, i|
20
+ # Check if this is a leading byte (starts a multi-byte sequence)
21
+ if byte >= 0xC0 # 11000000 - start of multi-byte sequence
22
+ # i is position in tail, bytes after leading byte = check_length - i - 1
23
+ # total bytes in sequence = 1 (leading) + continuation bytes = check_length - i
24
+ bytes_in_sequence = check_length - i
25
+
26
+ # Determine expected length from leading byte
27
+ expected_length = if byte >= 0xF0 # 11110xxx - 4 byte sequence
28
+ 4
29
+ elsif byte >= 0xE0 # 1110xxxx - 3 byte sequence
30
+ 3
31
+ else # 110xxxxx - 2 byte sequence
32
+ 2
33
+ end
34
+
35
+ # If we don't have enough bytes, this sequence is incomplete
36
+ return bytes_in_sequence if bytes_in_sequence < expected_length
37
+
38
+ # Sequence is complete
39
+ return 0
40
+ elsif byte < 0x80
41
+ # ASCII byte - string ends with complete character
42
+ return 0
43
+ end
44
+ # else: continuation byte (10xxxxxx), keep looking for leading byte
45
+ end
46
+
47
+ 0
48
+ end
8
49
 
9
50
  def parse(io)
10
51
  responds_to_end_element = respond_to?(:end_element)
@@ -16,17 +57,36 @@ module Xsv
16
57
  eof_reached = true
17
58
  must_read = false
18
59
  else
19
- pbuf = String.new(capacity: 8192)
60
+ pbuf = String.new(capacity: 8192, encoding: "utf-8")
20
61
  eof_reached = false
21
62
  must_read = true
22
63
  end
64
+ leftover = String.new(encoding: "binary")
23
65
 
24
66
  loop do
25
67
  if must_read
26
68
  begin
27
- pbuf << io.sysread(2048)
28
- rescue EOFError, TypeError
29
- # EOFError is thrown by IO, rubyzip returns nil from sysread on EOF
69
+ chunk = io.sysread(2048)
70
+ if chunk
71
+ # Prepend any leftover bytes from previous incomplete UTF-8 sequence
72
+ chunk = leftover << chunk unless leftover.empty?
73
+
74
+ # Check if chunk ends with incomplete UTF-8 sequence
75
+ trim = SaxParser.incomplete_utf8_tail_size(chunk)
76
+ if trim > 0
77
+ leftover = chunk.byteslice(-trim, trim)
78
+ chunk = chunk.byteslice(0, chunk.bytesize - trim)
79
+ else
80
+ leftover = String.new(encoding: "binary")
81
+ end
82
+
83
+ pbuf << chunk.force_encoding("utf-8")
84
+ else
85
+ # rubyzip < 3 returns nil from sysread on EOF
86
+ eof_reached = true
87
+ end
88
+ rescue EOFError
89
+ # EOFError is thrown by IO and rubyzip >= 3
30
90
  eof_reached = true
31
91
  end
32
92
 
@@ -38,7 +98,11 @@ module Xsv
38
98
  chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
39
99
 
40
100
  if responds_to_characters && !chars.empty?
41
- characters(CGI.unescapeHTML(chars))
101
+ if chars.include?("&")
102
+ characters(CGI.unescapeHTML(chars))
103
+ else
104
+ characters(chars)
105
+ end
42
106
  end
43
107
 
44
108
  state = :look_end
@@ -75,7 +139,7 @@ module Xsv
75
139
  start_element(tag_name, nil)
76
140
  else
77
141
  attribute_buffer = {}
78
- attributes = args.scan(ATTR_REGEX)
142
+ attributes = args.force_encoding("utf-8").scan(ATTR_REGEX)
79
143
  while (attr = attributes.delete_at(0))
80
144
  attribute_buffer[attr[1].to_sym] = attr[2]
81
145
  end
data/lib/xsv/sheet.rb CHANGED
@@ -55,6 +55,8 @@ module Xsv
55
55
 
56
56
  # Iterate over rows, returning either hashes or arrays based on the current mode.
57
57
  def each_row(&block)
58
+ return to_enum(__method__) unless block
59
+
58
60
  @io.rewind
59
61
  SheetRowsHandler.new(@mode, @headers, empty_row, @workbook, @row_skip, @last_row, &block).parse(@io)
60
62
  true
@@ -34,7 +34,12 @@ module Xsv
34
34
  @store_characters = true
35
35
  when "row"
36
36
  @current_row = (@mode == :array) ? [] : @empty_row.dup
37
- @current_row_number = attrs[:r].to_i
37
+ if attrs[:r]
38
+ @current_row_number = attrs[:r].to_i
39
+ else
40
+ # Use position-based numbering when r attribute is missing
41
+ @current_row_number += 1
42
+ end
38
43
  end
39
44
  end
40
45
 
data/lib/xsv/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xsv
4
- VERSION = "1.3.1"
4
+ VERSION = "1.4.0"
5
5
  end
data/xsv.gemspec CHANGED
@@ -36,12 +36,12 @@ Gem::Specification.new do |spec|
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
38
 
39
- spec.required_ruby_version = ">= 2.7"
39
+ spec.required_ruby_version = ">= 3.2"
40
40
 
41
- spec.add_dependency "rubyzip", ">= 1.3", "< 3"
41
+ spec.add_dependency "rubyzip", ">= 1.3", "< 4"
42
42
 
43
- spec.add_development_dependency "bundler", "< 3"
44
- spec.add_development_dependency "rake", "~> 13.1.0"
45
- spec.add_development_dependency "minitest", "~> 5.20.0"
46
- spec.add_development_dependency "standard", "~> 1.32.1"
43
+ spec.add_development_dependency "bundler"
44
+ spec.add_development_dependency "rake", "~> 13.2"
45
+ spec.add_development_dependency "minitest", "~> 5.24"
46
+ spec.add_development_dependency "standard", "~> 1.44"
47
47
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2024-05-06 00:00:00.000000000 Z
10
+ date: 2026-01-29 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rubyzip
@@ -19,7 +18,7 @@ dependencies:
19
18
  version: '1.3'
20
19
  - - "<"
21
20
  - !ruby/object:Gem::Version
22
- version: '3'
21
+ version: '4'
23
22
  type: :runtime
24
23
  prerelease: false
25
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,63 +28,63 @@ dependencies:
29
28
  version: '1.3'
30
29
  - - "<"
31
30
  - !ruby/object:Gem::Version
32
- version: '3'
31
+ version: '4'
33
32
  - !ruby/object:Gem::Dependency
34
33
  name: bundler
35
34
  requirement: !ruby/object:Gem::Requirement
36
35
  requirements:
37
- - - "<"
36
+ - - ">="
38
37
  - !ruby/object:Gem::Version
39
- version: '3'
38
+ version: '0'
40
39
  type: :development
41
40
  prerelease: false
42
41
  version_requirements: !ruby/object:Gem::Requirement
43
42
  requirements:
44
- - - "<"
43
+ - - ">="
45
44
  - !ruby/object:Gem::Version
46
- version: '3'
45
+ version: '0'
47
46
  - !ruby/object:Gem::Dependency
48
47
  name: rake
49
48
  requirement: !ruby/object:Gem::Requirement
50
49
  requirements:
51
50
  - - "~>"
52
51
  - !ruby/object:Gem::Version
53
- version: 13.1.0
52
+ version: '13.2'
54
53
  type: :development
55
54
  prerelease: false
56
55
  version_requirements: !ruby/object:Gem::Requirement
57
56
  requirements:
58
57
  - - "~>"
59
58
  - !ruby/object:Gem::Version
60
- version: 13.1.0
59
+ version: '13.2'
61
60
  - !ruby/object:Gem::Dependency
62
61
  name: minitest
63
62
  requirement: !ruby/object:Gem::Requirement
64
63
  requirements:
65
64
  - - "~>"
66
65
  - !ruby/object:Gem::Version
67
- version: 5.20.0
66
+ version: '5.24'
68
67
  type: :development
69
68
  prerelease: false
70
69
  version_requirements: !ruby/object:Gem::Requirement
71
70
  requirements:
72
71
  - - "~>"
73
72
  - !ruby/object:Gem::Version
74
- version: 5.20.0
73
+ version: '5.24'
75
74
  - !ruby/object:Gem::Dependency
76
75
  name: standard
77
76
  requirement: !ruby/object:Gem::Requirement
78
77
  requirements:
79
78
  - - "~>"
80
79
  - !ruby/object:Gem::Version
81
- version: 1.32.1
80
+ version: '1.44'
82
81
  type: :development
83
82
  prerelease: false
84
83
  version_requirements: !ruby/object:Gem::Requirement
85
84
  requirements:
86
85
  - - "~>"
87
86
  - !ruby/object:Gem::Version
88
- version: 1.32.1
87
+ version: '1.44'
89
88
  description: |2
90
89
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
91
90
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
@@ -128,7 +127,6 @@ metadata:
128
127
  homepage_uri: https://github.com/martijn/xsv
129
128
  source_code_uri: https://github.com/martijn/xsv
130
129
  changelog_uri: https://raw.githubusercontent.com/martijn/xsv/main/CHANGELOG.md
131
- post_install_message:
132
130
  rdoc_options: []
133
131
  require_paths:
134
132
  - lib
@@ -136,15 +134,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
136
134
  requirements:
137
135
  - - ">="
138
136
  - !ruby/object:Gem::Version
139
- version: '2.7'
137
+ version: '3.2'
140
138
  required_rubygems_version: !ruby/object:Gem::Requirement
141
139
  requirements:
142
140
  - - ">="
143
141
  - !ruby/object:Gem::Version
144
142
  version: '0'
145
143
  requirements: []
146
- rubygems_version: 3.5.1
147
- signing_key:
144
+ rubygems_version: 3.6.2
148
145
  specification_version: 4
149
146
  summary: A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't
150
147
  test_files: []