xsv 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e03874b3017fd111c7b63d68bb2273f406cab5e129b4edb174c2710aed0cbab9
4
- data.tar.gz: 39ebe2ffdc657efd737f62026d9cc18a531262f4722189f3d2a3012a1e0885d9
3
+ metadata.gz: 6c27d848984359d8f492658ca2634981f6f05b66c75711ccce305f794ff04530
4
+ data.tar.gz: 69cb487a9c42dd09b980f3d1170f39f39f0176109bbdc332c529eab4fbc2a298
5
5
  SHA512:
6
- metadata.gz: 95e8ea84b5a39cb1158f7f1a8eb531f00f76d94eacfa370d74080b943f213dbf6d5aa9df237b54896769f4ff9e3a0a1a1393863d832e6b9cf2ceb21e2069d711
7
- data.tar.gz: 9b0392cebd9f720d0c716a99f8a179a4def088e90ed79721c40fdecba518365e1e23f8fb82bbf41dbb474329d44414b1742d375a44388a2fc56cd074c870b2f3
6
+ metadata.gz: 8001b27aec710a21ce6860bf3b7a9f37dad27ba4cef797df533df8092c3a61a3851836c8504bca3a0bff8f57dafe4ae2ea296ea7a554a9558b3d807ca9f0b62e
7
+ data.tar.gz: d397dc3daaccda23e5160fd6b71342fc69c57fed98276da65ef0691908afb751310c3c8f8d6ebc7b6b7b90b4e604a57e635919b7456ae64694703a7c7cf1aab4
@@ -19,7 +19,7 @@ jobs:
19
19
  runs-on: ubuntu-latest
20
20
  strategy:
21
21
  matrix:
22
- ruby-version: ['2.7', '3.0', '3.1', '3.2', '3.3', '3.4', 'jruby', 'truffleruby']
22
+ ruby-version: ['3.2', '3.3', '3.4', '4.0', 'jruby', 'truffleruby']
23
23
 
24
24
  steps:
25
25
  - uses: actions/checkout@v3
data/CHANGELOG.md CHANGED
@@ -1,14 +1,24 @@
1
1
  # Xsv Changelog
2
2
 
3
+ ## 1.4.0 2026-01-29
4
+
5
+ - Ruby 2.7, 3.0, and 3.1 are no longer supported. Xsv is now compatible with Ruby 3.2 through 4.0, latest JRuby, and latest TruffleRuby
6
+ - Add compatibility with Rubyzip 3
7
+ - Fix UTF-8 encoding issues when parsing XML with multi-byte characters
8
+ - Handle incomplete UTF-8 sequences at chunk boundaries in the streaming XML parser
9
+ - Fix parsing of rows without the `r` attribute (thanks @romanbsd)
10
+ - Performance: avoid calling `unescapeHTML` unless there are entities in the text
11
+ - Fix typos in CHANGELOG (thanks @jdufresne)
12
+
3
13
  ## 1.3.2 2024-12-25
4
14
 
5
- - Xsv is now compatbile with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
15
+ - Xsv is now compatible with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
6
16
  - Sheet#each_row returns Enumerator when no block is given (thanks @myabc)
7
17
 
8
18
  ## 1.3.1 2024-05-06
9
19
 
10
- - Fix issue #56 with multiple nil headers
11
- - Ignore colums with a `nil` header in hash mode
20
+ - Fix issue #56 with multiple nil headers
21
+ - Ignore columns with a `nil` header in hash mode
12
22
 
13
23
  ## 1.3.0 2023-12-16
14
24
 
data/README.md CHANGED
@@ -34,8 +34,8 @@ Or install it yourself as:
34
34
 
35
35
  $ gem install xsv
36
36
 
37
- Xsv targets ruby >= 2.7 and has a just single dependency, `rubyzip`. It has been
38
- tested successfully with MRI, JRuby, and TruffleRuby. It has no native extensions
37
+ Xsv targets Ruby >= 3.2 and has just a single dependency, `rubyzip`. It has been
38
+ tested successfully with MRI (including Ruby 4.0), JRuby, and TruffleRuby. It has no native extensions
39
39
  and is designed to be thread-safe.
40
40
 
41
41
  ## Usage
@@ -4,7 +4,48 @@ require "cgi"
4
4
 
5
5
  module Xsv
6
6
  class SaxParser
7
- ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/mn
7
+ ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/m
8
+
9
# Returns the number of bytes to trim from the end of a UTF-8 byte string
# so that the remainder does not end in the middle of a multi-byte
# character. Returns 0 when the string is empty or already ends on a
# character boundary.
#
# Only the last three bytes are inspected: the longest UTF-8 sequence is
# four bytes, so an incomplete one holds at most three.
#
# Bytes 0xF8..0xFF match none of the UTF-8 lead-byte patterns
# (110xxxxx, 1110xxxx, 11110xxx). They are treated as a boundary, so
# nothing is trimmed, rather than as the start of a four-byte sequence.
#
# @param bytes [String] chunk of raw bytes
# @return [Integer] number of trailing bytes (0..3) that form an
#   incomplete sequence
def self.incomplete_utf8_tail_size(bytes)
  max_lookback = [bytes.bytesize, 3].min

  # Walk backwards from the last byte; from_end is 1 for the final byte
  1.upto(max_lookback) do |from_end|
    byte = bytes.getbyte(-from_end)

    # ASCII, or a byte that can never start a sequence: nothing to hold back
    return 0 if byte < 0x80 || byte >= 0xF8

    # Continuation byte (10xxxxxx): keep looking for its lead byte
    next if byte < 0xC0

    # Lead byte found; derive the full sequence length from its bit pattern
    expected_length =
      if byte >= 0xF0 # 11110xxx - 4 byte sequence
        4
      elsif byte >= 0xE0 # 1110xxxx - 3 byte sequence
        3
      else # 110xxxxx - 2 byte sequence
        2
      end

    # from_end counts the lead byte plus the continuation bytes seen so far
    return (from_end < expected_length) ? from_end : 0
  end

  # Empty input, or only continuation bytes within reach: trim nothing
  0
end
8
49
 
9
50
  def parse(io)
10
51
  responds_to_end_element = respond_to?(:end_element)
@@ -16,17 +57,36 @@ module Xsv
16
57
  eof_reached = true
17
58
  must_read = false
18
59
  else
19
- pbuf = String.new(capacity: 8192)
60
+ pbuf = String.new(capacity: 8192, encoding: "utf-8")
20
61
  eof_reached = false
21
62
  must_read = true
22
63
  end
64
+ leftover = String.new(encoding: "binary")
23
65
 
24
66
  loop do
25
67
  if must_read
26
68
  begin
27
- pbuf << io.sysread(2048)
28
- rescue EOFError, TypeError
29
- # EOFError is thrown by IO, rubyzip returns nil from sysread on EOF
69
+ chunk = io.sysread(2048)
70
+ if chunk
71
+ # Prepend any leftover bytes from previous incomplete UTF-8 sequence
72
+ chunk = leftover << chunk unless leftover.empty?
73
+
74
+ # Check if chunk ends with incomplete UTF-8 sequence
75
+ trim = SaxParser.incomplete_utf8_tail_size(chunk)
76
+ if trim > 0
77
+ leftover = chunk.byteslice(-trim, trim)
78
+ chunk = chunk.byteslice(0, chunk.bytesize - trim)
79
+ else
80
+ leftover = String.new(encoding: "binary")
81
+ end
82
+
83
+ pbuf << chunk.force_encoding("utf-8")
84
+ else
85
+ # rubyzip < 3 returns nil from sysread on EOF
86
+ eof_reached = true
87
+ end
88
+ rescue EOFError
89
+ # EOFError is thrown by IO and rubyzip >= 3
30
90
  eof_reached = true
31
91
  end
32
92
 
@@ -38,7 +98,11 @@ module Xsv
38
98
  chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
39
99
 
40
100
  if responds_to_characters && !chars.empty?
41
- characters(CGI.unescapeHTML(chars))
101
+ if chars.include?("&")
102
+ characters(CGI.unescapeHTML(chars))
103
+ else
104
+ characters(chars)
105
+ end
42
106
  end
43
107
 
44
108
  state = :look_end
@@ -75,7 +139,7 @@ module Xsv
75
139
  start_element(tag_name, nil)
76
140
  else
77
141
  attribute_buffer = {}
78
- attributes = args.scan(ATTR_REGEX)
142
+ attributes = args.force_encoding("utf-8").scan(ATTR_REGEX)
79
143
  while (attr = attributes.delete_at(0))
80
144
  attribute_buffer[attr[1].to_sym] = attr[2]
81
145
  end
@@ -34,7 +34,12 @@ module Xsv
34
34
  @store_characters = true
35
35
  when "row"
36
36
  @current_row = (@mode == :array) ? [] : @empty_row.dup
37
- @current_row_number = attrs[:r].to_i
37
+ if attrs[:r]
38
+ @current_row_number = attrs[:r].to_i
39
+ else
40
+ # Use position-based numbering when r attribute is missing
41
+ @current_row_number += 1
42
+ end
38
43
  end
39
44
  end
40
45
 
data/lib/xsv/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xsv
4
- VERSION = "1.3.2"
4
+ VERSION = "1.4.0"
5
5
  end
data/xsv.gemspec CHANGED
@@ -36,13 +36,12 @@ Gem::Specification.new do |spec|
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
38
 
39
- spec.required_ruby_version = ">= 2.7"
39
+ spec.required_ruby_version = ">= 3.2"
40
40
 
41
- spec.add_dependency "rubyzip", ">= 1.3", "< 3"
41
+ spec.add_dependency "rubyzip", ">= 1.3", "< 4"
42
42
 
43
- spec.add_development_dependency "bundler", "< 3"
43
+ spec.add_development_dependency "bundler"
44
44
  spec.add_development_dependency "rake", "~> 13.2"
45
45
  spec.add_development_dependency "minitest", "~> 5.24"
46
- # Maintain Ruby 2.7 compatibility
47
- spec.add_development_dependency "standard", "1.37.0"
46
+ spec.add_development_dependency "standard", "~> 1.44"
48
47
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2024-12-25 00:00:00.000000000 Z
10
+ date: 2026-01-29 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: rubyzip
@@ -18,7 +18,7 @@ dependencies:
18
18
  version: '1.3'
19
19
  - - "<"
20
20
  - !ruby/object:Gem::Version
21
- version: '3'
21
+ version: '4'
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,21 +28,21 @@ dependencies:
28
28
  version: '1.3'
29
29
  - - "<"
30
30
  - !ruby/object:Gem::Version
31
- version: '3'
31
+ version: '4'
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: bundler
34
34
  requirement: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - "<"
36
+ - - ">="
37
37
  - !ruby/object:Gem::Version
38
- version: '3'
38
+ version: '0'
39
39
  type: :development
40
40
  prerelease: false
41
41
  version_requirements: !ruby/object:Gem::Requirement
42
42
  requirements:
43
- - - "<"
43
+ - - ">="
44
44
  - !ruby/object:Gem::Version
45
- version: '3'
45
+ version: '0'
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: rake
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -75,16 +75,16 @@ dependencies:
75
75
  name: standard
76
76
  requirement: !ruby/object:Gem::Requirement
77
77
  requirements:
78
- - - '='
78
+ - - "~>"
79
79
  - !ruby/object:Gem::Version
80
- version: 1.37.0
80
+ version: '1.44'
81
81
  type: :development
82
82
  prerelease: false
83
83
  version_requirements: !ruby/object:Gem::Requirement
84
84
  requirements:
85
- - - '='
85
+ - - "~>"
86
86
  - !ruby/object:Gem::Version
87
- version: 1.37.0
87
+ version: '1.44'
88
88
  description: |2
89
89
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
90
90
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
@@ -134,7 +134,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
134
134
  requirements:
135
135
  - - ">="
136
136
  - !ruby/object:Gem::Version
137
- version: '2.7'
137
+ version: '3.2'
138
138
  required_rubygems_version: !ruby/object:Gem::Requirement
139
139
  requirements:
140
140
  - - ">="