xsv 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/CHANGELOG.md +13 -3
- data/README.md +2 -2
- data/lib/xsv/sax_parser.rb +71 -7
- data/lib/xsv/sheet_rows_handler.rb +6 -1
- data/lib/xsv/version.rb +1 -1
- data/xsv.gemspec +4 -5
- metadata +13 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6c27d848984359d8f492658ca2634981f6f05b66c75711ccce305f794ff04530
|
|
4
|
+
data.tar.gz: 69cb487a9c42dd09b980f3d1170f39f39f0176109bbdc332c529eab4fbc2a298
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8001b27aec710a21ce6860bf3b7a9f37dad27ba4cef797df533df8092c3a61a3851836c8504bca3a0bff8f57dafe4ae2ea296ea7a554a9558b3d807ca9f0b62e
|
|
7
|
+
data.tar.gz: d397dc3daaccda23e5160fd6b71342fc69c57fed98276da65ef0691908afb751310c3c8f8d6ebc7b6b7b90b4e604a57e635919b7456ae64694703a7c7cf1aab4
|
data/.github/workflows/ruby.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,14 +1,24 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
+
## 1.4.0 2026-01-29
|
|
4
|
+
|
|
5
|
+
- Ruby 2.7, 3.0, and 3.1 are no longer supported. Xsv is now compatible with Ruby 3.2 through 4.0, latest JRuby, and latest TruffleRuby
|
|
6
|
+
- Add compatibility with Rubyzip 3
|
|
7
|
+
- Fix UTF-8 encoding issues when parsing XML with multi-byte characters
|
|
8
|
+
- Handle incomplete UTF-8 sequences at chunk boundaries in the streaming XML parser
|
|
9
|
+
- Fix parsing of rows without the `r` attribute (thanks @romanbsd)
|
|
10
|
+
- Performance: avoid calling `unescapeHTML` unless there are entities in the text
|
|
11
|
+
- Fix typos in CHANGELOG (thanks @jdufresne)
|
|
12
|
+
|
|
3
13
|
## 1.3.2 2024-12-25
|
|
4
14
|
|
|
5
|
-
- Xsv is now
|
|
15
|
+
- Xsv is now compatible with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
|
|
6
16
|
- Sheet#each_row returns Enumerator when no block is given (thanks @myabc)
|
|
7
17
|
|
|
8
18
|
## 1.3.1 2024-05-06
|
|
9
19
|
|
|
10
|
-
- Fix issue #56 with multiple nil headers
|
|
11
|
-
- Ignore
|
|
20
|
+
- Fix issue #56 with multiple nil headers
|
|
21
|
+
- Ignore columns with a `nil` header in hash mode
|
|
12
22
|
|
|
13
23
|
## 1.3.0 2023-12-16
|
|
14
24
|
|
data/README.md
CHANGED
|
@@ -34,8 +34,8 @@ Or install it yourself as:
|
|
|
34
34
|
|
|
35
35
|
$ gem install xsv
|
|
36
36
|
|
|
37
|
-
Xsv targets
|
|
38
|
-
tested successfully with MRI, JRuby, and TruffleRuby. It has no native extensions
|
|
37
|
+
Xsv targets Ruby >= 3.2 and has just a single dependency, `rubyzip`. It has been
|
|
38
|
+
tested successfully with MRI (including Ruby 4.0), JRuby, and TruffleRuby. It has no native extensions
|
|
39
39
|
and is designed to be thread-safe.
|
|
40
40
|
|
|
41
41
|
## Usage
|
data/lib/xsv/sax_parser.rb
CHANGED
|
@@ -4,7 +4,48 @@ require "cgi"
|
|
|
4
4
|
|
|
5
5
|
module Xsv
|
|
6
6
|
class SaxParser
|
|
7
|
-
ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/
|
|
7
|
+
ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/m
|
|
8
|
+
|
|
9
|
+
# Returns the number of bytes to trim from the end of a UTF-8 string
|
|
10
|
+
# to avoid splitting a multi-byte character. Returns 0 if the string
|
|
11
|
+
# ends with a complete character.
|
|
12
|
+
def self.incomplete_utf8_tail_size(bytes)
|
|
13
|
+
return 0 if bytes.empty?
|
|
14
|
+
|
|
15
|
+
# Check up to 3 bytes from the end (max UTF-8 char is 4 bytes)
|
|
16
|
+
check_length = [bytes.bytesize, 3].min
|
|
17
|
+
tail = bytes.byteslice(-check_length, check_length)
|
|
18
|
+
|
|
19
|
+
tail.each_byte.with_index.reverse_each do |byte, i|
|
|
20
|
+
# Check if this is a leading byte (starts a multi-byte sequence)
|
|
21
|
+
if byte >= 0xC0 # 11000000 - start of multi-byte sequence
|
|
22
|
+
# i is position in tail, bytes after leading byte = check_length - i - 1
|
|
23
|
+
# total bytes in sequence = 1 (leading) + continuation bytes = check_length - i
|
|
24
|
+
bytes_in_sequence = check_length - i
|
|
25
|
+
|
|
26
|
+
# Determine expected length from leading byte
|
|
27
|
+
expected_length = if byte >= 0xF0 # 11110xxx - 4 byte sequence
|
|
28
|
+
4
|
|
29
|
+
elsif byte >= 0xE0 # 1110xxxx - 3 byte sequence
|
|
30
|
+
3
|
|
31
|
+
else # 110xxxxx - 2 byte sequence
|
|
32
|
+
2
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# If we don't have enough bytes, this sequence is incomplete
|
|
36
|
+
return bytes_in_sequence if bytes_in_sequence < expected_length
|
|
37
|
+
|
|
38
|
+
# Sequence is complete
|
|
39
|
+
return 0
|
|
40
|
+
elsif byte < 0x80
|
|
41
|
+
# ASCII byte - string ends with complete character
|
|
42
|
+
return 0
|
|
43
|
+
end
|
|
44
|
+
# else: continuation byte (10xxxxxx), keep looking for leading byte
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
0
|
|
48
|
+
end
|
|
8
49
|
|
|
9
50
|
def parse(io)
|
|
10
51
|
responds_to_end_element = respond_to?(:end_element)
|
|
@@ -16,17 +57,36 @@ module Xsv
|
|
|
16
57
|
eof_reached = true
|
|
17
58
|
must_read = false
|
|
18
59
|
else
|
|
19
|
-
pbuf = String.new(capacity: 8192)
|
|
60
|
+
pbuf = String.new(capacity: 8192, encoding: "utf-8")
|
|
20
61
|
eof_reached = false
|
|
21
62
|
must_read = true
|
|
22
63
|
end
|
|
64
|
+
leftover = String.new(encoding: "binary")
|
|
23
65
|
|
|
24
66
|
loop do
|
|
25
67
|
if must_read
|
|
26
68
|
begin
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
69
|
+
chunk = io.sysread(2048)
|
|
70
|
+
if chunk
|
|
71
|
+
# Prepend any leftover bytes from previous incomplete UTF-8 sequence
|
|
72
|
+
chunk = leftover << chunk unless leftover.empty?
|
|
73
|
+
|
|
74
|
+
# Check if chunk ends with incomplete UTF-8 sequence
|
|
75
|
+
trim = SaxParser.incomplete_utf8_tail_size(chunk)
|
|
76
|
+
if trim > 0
|
|
77
|
+
leftover = chunk.byteslice(-trim, trim)
|
|
78
|
+
chunk = chunk.byteslice(0, chunk.bytesize - trim)
|
|
79
|
+
else
|
|
80
|
+
leftover = String.new(encoding: "binary")
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
pbuf << chunk.force_encoding("utf-8")
|
|
84
|
+
else
|
|
85
|
+
# rubyzip < 3 returns nil from sysread on EOF
|
|
86
|
+
eof_reached = true
|
|
87
|
+
end
|
|
88
|
+
rescue EOFError
|
|
89
|
+
# EOFError is thrown by IO and rubyzip >= 3
|
|
30
90
|
eof_reached = true
|
|
31
91
|
end
|
|
32
92
|
|
|
@@ -38,7 +98,11 @@ module Xsv
|
|
|
38
98
|
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
|
39
99
|
|
|
40
100
|
if responds_to_characters && !chars.empty?
|
|
41
|
-
|
|
101
|
+
if chars.include?("&")
|
|
102
|
+
characters(CGI.unescapeHTML(chars))
|
|
103
|
+
else
|
|
104
|
+
characters(chars)
|
|
105
|
+
end
|
|
42
106
|
end
|
|
43
107
|
|
|
44
108
|
state = :look_end
|
|
@@ -75,7 +139,7 @@ module Xsv
|
|
|
75
139
|
start_element(tag_name, nil)
|
|
76
140
|
else
|
|
77
141
|
attribute_buffer = {}
|
|
78
|
-
attributes = args.scan(ATTR_REGEX)
|
|
142
|
+
attributes = args.force_encoding("utf-8").scan(ATTR_REGEX)
|
|
79
143
|
while (attr = attributes.delete_at(0))
|
|
80
144
|
attribute_buffer[attr[1].to_sym] = attr[2]
|
|
81
145
|
end
|
|
data/lib/xsv/sheet_rows_handler.rb
CHANGED
|
@@ -34,7 +34,12 @@ module Xsv
|
|
|
34
34
|
@store_characters = true
|
|
35
35
|
when "row"
|
|
36
36
|
@current_row = (@mode == :array) ? [] : @empty_row.dup
|
|
37
|
-
|
|
37
|
+
if attrs[:r]
|
|
38
|
+
@current_row_number = attrs[:r].to_i
|
|
39
|
+
else
|
|
40
|
+
# Use position-based numbering when r attribute is missing
|
|
41
|
+
@current_row_number += 1
|
|
42
|
+
end
|
|
38
43
|
end
|
|
39
44
|
end
|
|
40
45
|
|
data/lib/xsv/version.rb
CHANGED
data/xsv.gemspec
CHANGED
|
@@ -36,13 +36,12 @@ Gem::Specification.new do |spec|
|
|
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
37
37
|
spec.require_paths = ["lib"]
|
|
38
38
|
|
|
39
|
-
spec.required_ruby_version = ">= 2
|
|
39
|
+
spec.required_ruby_version = ">= 3.2"
|
|
40
40
|
|
|
41
|
-
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
|
41
|
+
spec.add_dependency "rubyzip", ">= 1.3", "< 4"
|
|
42
42
|
|
|
43
|
-
spec.add_development_dependency "bundler"
|
|
43
|
+
spec.add_development_dependency "bundler"
|
|
44
44
|
spec.add_development_dependency "rake", "~> 13.2"
|
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.24"
|
|
46
|
-
|
|
47
|
-
spec.add_development_dependency "standard", "1.37.0"
|
|
46
|
+
spec.add_development_dependency "standard", "~> 1.44"
|
|
48
47
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.2
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 2026-01-29 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: rubyzip
|
|
@@ -18,7 +18,7 @@ dependencies:
|
|
|
18
18
|
version: '1.3'
|
|
19
19
|
- - "<"
|
|
20
20
|
- !ruby/object:Gem::Version
|
|
21
|
-
version: '3'
|
|
21
|
+
version: '4'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -28,21 +28,21 @@ dependencies:
|
|
|
28
28
|
version: '1.3'
|
|
29
29
|
- - "<"
|
|
30
30
|
- !ruby/object:Gem::Version
|
|
31
|
-
version: '3'
|
|
31
|
+
version: '4'
|
|
32
32
|
- !ruby/object:Gem::Dependency
|
|
33
33
|
name: bundler
|
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
|
35
35
|
requirements:
|
|
36
|
-
- - "
|
|
36
|
+
- - ">="
|
|
37
37
|
- !ruby/object:Gem::Version
|
|
38
|
-
version: '
|
|
38
|
+
version: '0'
|
|
39
39
|
type: :development
|
|
40
40
|
prerelease: false
|
|
41
41
|
version_requirements: !ruby/object:Gem::Requirement
|
|
42
42
|
requirements:
|
|
43
|
-
- - "
|
|
43
|
+
- - ">="
|
|
44
44
|
- !ruby/object:Gem::Version
|
|
45
|
-
version: '
|
|
45
|
+
version: '0'
|
|
46
46
|
- !ruby/object:Gem::Dependency
|
|
47
47
|
name: rake
|
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -75,16 +75,16 @@ dependencies:
|
|
|
75
75
|
name: standard
|
|
76
76
|
requirement: !ruby/object:Gem::Requirement
|
|
77
77
|
requirements:
|
|
78
|
-
- -
|
|
78
|
+
- - "~>"
|
|
79
79
|
- !ruby/object:Gem::Version
|
|
80
|
-
version: 1.37.0
|
|
80
|
+
version: '1.44'
|
|
81
81
|
type: :development
|
|
82
82
|
prerelease: false
|
|
83
83
|
version_requirements: !ruby/object:Gem::Requirement
|
|
84
84
|
requirements:
|
|
85
|
-
- -
|
|
85
|
+
- - "~>"
|
|
86
86
|
- !ruby/object:Gem::Version
|
|
87
|
-
version: 1.37.0
|
|
87
|
+
version: '1.44'
|
|
88
88
|
description: |2
|
|
89
89
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
90
90
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -134,7 +134,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
134
134
|
requirements:
|
|
135
135
|
- - ">="
|
|
136
136
|
- !ruby/object:Gem::Version
|
|
137
|
-
version: '2
|
|
137
|
+
version: '3.2'
|
|
138
138
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
requirements:
|
|
140
140
|
- - ">="
|