xsv 1.3.2 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +39 -0
- data/.github/workflows/ruby.yml +1 -1
- data/CHANGELOG.md +18 -3
- data/README.md +2 -2
- data/lib/xsv/sax_parser.rb +71 -7
- data/lib/xsv/sheet.rb +18 -6
- data/lib/xsv/sheet_bounds_handler.rb +1 -1
- data/lib/xsv/sheet_rows_handler.rb +6 -1
- data/lib/xsv/version.rb +1 -1
- data/xsv.gemspec +4 -5
- metadata +14 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a0fb3d682de516914c72424844559dc8ac53ef4a3ba0b906c90f44261b2411fd
|
|
4
|
+
data.tar.gz: ca269e4b84ea4c5cb865d1cd5599cf18740e840f9f26d575f2065688950b7d2a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2cd6b2190e84323f58e2efc1cf8ff91ae625c9febb366886d4530178d1e0c5644ca10584fe237dff8f77e8644c300b874e2feabc32116293fbd9bd63514ed272
|
|
7
|
+
data.tar.gz: f27435b037cbc53d772dd57923f9f38aa4a3becd4f8c96d4ad7f79df45fac29ee804c0c62b0a57cf69f019f04304ae95c6229f3ed8f8cb7fcb7d8caa460bfbc3
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
release:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
with:
|
|
17
|
+
fetch-depth: 0 # Fetch all history for changelog generation
|
|
18
|
+
|
|
19
|
+
- name: Create GitHub Release
|
|
20
|
+
env:
|
|
21
|
+
GH_TOKEN: ${{ github.token }}
|
|
22
|
+
run: |
|
|
23
|
+
# Extract version from tag
|
|
24
|
+
VERSION=${GITHUB_REF#refs/tags/}
|
|
25
|
+
|
|
26
|
+
# Generate release notes from commits since previous tag
|
|
27
|
+
PREV_TAG=$(git describe --tags --abbrev=0 ${VERSION}^ 2>/dev/null || echo "")
|
|
28
|
+
|
|
29
|
+
if [ -n "$PREV_TAG" ]; then
|
|
30
|
+
NOTES=$(git log ${PREV_TAG}..${VERSION} --pretty=format:"- %s (%h)" --no-merges)
|
|
31
|
+
else
|
|
32
|
+
NOTES="Initial release"
|
|
33
|
+
fi
|
|
34
|
+
|
|
35
|
+
# Create the release
|
|
36
|
+
gh release create ${VERSION} \
|
|
37
|
+
--title "${VERSION}" \
|
|
38
|
+
--notes "${NOTES}" \
|
|
39
|
+
--verify-tag
|
data/.github/workflows/ruby.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,14 +1,29 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
+
## 1.4.1 2026-04-11
|
|
4
|
+
|
|
5
|
+
- Add Range support to Sheet#[] (thanks @paddor)
|
|
6
|
+
- Fix SheetBoundsHandler to detect rows with inlineStr cells (thanks @kikumiyako)
|
|
7
|
+
|
|
8
|
+
## 1.4.0 2026-01-29
|
|
9
|
+
|
|
10
|
+
- Ruby 2.7, 3.0, and 3.1 are no longer supported. Xsv is now compatible with Ruby 3.2 through 4.0, latest JRuby, and latest TruffleRuby
|
|
11
|
+
- Add compatibility with Rubyzip 3
|
|
12
|
+
- Fix UTF-8 encoding issues when parsing XML with multi-byte characters
|
|
13
|
+
- Handle incomplete UTF-8 sequences at chunk boundaries in the streaming XML parser
|
|
14
|
+
- Fix parsing of rows without the `r` attribute (thanks @romanbsd)
|
|
15
|
+
- Performance: avoid calling `unescapeHTML` unless there are entities in the text
|
|
16
|
+
- Fix typos in CHANGELOG (thanks @jdufresne)
|
|
17
|
+
|
|
3
18
|
## 1.3.2 2024-12-25
|
|
4
19
|
|
|
5
|
-
- Xsv is now
|
|
20
|
+
- Xsv is now compatible with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
|
|
6
21
|
- Sheet#each_row returns Enumerator when no block is given (thanks @myabc)
|
|
7
22
|
|
|
8
23
|
## 1.3.1 2024-05-06
|
|
9
24
|
|
|
10
|
-
- Fix issue #56 with multiple nil headers
|
|
11
|
-
- Ignore
|
|
25
|
+
- Fix issue #56 with multiple nil headers
|
|
26
|
+
- Ignore columns with a `nil` header in hash mode
|
|
12
27
|
|
|
13
28
|
## 1.3.0 2023-12-16
|
|
14
29
|
|
data/README.md
CHANGED
|
@@ -34,8 +34,8 @@ Or install it yourself as:
|
|
|
34
34
|
|
|
35
35
|
$ gem install xsv
|
|
36
36
|
|
|
37
|
-
Xsv targets
|
|
38
|
-
tested successfully with MRI, JRuby, and TruffleRuby. It has no native extensions
|
|
37
|
+
Xsv targets Ruby >= 3.2 and has just a single dependency, `rubyzip`. It has been
|
|
38
|
+
tested successfully with MRI (including Ruby 4.0), JRuby, and TruffleRuby. It has no native extensions
|
|
39
39
|
and is designed to be thread-safe.
|
|
40
40
|
|
|
41
41
|
## Usage
|
data/lib/xsv/sax_parser.rb
CHANGED
|
@@ -4,7 +4,48 @@ require "cgi"
|
|
|
4
4
|
|
|
5
5
|
module Xsv
|
|
6
6
|
class SaxParser
|
|
7
|
-
ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/
|
|
7
|
+
ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/m
|
|
8
|
+
|
|
9
|
+
# Returns the number of bytes to trim from the end of a UTF-8 string
|
|
10
|
+
# to avoid splitting a multi-byte character. Returns 0 if the string
|
|
11
|
+
# ends with a complete character.
|
|
12
|
+
def self.incomplete_utf8_tail_size(bytes)
|
|
13
|
+
return 0 if bytes.empty?
|
|
14
|
+
|
|
15
|
+
# Check up to 3 bytes from the end (max UTF-8 char is 4 bytes)
|
|
16
|
+
check_length = [bytes.bytesize, 3].min
|
|
17
|
+
tail = bytes.byteslice(-check_length, check_length)
|
|
18
|
+
|
|
19
|
+
tail.each_byte.with_index.reverse_each do |byte, i|
|
|
20
|
+
# Check if this is a leading byte (starts a multi-byte sequence)
|
|
21
|
+
if byte >= 0xC0 # 11000000 - start of multi-byte sequence
|
|
22
|
+
# i is position in tail, bytes after leading byte = check_length - i - 1
|
|
23
|
+
# total bytes in sequence = 1 (leading) + continuation bytes = check_length - i
|
|
24
|
+
bytes_in_sequence = check_length - i
|
|
25
|
+
|
|
26
|
+
# Determine expected length from leading byte
|
|
27
|
+
expected_length = if byte >= 0xF0 # 11110xxx - 4 byte sequence
|
|
28
|
+
4
|
|
29
|
+
elsif byte >= 0xE0 # 1110xxxx - 3 byte sequence
|
|
30
|
+
3
|
|
31
|
+
else # 110xxxxx - 2 byte sequence
|
|
32
|
+
2
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# If we don't have enough bytes, this sequence is incomplete
|
|
36
|
+
return bytes_in_sequence if bytes_in_sequence < expected_length
|
|
37
|
+
|
|
38
|
+
# Sequence is complete
|
|
39
|
+
return 0
|
|
40
|
+
elsif byte < 0x80
|
|
41
|
+
# ASCII byte - string ends with complete character
|
|
42
|
+
return 0
|
|
43
|
+
end
|
|
44
|
+
# else: continuation byte (10xxxxxx), keep looking for leading byte
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
0
|
|
48
|
+
end
|
|
8
49
|
|
|
9
50
|
def parse(io)
|
|
10
51
|
responds_to_end_element = respond_to?(:end_element)
|
|
@@ -16,17 +57,36 @@ module Xsv
|
|
|
16
57
|
eof_reached = true
|
|
17
58
|
must_read = false
|
|
18
59
|
else
|
|
19
|
-
pbuf = String.new(capacity: 8192)
|
|
60
|
+
pbuf = String.new(capacity: 8192, encoding: "utf-8")
|
|
20
61
|
eof_reached = false
|
|
21
62
|
must_read = true
|
|
22
63
|
end
|
|
64
|
+
leftover = String.new(encoding: "binary")
|
|
23
65
|
|
|
24
66
|
loop do
|
|
25
67
|
if must_read
|
|
26
68
|
begin
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
69
|
+
chunk = io.sysread(2048)
|
|
70
|
+
if chunk
|
|
71
|
+
# Prepend any leftover bytes from previous incomplete UTF-8 sequence
|
|
72
|
+
chunk = leftover << chunk unless leftover.empty?
|
|
73
|
+
|
|
74
|
+
# Check if chunk ends with incomplete UTF-8 sequence
|
|
75
|
+
trim = SaxParser.incomplete_utf8_tail_size(chunk)
|
|
76
|
+
if trim > 0
|
|
77
|
+
leftover = chunk.byteslice(-trim, trim)
|
|
78
|
+
chunk = chunk.byteslice(0, chunk.bytesize - trim)
|
|
79
|
+
else
|
|
80
|
+
leftover = String.new(encoding: "binary")
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
pbuf << chunk.force_encoding("utf-8")
|
|
84
|
+
else
|
|
85
|
+
# rubyzip < 3 returns nil from sysread on EOF
|
|
86
|
+
eof_reached = true
|
|
87
|
+
end
|
|
88
|
+
rescue EOFError
|
|
89
|
+
# EOFError is thrown by IO and rubyzip >= 3
|
|
30
90
|
eof_reached = true
|
|
31
91
|
end
|
|
32
92
|
|
|
@@ -38,7 +98,11 @@ module Xsv
|
|
|
38
98
|
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
|
39
99
|
|
|
40
100
|
if responds_to_characters && !chars.empty?
|
|
41
|
-
|
|
101
|
+
if chars.include?("&")
|
|
102
|
+
characters(CGI.unescapeHTML(chars))
|
|
103
|
+
else
|
|
104
|
+
characters(chars)
|
|
105
|
+
end
|
|
42
106
|
end
|
|
43
107
|
|
|
44
108
|
state = :look_end
|
|
@@ -75,7 +139,7 @@ module Xsv
|
|
|
75
139
|
start_element(tag_name, nil)
|
|
76
140
|
else
|
|
77
141
|
attribute_buffer = {}
|
|
78
|
-
attributes = args.scan(ATTR_REGEX)
|
|
142
|
+
attributes = args.force_encoding("utf-8").scan(ATTR_REGEX)
|
|
79
143
|
while (attr = attributes.delete_at(0))
|
|
80
144
|
attribute_buffer[attr[1].to_sym] = attr[2]
|
|
81
145
|
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -64,14 +64,26 @@ module Xsv
|
|
|
64
64
|
|
|
65
65
|
alias_method :each, :each_row
|
|
66
66
|
|
|
67
|
-
# Get row by number, starting at 0. Returns either a hash or an array
|
|
67
|
+
# Get row by number or a range of rows, starting at 0. Returns either a hash or an array
|
|
68
|
+
# based on the current mode. When called with a Range, returns an array of rows.
|
|
68
69
|
# If the specified index is out of bounds an empty row is returned.
|
|
69
|
-
def [](
|
|
70
|
-
|
|
71
|
-
|
|
70
|
+
def [](number_or_range)
|
|
71
|
+
case number_or_range
|
|
72
|
+
when Range
|
|
73
|
+
rows = []
|
|
74
|
+
each_with_index do |row, i|
|
|
75
|
+
rows << row if number_or_range.cover?(i)
|
|
76
|
+
end
|
|
77
|
+
rows
|
|
78
|
+
when Integer
|
|
79
|
+
each_with_index do |row, i|
|
|
80
|
+
return row if i == number_or_range
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
empty_row
|
|
84
|
+
else
|
|
85
|
+
raise ArgumentError, "Expected Integer or Range, got #{number_or_range.class}"
|
|
72
86
|
end
|
|
73
|
-
|
|
74
|
-
empty_row
|
|
75
87
|
end
|
|
76
88
|
|
|
77
89
|
# Load headers in the top row of the worksheet. After parsing of headers
|
|
@@ -34,7 +34,12 @@ module Xsv
|
|
|
34
34
|
@store_characters = true
|
|
35
35
|
when "row"
|
|
36
36
|
@current_row = (@mode == :array) ? [] : @empty_row.dup
|
|
37
|
-
|
|
37
|
+
if attrs[:r]
|
|
38
|
+
@current_row_number = attrs[:r].to_i
|
|
39
|
+
else
|
|
40
|
+
# Use position-based numbering when r attribute is missing
|
|
41
|
+
@current_row_number += 1
|
|
42
|
+
end
|
|
38
43
|
end
|
|
39
44
|
end
|
|
40
45
|
|
data/lib/xsv/version.rb
CHANGED
data/xsv.gemspec
CHANGED
|
@@ -36,13 +36,12 @@ Gem::Specification.new do |spec|
|
|
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
37
37
|
spec.require_paths = ["lib"]
|
|
38
38
|
|
|
39
|
-
spec.required_ruby_version = ">= 2
|
|
39
|
+
spec.required_ruby_version = ">= 3.2"
|
|
40
40
|
|
|
41
|
-
spec.add_dependency "rubyzip", ">= 1.3", "<
|
|
41
|
+
spec.add_dependency "rubyzip", ">= 1.3", "< 4"
|
|
42
42
|
|
|
43
|
-
spec.add_development_dependency "bundler"
|
|
43
|
+
spec.add_development_dependency "bundler"
|
|
44
44
|
spec.add_development_dependency "rake", "~> 13.2"
|
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.24"
|
|
46
|
-
|
|
47
|
-
spec.add_development_dependency "standard", "1.37.0"
|
|
46
|
+
spec.add_development_dependency "standard", "~> 1.44"
|
|
48
47
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 2026-04-11 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: rubyzip
|
|
@@ -18,7 +18,7 @@ dependencies:
|
|
|
18
18
|
version: '1.3'
|
|
19
19
|
- - "<"
|
|
20
20
|
- !ruby/object:Gem::Version
|
|
21
|
-
version: '
|
|
21
|
+
version: '4'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -28,21 +28,21 @@ dependencies:
|
|
|
28
28
|
version: '1.3'
|
|
29
29
|
- - "<"
|
|
30
30
|
- !ruby/object:Gem::Version
|
|
31
|
-
version: '
|
|
31
|
+
version: '4'
|
|
32
32
|
- !ruby/object:Gem::Dependency
|
|
33
33
|
name: bundler
|
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
|
35
35
|
requirements:
|
|
36
|
-
- - "
|
|
36
|
+
- - ">="
|
|
37
37
|
- !ruby/object:Gem::Version
|
|
38
|
-
version: '
|
|
38
|
+
version: '0'
|
|
39
39
|
type: :development
|
|
40
40
|
prerelease: false
|
|
41
41
|
version_requirements: !ruby/object:Gem::Requirement
|
|
42
42
|
requirements:
|
|
43
|
-
- - "
|
|
43
|
+
- - ">="
|
|
44
44
|
- !ruby/object:Gem::Version
|
|
45
|
-
version: '
|
|
45
|
+
version: '0'
|
|
46
46
|
- !ruby/object:Gem::Dependency
|
|
47
47
|
name: rake
|
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -75,16 +75,16 @@ dependencies:
|
|
|
75
75
|
name: standard
|
|
76
76
|
requirement: !ruby/object:Gem::Requirement
|
|
77
77
|
requirements:
|
|
78
|
-
- -
|
|
78
|
+
- - "~>"
|
|
79
79
|
- !ruby/object:Gem::Version
|
|
80
|
-
version: 1.
|
|
80
|
+
version: '1.44'
|
|
81
81
|
type: :development
|
|
82
82
|
prerelease: false
|
|
83
83
|
version_requirements: !ruby/object:Gem::Requirement
|
|
84
84
|
requirements:
|
|
85
|
-
- -
|
|
85
|
+
- - "~>"
|
|
86
86
|
- !ruby/object:Gem::Version
|
|
87
|
-
version: 1.
|
|
87
|
+
version: '1.44'
|
|
88
88
|
description: |2
|
|
89
89
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
90
90
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -96,6 +96,7 @@ executables: []
|
|
|
96
96
|
extensions: []
|
|
97
97
|
extra_rdoc_files: []
|
|
98
98
|
files:
|
|
99
|
+
- ".github/workflows/release.yml"
|
|
99
100
|
- ".github/workflows/ruby.yml"
|
|
100
101
|
- ".gitignore"
|
|
101
102
|
- ".standard.yml"
|
|
@@ -134,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
134
135
|
requirements:
|
|
135
136
|
- - ">="
|
|
136
137
|
- !ruby/object:Gem::Version
|
|
137
|
-
version: '2
|
|
138
|
+
version: '3.2'
|
|
138
139
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
140
|
requirements:
|
|
140
141
|
- - ">="
|