xsv 1.3.2 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e03874b3017fd111c7b63d68bb2273f406cab5e129b4edb174c2710aed0cbab9
4
- data.tar.gz: 39ebe2ffdc657efd737f62026d9cc18a531262f4722189f3d2a3012a1e0885d9
3
+ metadata.gz: a0fb3d682de516914c72424844559dc8ac53ef4a3ba0b906c90f44261b2411fd
4
+ data.tar.gz: ca269e4b84ea4c5cb865d1cd5599cf18740e840f9f26d575f2065688950b7d2a
5
5
  SHA512:
6
- metadata.gz: 95e8ea84b5a39cb1158f7f1a8eb531f00f76d94eacfa370d74080b943f213dbf6d5aa9df237b54896769f4ff9e3a0a1a1393863d832e6b9cf2ceb21e2069d711
7
- data.tar.gz: 9b0392cebd9f720d0c716a99f8a179a4def088e90ed79721c40fdecba518365e1e23f8fb82bbf41dbb474329d44414b1742d375a44388a2fc56cd074c870b2f3
6
+ metadata.gz: 2cd6b2190e84323f58e2efc1cf8ff91ae625c9febb366886d4530178d1e0c5644ca10584fe237dff8f77e8644c300b874e2feabc32116293fbd9bd63514ed272
7
+ data.tar.gz: f27435b037cbc53d772dd57923f9f38aa4a3becd4f8c96d4ad7f79df45fac29ee804c0c62b0a57cf69f019f04304ae95c6229f3ed8f8cb7fcb7d8caa460bfbc3
data/.github/workflows/release.yml ADDED
@@ -0,0 +1,39 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ release:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ with:
17
+ fetch-depth: 0 # Fetch all history for changelog generation
18
+
19
+ - name: Create GitHub Release
20
+ env:
21
+ GH_TOKEN: ${{ github.token }}
22
+ run: |
23
+ # Extract version from tag
24
+ VERSION=${GITHUB_REF#refs/tags/}
25
+
26
+ # Generate release notes from commits since previous tag
27
+ PREV_TAG=$(git describe --tags --abbrev=0 ${VERSION}^ 2>/dev/null || echo "")
28
+
29
+ if [ -n "$PREV_TAG" ]; then
30
+ NOTES=$(git log ${PREV_TAG}..${VERSION} --pretty=format:"- %s (%h)" --no-merges)
31
+ else
32
+ NOTES="Initial release"
33
+ fi
34
+
35
+ # Create the release
36
+ gh release create ${VERSION} \
37
+ --title "${VERSION}" \
38
+ --notes "${NOTES}" \
39
+ --verify-tag
data/.github/workflows/ruby.yml CHANGED
@@ -19,7 +19,7 @@ jobs:
19
19
  runs-on: ubuntu-latest
20
20
  strategy:
21
21
  matrix:
22
- ruby-version: ['2.7', '3.0', '3.1', '3.2', '3.3', '3.4', 'jruby', 'truffleruby']
22
+ ruby-version: ['3.2', '3.3', '3.4', '4.0', 'jruby', 'truffleruby']
23
23
 
24
24
  steps:
25
25
  - uses: actions/checkout@v3
data/CHANGELOG.md CHANGED
@@ -1,14 +1,29 @@
1
1
  # Xsv Changelog
2
2
 
3
+ ## 1.4.1 2026-04-11
4
+
5
+ - Add Range support to Sheet#[] (thanks @paddor)
6
+ - Fix SheetBoundsHandler to detect rows with inlineStr cells (thanks @kikumiyako)
7
+
8
+ ## 1.4.0 2026-01-29
9
+
10
+ - Ruby 2.7, 3.0, and 3.1 are no longer supported. Xsv is now compatible with Ruby 3.2 through 4.0, latest JRuby, and latest TruffleRuby
11
+ - Add compatibility with Rubyzip 3
12
+ - Fix UTF-8 encoding issues when parsing XML with multi-byte characters
13
+ - Handle incomplete UTF-8 sequences at chunk boundaries in the streaming XML parser
14
+ - Fix parsing of rows without the `r` attribute (thanks @romanbsd)
15
+ - Performance: avoid calling `unescapeHTML` unless there are entities in the text
16
+ - Fix typos in CHANGELOG (thanks @jdufresne)
17
+
3
18
  ## 1.3.2 2024-12-25
4
19
 
5
- - Xsv is now compatbile with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
20
+ - Xsv is now compatible with Ruby 2.7 through 3.4, latest JRuby, and latest TruffleRuby
6
21
  - Sheet#each_row returns Enumerator when no block is given (thanks @myabc)
7
22
 
8
23
  ## 1.3.1 2024-05-06
9
24
 
10
- - Fix issue #56 with multiple nil headers
11
- - Ignore colums with a `nil` header in hash mode
25
+ - Fix issue #56 with multiple nil headers
26
+ - Ignore columns with a `nil` header in hash mode
12
27
 
13
28
  ## 1.3.0 2023-12-16
14
29
 
data/README.md CHANGED
@@ -34,8 +34,8 @@ Or install it yourself as:
34
34
 
35
35
  $ gem install xsv
36
36
 
37
- Xsv targets ruby >= 2.7 and has a just single dependency, `rubyzip`. It has been
38
- tested successfully with MRI, JRuby, and TruffleRuby. It has no native extensions
37
+ Xsv targets Ruby >= 3.2 and has just a single dependency, `rubyzip`. It has been
38
+ tested successfully with MRI (including Ruby 4.0), JRuby, and TruffleRuby. It has no native extensions
39
39
  and is designed to be thread-safe.
40
40
 
41
41
  ## Usage
data/lib/xsv/sax_parser.rb CHANGED
@@ -4,7 +4,48 @@ require "cgi"
4
4
 
5
5
  module Xsv
6
6
  class SaxParser
7
- ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/mn
7
+ ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/m
8
+
9
+ # Returns the number of bytes to trim from the end of a UTF-8 string
10
+ # to avoid splitting a multi-byte character. Returns 0 if the string
11
+ # ends with a complete character.
12
+ def self.incomplete_utf8_tail_size(bytes)
13
+ return 0 if bytes.empty?
14
+
15
+ # Check up to 3 bytes from the end (max UTF-8 char is 4 bytes)
16
+ check_length = [bytes.bytesize, 3].min
17
+ tail = bytes.byteslice(-check_length, check_length)
18
+
19
+ tail.each_byte.with_index.reverse_each do |byte, i|
20
+ # Check if this is a leading byte (starts a multi-byte sequence)
21
+ if byte >= 0xC0 # 11000000 - start of multi-byte sequence
22
+ # i is position in tail, bytes after leading byte = check_length - i - 1
23
+ # total bytes in sequence = 1 (leading) + continuation bytes = check_length - i
24
+ bytes_in_sequence = check_length - i
25
+
26
+ # Determine expected length from leading byte
27
+ expected_length = if byte >= 0xF0 # 11110xxx - 4 byte sequence
28
+ 4
29
+ elsif byte >= 0xE0 # 1110xxxx - 3 byte sequence
30
+ 3
31
+ else # 110xxxxx - 2 byte sequence
32
+ 2
33
+ end
34
+
35
+ # If we don't have enough bytes, this sequence is incomplete
36
+ return bytes_in_sequence if bytes_in_sequence < expected_length
37
+
38
+ # Sequence is complete
39
+ return 0
40
+ elsif byte < 0x80
41
+ # ASCII byte - string ends with complete character
42
+ return 0
43
+ end
44
+ # else: continuation byte (10xxxxxx), keep looking for leading byte
45
+ end
46
+
47
+ 0
48
+ end
8
49
 
9
50
  def parse(io)
10
51
  responds_to_end_element = respond_to?(:end_element)
@@ -16,17 +57,36 @@ module Xsv
16
57
  eof_reached = true
17
58
  must_read = false
18
59
  else
19
- pbuf = String.new(capacity: 8192)
60
+ pbuf = String.new(capacity: 8192, encoding: "utf-8")
20
61
  eof_reached = false
21
62
  must_read = true
22
63
  end
64
+ leftover = String.new(encoding: "binary")
23
65
 
24
66
  loop do
25
67
  if must_read
26
68
  begin
27
- pbuf << io.sysread(2048)
28
- rescue EOFError, TypeError
29
- # EOFError is thrown by IO, rubyzip returns nil from sysread on EOF
69
+ chunk = io.sysread(2048)
70
+ if chunk
71
+ # Prepend any leftover bytes from previous incomplete UTF-8 sequence
72
+ chunk = leftover << chunk unless leftover.empty?
73
+
74
+ # Check if chunk ends with incomplete UTF-8 sequence
75
+ trim = SaxParser.incomplete_utf8_tail_size(chunk)
76
+ if trim > 0
77
+ leftover = chunk.byteslice(-trim, trim)
78
+ chunk = chunk.byteslice(0, chunk.bytesize - trim)
79
+ else
80
+ leftover = String.new(encoding: "binary")
81
+ end
82
+
83
+ pbuf << chunk.force_encoding("utf-8")
84
+ else
85
+ # rubyzip < 3 returns nil from sysread on EOF
86
+ eof_reached = true
87
+ end
88
+ rescue EOFError
89
+ # EOFError is thrown by IO and rubyzip >= 3
30
90
  eof_reached = true
31
91
  end
32
92
 
@@ -38,7 +98,11 @@ module Xsv
38
98
  chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
39
99
 
40
100
  if responds_to_characters && !chars.empty?
41
- characters(CGI.unescapeHTML(chars))
101
+ if chars.include?("&")
102
+ characters(CGI.unescapeHTML(chars))
103
+ else
104
+ characters(chars)
105
+ end
42
106
  end
43
107
 
44
108
  state = :look_end
@@ -75,7 +139,7 @@ module Xsv
75
139
  start_element(tag_name, nil)
76
140
  else
77
141
  attribute_buffer = {}
78
- attributes = args.scan(ATTR_REGEX)
142
+ attributes = args.force_encoding("utf-8").scan(ATTR_REGEX)
79
143
  while (attr = attributes.delete_at(0))
80
144
  attribute_buffer[attr[1].to_sym] = attr[2]
81
145
  end
data/lib/xsv/sheet.rb CHANGED
@@ -64,14 +64,26 @@ module Xsv
64
64
 
65
65
  alias_method :each, :each_row
66
66
 
67
- # Get row by number, starting at 0. Returns either a hash or an array based on the current row.
67
+ # Get row by number or a range of rows, starting at 0. Returns either a hash or an array
68
+ # based on the current mode. When called with a Range, returns an array of rows.
68
69
  # If the specified index is out of bounds an empty row is returned.
69
- def [](number)
70
- each_with_index do |row, i|
71
- return row if i == number
70
+ def [](number_or_range)
71
+ case number_or_range
72
+ when Range
73
+ rows = []
74
+ each_with_index do |row, i|
75
+ rows << row if number_or_range.cover?(i)
76
+ end
77
+ rows
78
+ when Integer
79
+ each_with_index do |row, i|
80
+ return row if i == number_or_range
81
+ end
82
+
83
+ empty_row
84
+ else
85
+ raise ArgumentError, "Expected Integer or Range, got #{number_or_range.class}"
72
86
  end
73
-
74
- empty_row
75
87
  end
76
88
 
77
89
  # Load headers in the top row of the worksheet. After parsing of headers
data/lib/xsv/sheet_bounds_handler.rb CHANGED
@@ -40,7 +40,7 @@ module Xsv
40
40
  when "c"
41
41
  @state = name
42
42
  @cell = attrs[:r]
43
- when "v"
43
+ when "v", "is"
44
44
  col = column_index(@cell)
45
45
  @max_column = col if col > @max_column
46
46
  @max_row = @row if @row > @max_row
data/lib/xsv/sheet_rows_handler.rb CHANGED
@@ -34,7 +34,12 @@ module Xsv
34
34
  @store_characters = true
35
35
  when "row"
36
36
  @current_row = (@mode == :array) ? [] : @empty_row.dup
37
- @current_row_number = attrs[:r].to_i
37
+ if attrs[:r]
38
+ @current_row_number = attrs[:r].to_i
39
+ else
40
+ # Use position-based numbering when r attribute is missing
41
+ @current_row_number += 1
42
+ end
38
43
  end
39
44
  end
40
45
 
data/lib/xsv/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xsv
4
- VERSION = "1.3.2"
4
+ VERSION = "1.4.1"
5
5
  end
data/xsv.gemspec CHANGED
@@ -36,13 +36,12 @@ Gem::Specification.new do |spec|
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
38
 
39
- spec.required_ruby_version = ">= 2.7"
39
+ spec.required_ruby_version = ">= 3.2"
40
40
 
41
- spec.add_dependency "rubyzip", ">= 1.3", "< 3"
41
+ spec.add_dependency "rubyzip", ">= 1.3", "< 4"
42
42
 
43
- spec.add_development_dependency "bundler", "< 3"
43
+ spec.add_development_dependency "bundler"
44
44
  spec.add_development_dependency "rake", "~> 13.2"
45
45
  spec.add_development_dependency "minitest", "~> 5.24"
46
- # Maintain Ruby 2.7 compatibility
47
- spec.add_development_dependency "standard", "1.37.0"
46
+ spec.add_development_dependency "standard", "~> 1.44"
48
47
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2024-12-25 00:00:00.000000000 Z
10
+ date: 2026-04-11 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: rubyzip
@@ -18,7 +18,7 @@ dependencies:
18
18
  version: '1.3'
19
19
  - - "<"
20
20
  - !ruby/object:Gem::Version
21
- version: '3'
21
+ version: '4'
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,21 +28,21 @@ dependencies:
28
28
  version: '1.3'
29
29
  - - "<"
30
30
  - !ruby/object:Gem::Version
31
- version: '3'
31
+ version: '4'
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: bundler
34
34
  requirement: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - "<"
36
+ - - ">="
37
37
  - !ruby/object:Gem::Version
38
- version: '3'
38
+ version: '0'
39
39
  type: :development
40
40
  prerelease: false
41
41
  version_requirements: !ruby/object:Gem::Requirement
42
42
  requirements:
43
- - - "<"
43
+ - - ">="
44
44
  - !ruby/object:Gem::Version
45
- version: '3'
45
+ version: '0'
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: rake
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -75,16 +75,16 @@ dependencies:
75
75
  name: standard
76
76
  requirement: !ruby/object:Gem::Requirement
77
77
  requirements:
78
- - - '='
78
+ - - "~>"
79
79
  - !ruby/object:Gem::Version
80
- version: 1.37.0
80
+ version: '1.44'
81
81
  type: :development
82
82
  prerelease: false
83
83
  version_requirements: !ruby/object:Gem::Requirement
84
84
  requirements:
85
- - - '='
85
+ - - "~>"
86
86
  - !ruby/object:Gem::Version
87
- version: 1.37.0
87
+ version: '1.44'
88
88
  description: |2
89
89
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
90
90
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
@@ -96,6 +96,7 @@ executables: []
96
96
  extensions: []
97
97
  extra_rdoc_files: []
98
98
  files:
99
+ - ".github/workflows/release.yml"
99
100
  - ".github/workflows/ruby.yml"
100
101
  - ".gitignore"
101
102
  - ".standard.yml"
@@ -134,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
134
135
  requirements:
135
136
  - - ">="
136
137
  - !ruby/object:Gem::Version
137
- version: '2.7'
138
+ version: '3.2'
138
139
  required_rubygems_version: !ruby/object:Gem::Requirement
139
140
  requirements:
140
141
  - - ">="