xsv 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/README.md +14 -10
- data/lib/xsv/sax_parser.rb +21 -13
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +6 -0
- data/lib/xsv/sheet_rows_handler.rb +1 -1
- data/lib/xsv/sheets_ids_handler.rb +1 -1
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +5 -2
- data/lib/xsv.rb +2 -0
- data/xsv.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 58e5d405e39f42d0e5287d47dd65c65b39a0ab5a2fc7fde3fd85c7211469e6e1
|
|
4
|
+
data.tar.gz: 7100a73ce192536f81a34ffbb1b431a793edf9cb71c1612a547a64a686a8330f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3b8fcbab2e2aa1f02dc0b51051a9b60dd2518b18b72007c2f3e77fa99248e864069d54b0bb43d783f8bb6ef79b6c2504c8cd05c244a2e9c85cddb882de224556
|
|
7
|
+
data.tar.gz: 3ec5120d8b6e365996985c75f4c291e3f4805e9876fd93ae2dfe071c5bd69ad751677cf71121ac23f6e9bed75ab80296b12346d79696cb6105974345d289bb7e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
+
## 1.2.0 2023-01-01
|
|
4
|
+
|
|
5
|
+
**This release contains the following minor breaking changes**
|
|
6
|
+
|
|
7
|
+
- Raise an error when entering hash mode on a sheet with duplicate headers to prevent unintentional behaviour (fixes #44)
|
|
8
|
+
- Xsv now returns frozen strings to further improve performance. This means it's no longer possible to call mutating methods on strings read from worksheets without unfreezing them first.
|
|
9
|
+
- Unescape all HTML entities in XML characters (thanks @til)
|
|
10
|
+
|
|
11
|
+
## 1.1.1 2022-04-01
|
|
12
|
+
|
|
13
|
+
- Improve compatibility with files generated by the Open XML SDK (#40)
|
|
14
|
+
|
|
3
15
|
## 1.1.0 2022-02-13
|
|
4
16
|
|
|
5
17
|
- New, shorter `Xsv.open` syntax as a drop-in replacement for `Xsv::Workbook.open`, which is still supported
|
|
@@ -115,4 +127,4 @@ Fix a small Gemfile issue that broke the 0.3.3 and 0.3.4 releases
|
|
|
115
127
|
|
|
116
128
|
## 0.3.3 - 2020-03-02
|
|
117
129
|
|
|
118
|
-
|
|
130
|
+
Initial version with a changelog and reasonably complete YARD documentation.
|
data/README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# Xsv .xlsx reader
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
[](https://github.com/martijn/xsv/actions/workflows/ruby.yml)
|
|
6
|
+
[](https://app.codecov.io/gh/martijn/xsv)
|
|
7
|
+
[](https://rubydoc.info/github/martijn/xsv)
|
|
8
|
+
[](https://badge.fury.io/rb/xsv)
|
|
7
9
|
|
|
8
10
|
Xsv is a fast, lightweight, pure Ruby parser for ISO/IEC 29500 Office Open XML spreadsheet files
|
|
9
11
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -35,10 +37,9 @@ Or install it yourself as:
|
|
|
35
37
|
|
|
36
38
|
$ gem install xsv
|
|
37
39
|
|
|
38
|
-
Xsv targets ruby >= 2.
|
|
39
|
-
tested successfully with MRI, JRuby, and TruffleRuby.
|
|
40
|
-
|
|
41
|
-
when that becomes stable.
|
|
40
|
+
Xsv targets ruby >= 2.6 and has just a single dependency, `rubyzip`. It has been
|
|
41
|
+
tested successfully with MRI, JRuby, and TruffleRuby. It has no native extensions
|
|
42
|
+
and is designed to be thread-safe.
|
|
42
43
|
|
|
43
44
|
## Usage
|
|
44
45
|
|
|
@@ -84,8 +85,11 @@ sheet.parse_headers!
|
|
|
84
85
|
sheet[0] # => {"header1" => "value1", "header2" => "value2"}
|
|
85
86
|
```
|
|
86
87
|
|
|
87
|
-
|
|
88
|
-
|
|
88
|
+
Because of the way Ruby hashes work, Xsv will raise `Xsv::DuplicateHeaders` if it detects
|
|
89
|
+
duplicate values in the header row when calling `#parse_headers!` or when opening
|
|
90
|
+
a workbook with `parse_headers: true`.
|
|
91
|
+
|
|
92
|
+
`Xsv::Sheet` implements `Enumerable` so along with `#each`
|
|
89
93
|
you can call methods like `#first`, `#filter`/`#select`, and `#map` on it.
|
|
90
94
|
|
|
91
95
|
### Opening a string or buffer instead of filename
|
data/lib/xsv/sax_parser.rb
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "cgi"
|
|
4
|
+
|
|
3
5
|
module Xsv
|
|
4
6
|
class SaxParser
|
|
5
|
-
ATTR_REGEX = /((\
|
|
7
|
+
ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/mn
|
|
6
8
|
|
|
7
9
|
def parse(io)
|
|
8
10
|
responds_to_end_element = respond_to?(:end_element)
|
|
@@ -36,14 +38,7 @@ module Xsv
|
|
|
36
38
|
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
|
37
39
|
|
|
38
40
|
if responds_to_characters && !chars.empty?
|
|
39
|
-
|
|
40
|
-
chars.gsub!("&", "&")
|
|
41
|
-
chars.gsub!("'", "'")
|
|
42
|
-
chars.gsub!(">", ">")
|
|
43
|
-
chars.gsub!("<", "<")
|
|
44
|
-
chars.gsub!(""", '"')
|
|
45
|
-
end
|
|
46
|
-
characters(chars)
|
|
41
|
+
characters(CGI.unescapeHTML(chars))
|
|
47
42
|
end
|
|
48
43
|
|
|
49
44
|
state = :look_end
|
|
@@ -67,13 +62,15 @@ module Xsv
|
|
|
67
62
|
args = nil
|
|
68
63
|
end
|
|
69
64
|
|
|
65
|
+
stripped_tag_name = strip_namespace(tag_name)
|
|
66
|
+
|
|
70
67
|
if tag_name.start_with?("/")
|
|
71
|
-
end_element(tag_name[1..]) if responds_to_end_element
|
|
68
|
+
end_element(strip_namespace(tag_name[1..])) if responds_to_end_element
|
|
72
69
|
elsif args.nil?
|
|
73
|
-
start_element(
|
|
70
|
+
start_element(stripped_tag_name, nil)
|
|
74
71
|
else
|
|
75
|
-
start_element(
|
|
76
|
-
end_element(
|
|
72
|
+
start_element(stripped_tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |(_, k, v), h| h[k.to_sym] = v })
|
|
73
|
+
end_element(stripped_tag_name) if responds_to_end_element && args.end_with?("/")
|
|
77
74
|
end
|
|
78
75
|
|
|
79
76
|
state = :look_start
|
|
@@ -85,5 +82,16 @@ module Xsv
|
|
|
85
82
|
end
|
|
86
83
|
end
|
|
87
84
|
end
|
|
85
|
+
|
|
86
|
+
private
|
|
87
|
+
|
|
88
|
+
# I am not proud of this, but there's simply no need to deal with xmlns for this application ¯\_(ツ)_/¯
|
|
89
|
+
def strip_namespace(tag)
|
|
90
|
+
if (offset = tag.index(":"))
|
|
91
|
+
tag[offset + 1..]
|
|
92
|
+
else
|
|
93
|
+
tag
|
|
94
|
+
end
|
|
95
|
+
end
|
|
88
96
|
end
|
|
89
97
|
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -83,6 +83,12 @@ module Xsv
|
|
|
83
83
|
# @return [self]
|
|
84
84
|
def parse_headers!
|
|
85
85
|
@headers = parse_headers
|
|
86
|
+
|
|
87
|
+
# Check for duplicate headers, but don't care about nil columns
|
|
88
|
+
if (duplicate_header = @headers.detect { |h| @headers.count(h) > 1 })
|
|
89
|
+
raise Xsv::DuplicateHeaders, "Duplicate header '#{duplicate_header}' found, consider parsing this sheet in array mode."
|
|
90
|
+
end
|
|
91
|
+
|
|
86
92
|
@mode = :hash
|
|
87
93
|
|
|
88
94
|
self
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -93,8 +93,11 @@ module Xsv
|
|
|
93
93
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
|
94
94
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
95
95
|
end.map do |entry|
|
|
96
|
-
rel = @relationships.detect
|
|
97
|
-
|
|
96
|
+
rel = @relationships.detect do |r|
|
|
97
|
+
entry.name.end_with?(r[:Target].sub(/^\//, "")) && # ignore leading / in some files
|
|
98
|
+
r[:Type].end_with?("worksheet")
|
|
99
|
+
end
|
|
100
|
+
sheet_ids = @sheet_ids.detect { |i| i[:id] == rel[:Id] }
|
|
98
101
|
Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids).tap do |sheet|
|
|
99
102
|
sheet.parse_headers! if mode == :hash
|
|
100
103
|
end
|
data/lib/xsv.rb
CHANGED
|
@@ -21,6 +21,8 @@ require "xsv/workbook"
|
|
|
21
21
|
module Xsv
|
|
22
22
|
class Error < StandardError; end
|
|
23
23
|
|
|
24
|
+
class DuplicateHeaders < StandardError; end
|
|
25
|
+
|
|
24
26
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
|
25
27
|
# or misinterpreted .xlsx document
|
|
26
28
|
class AssertionFailed < StandardError; end
|
data/xsv.gemspec
CHANGED
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
if spec.respond_to?(:metadata)
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/martijn/xsv"
|
|
24
|
-
spec.metadata["changelog_uri"] = "https://
|
|
24
|
+
spec.metadata["changelog_uri"] = "https://raw.githubusercontent.com/martijn/xsv/main/CHANGELOG.md"
|
|
25
25
|
else
|
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
|
27
27
|
"public gem pushes."
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.0
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-01-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -141,7 +141,7 @@ licenses:
|
|
|
141
141
|
metadata:
|
|
142
142
|
homepage_uri: https://github.com/martijn/xsv
|
|
143
143
|
source_code_uri: https://github.com/martijn/xsv
|
|
144
|
-
changelog_uri: https://
|
|
144
|
+
changelog_uri: https://raw.githubusercontent.com/martijn/xsv/main/CHANGELOG.md
|
|
145
145
|
post_install_message:
|
|
146
146
|
rdoc_options: []
|
|
147
147
|
require_paths:
|