creek 2.4.1 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +3 -0
- data/lib/creek/shared_strings.rb +2 -2
- data/lib/creek/sheet.rb +2 -1
- data/lib/creek/styles/converter.rb +12 -2
- data/lib/creek/version.rb +1 -1
- data/spec/fixtures/escaped.xlsx +0 -0
- data/spec/fixtures/escaped2.xlsx +0 -0
- data/spec/fixtures/sst.xml +3 -0
- data/spec/shared_string_spec.rb +2 -1
- data/spec/sheet_spec.rb +12 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 997fbfad1c0930d6cc9afa229a7d4e43f4f157ac2a8cda3df9863994a7b4a3fe
|
4
|
+
data.tar.gz: 38988cdecc3124a3ab9f6d70a99ab43a562b826475d17d04f6b556643aa56f8f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4b31ff774645765b4c7b06e314ac3dfbdcf61a164f77dbf68effefdd343fa3848373ebbcf07e82a12fd6141c39be711dc9fb127301867cb4a4eea70f21888ae
|
7
|
+
data.tar.gz: 841f1fd8bbb98db165913e1bbc3c8f3adef2b7c93e1c145c75049110a2386763a4ed2aa72a659f971297fdef95f664c561e0ded1902d7701b68c990a9e77827f
|
data/README.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/creek.svg)](https://badge.fury.io/rb/creek)
|
2
|
+
[![Downloads](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)
|
3
|
+
|
1
4
|
# Creek - Stream parser for large Excel (xlsx and xlsm) files.
|
2
5
|
|
3
6
|
Creek is a Ruby gem that provides a fast, simple and efficient method of parsing large Excel (xlsx and xlsm) files.
|
data/lib/creek/shared_strings.rb
CHANGED
@@ -31,9 +31,9 @@ module Creek
|
|
31
31
|
xml.css('si').each_with_index do |si, idx|
|
32
32
|
text_nodes = si.css('t')
|
33
33
|
if text_nodes.count == 1 # plain text node
|
34
|
-
dictionary[idx] = text_nodes.first.content
|
34
|
+
dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
|
35
35
|
else # rich text nodes with text fragments
|
36
|
-
dictionary[idx] = text_nodes.map(&:content).join('')
|
36
|
+
dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
data/lib/creek/sheet.rb
CHANGED
@@ -116,7 +116,8 @@ module Creek
|
|
116
116
|
cell = node.attributes['r']
|
117
117
|
elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
|
118
118
|
unless cell.nil?
|
119
|
-
|
119
|
+
node.read
|
120
|
+
cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
|
120
121
|
end
|
121
122
|
end
|
122
123
|
end
|
data/lib/creek/styles/converter.rb
CHANGED
@@ -4,6 +4,10 @@ module Creek
|
|
4
4
|
class Styles
|
5
5
|
class Converter
|
6
6
|
include Creek::Styles::Constants
|
7
|
+
|
8
|
+
# Excel non-printable character escape sequence
|
9
|
+
HEX_ESCAPE_REGEXP = /_x[0-9A-Za-z]{4}_/
|
10
|
+
|
7
11
|
##
|
8
12
|
# The heart of typecasting. The ruby type is determined either explicitly
|
9
13
|
# from the cell xml or implicitly from the cell style, and this
|
@@ -45,9 +49,9 @@ module Creek
|
|
45
49
|
when 'b'
|
46
50
|
value.to_i == 1
|
47
51
|
when 'str'
|
48
|
-
value
|
52
|
+
unescape_string(value)
|
49
53
|
when 'inlineStr'
|
50
|
-
value
|
54
|
+
unescape_string(value)
|
51
55
|
|
52
56
|
##
|
53
57
|
# Type can also be determined by a style,
|
@@ -112,6 +116,12 @@ module Creek
|
|
112
116
|
end
|
113
117
|
end
|
114
118
|
|
119
|
+
def self.unescape_string(value)
|
120
|
+
# excel encodes some non-printable characters using a hex code in the format _xHHHH_
|
121
|
+
# e.g. Carriage Return (\r) is encoded as _x000D_
|
122
|
+
value.gsub(HEX_ESCAPE_REGEXP) { |match| match[2, 4].to_i(16).chr(Encoding::UTF_8) }
|
123
|
+
end
|
124
|
+
|
115
125
|
private
|
116
126
|
|
117
127
|
def self.base_date(options)
|
data/lib/creek/version.rb
CHANGED
Binary file
|
Binary file
|
data/spec/fixtures/sst.xml
CHANGED
data/spec/shared_string_spec.rb
CHANGED
@@ -7,12 +7,13 @@ describe 'shared strings' do
|
|
7
7
|
doc = Nokogiri::XML(shared_strings_xml_file)
|
8
8
|
dictionary = Creek::SharedStrings.parse_shared_string_from_document(doc)
|
9
9
|
|
10
|
-
expect(dictionary.keys.size).to eq(5)
|
10
|
+
expect(dictionary.keys.size).to eq(6)
|
11
11
|
expect(dictionary[0]).to eq('Cell A1')
|
12
12
|
expect(dictionary[1]).to eq('Cell B1')
|
13
13
|
expect(dictionary[2]).to eq('My Cell')
|
14
14
|
expect(dictionary[3]).to eq('Cell A2')
|
15
15
|
expect(dictionary[4]).to eq('Cell B2')
|
16
|
+
expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters")
|
16
17
|
end
|
17
18
|
|
18
19
|
end
|
data/spec/sheet_spec.rb
CHANGED
@@ -12,6 +12,18 @@ describe 'sheet' do
|
|
12
12
|
cell[cell_name] if cell
|
13
13
|
end
|
14
14
|
|
15
|
+
context 'escaped ampersand' do
|
16
|
+
let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
|
17
|
+
it 'does NOT escape ampersand' do
|
18
|
+
expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
|
19
|
+
end
|
20
|
+
|
21
|
+
let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
|
22
|
+
it 'does escape ampersand' do
|
23
|
+
expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
15
27
|
describe '#rows' do
|
16
28
|
context 'with excel with images' do
|
17
29
|
context 'with images preloading' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: creek
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.1
|
4
|
+
version: 2.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pythonicrubyist
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -136,6 +136,8 @@ files:
|
|
136
136
|
- lib/creek/version.rb
|
137
137
|
- spec/.DS_Store
|
138
138
|
- spec/drawing_spec.rb
|
139
|
+
- spec/fixtures/escaped.xlsx
|
140
|
+
- spec/fixtures/escaped2.xlsx
|
139
141
|
- spec/fixtures/invalid.xls
|
140
142
|
- spec/fixtures/large_numbers.xlsx
|
141
143
|
- spec/fixtures/sample-as-zip.zip
|
@@ -174,13 +176,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
176
|
version: '0'
|
175
177
|
requirements: []
|
176
178
|
rubyforge_project:
|
177
|
-
rubygems_version: 2.
|
179
|
+
rubygems_version: 2.7.8
|
178
180
|
signing_key:
|
179
181
|
specification_version: 4
|
180
182
|
summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
|
181
183
|
test_files:
|
182
184
|
- spec/.DS_Store
|
183
185
|
- spec/drawing_spec.rb
|
186
|
+
- spec/fixtures/escaped.xlsx
|
187
|
+
- spec/fixtures/escaped2.xlsx
|
184
188
|
- spec/fixtures/invalid.xls
|
185
189
|
- spec/fixtures/large_numbers.xlsx
|
186
190
|
- spec/fixtures/sample-as-zip.zip
|