creek 2.4.1 → 2.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +3 -0
- data/lib/creek/shared_strings.rb +2 -2
- data/lib/creek/sheet.rb +2 -1
- data/lib/creek/styles/converter.rb +12 -2
- data/lib/creek/version.rb +1 -1
- data/spec/fixtures/escaped.xlsx +0 -0
- data/spec/fixtures/escaped2.xlsx +0 -0
- data/spec/fixtures/sst.xml +3 -0
- data/spec/shared_string_spec.rb +2 -1
- data/spec/sheet_spec.rb +12 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 997fbfad1c0930d6cc9afa229a7d4e43f4f157ac2a8cda3df9863994a7b4a3fe
|
4
|
+
data.tar.gz: 38988cdecc3124a3ab9f6d70a99ab43a562b826475d17d04f6b556643aa56f8f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4b31ff774645765b4c7b06e314ac3dfbdcf61a164f77dbf68effefdd343fa3848373ebbcf07e82a12fd6141c39be711dc9fb127301867cb4a4eea70f21888ae
|
7
|
+
data.tar.gz: 841f1fd8bbb98db165913e1bbc3c8f3adef2b7c93e1c145c75049110a2386763a4ed2aa72a659f971297fdef95f664c561e0ded1902d7701b68c990a9e77827f
|
data/README.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
[![version](https://badge.fury.io/rb/creek.svg)](https://badge.fury.io/rb/creek)
|
2
|
+
[![downloads](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)
|
3
|
+
|
1
4
|
# Creek - Stream parser for large Excel (xlsx and xlsm) files.
|
2
5
|
|
3
6
|
Creek is a Ruby gem that provides a fast, simple and efficient method of parsing large Excel (xlsx and xlsm) files.
|
data/lib/creek/shared_strings.rb
CHANGED
@@ -31,9 +31,9 @@ module Creek
|
|
31
31
|
xml.css('si').each_with_index do |si, idx|
|
32
32
|
text_nodes = si.css('t')
|
33
33
|
if text_nodes.count == 1 # plain text node
|
34
|
-
dictionary[idx] = text_nodes.first.content
|
34
|
+
dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
|
35
35
|
else # rich text nodes with text fragments
|
36
|
-
dictionary[idx] = text_nodes.map(&:content).join('')
|
36
|
+
dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
data/lib/creek/sheet.rb
CHANGED
@@ -116,7 +116,8 @@ module Creek
|
|
116
116
|
cell = node.attributes['r']
|
117
117
|
elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
|
118
118
|
unless cell.nil?
|
119
|
-
|
119
|
+
node.read
|
120
|
+
cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
|
120
121
|
end
|
121
122
|
end
|
122
123
|
end
|
data/lib/creek/styles/converter.rb
CHANGED
@@ -4,6 +4,10 @@ module Creek
|
|
4
4
|
class Styles
|
5
5
|
class Converter
|
6
6
|
include Creek::Styles::Constants
|
7
|
+
|
8
|
+
# Excel non-printable character escape sequence
|
9
|
+
# Matches _xHHHH_ where H is a hexadecimal digit only, so ordinary text such as "_xtest_" is left untouched.
HEX_ESCAPE_REGEXP = /_x[0-9A-Fa-f]{4}_/
|
10
|
+
|
7
11
|
##
|
8
12
|
# The heart of typecasting. The ruby type is determined either explicitly
|
9
13
|
# from the cell xml or implicitly from the cell style, and this
|
@@ -45,9 +49,9 @@ module Creek
|
|
45
49
|
when 'b'
|
46
50
|
value.to_i == 1
|
47
51
|
when 'str'
|
48
|
-
value
|
52
|
+
unescape_string(value)
|
49
53
|
when 'inlineStr'
|
50
|
-
value
|
54
|
+
unescape_string(value)
|
51
55
|
|
52
56
|
##
|
53
57
|
# Type can also be determined by a style,
|
@@ -112,6 +116,12 @@ module Creek
|
|
112
116
|
end
|
113
117
|
end
|
114
118
|
|
119
|
+
def self.unescape_string(value)
|
120
|
+
# excel encodes some non-printable characters using a hex code in the format _xHHHH_
|
121
|
+
# e.g. Carriage Return (\r) is encoded as _x000D_
|
122
|
+
value.gsub(HEX_ESCAPE_REGEXP) { |match| match[2, 4].to_i(16).chr(Encoding::UTF_8) }
|
123
|
+
end
|
124
|
+
|
115
125
|
private
|
116
126
|
|
117
127
|
def self.base_date(options)
|
data/lib/creek/version.rb
CHANGED
Binary file
|
Binary file
|
data/spec/fixtures/sst.xml
CHANGED
data/spec/shared_string_spec.rb
CHANGED
@@ -7,12 +7,13 @@ describe 'shared strings' do
|
|
7
7
|
doc = Nokogiri::XML(shared_strings_xml_file)
|
8
8
|
dictionary = Creek::SharedStrings.parse_shared_string_from_document(doc)
|
9
9
|
|
10
|
-
expect(dictionary.keys.size).to eq(5)
|
10
|
+
expect(dictionary.keys.size).to eq(6)
|
11
11
|
expect(dictionary[0]).to eq('Cell A1')
|
12
12
|
expect(dictionary[1]).to eq('Cell B1')
|
13
13
|
expect(dictionary[2]).to eq('My Cell')
|
14
14
|
expect(dictionary[3]).to eq('Cell A2')
|
15
15
|
expect(dictionary[4]).to eq('Cell B2')
|
16
|
+
expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters")
|
16
17
|
end
|
17
18
|
|
18
19
|
end
|
data/spec/sheet_spec.rb
CHANGED
@@ -12,6 +12,18 @@ describe 'sheet' do
|
|
12
12
|
cell[cell_name] if cell
|
13
13
|
end
|
14
14
|
|
15
|
+
context 'escaped ampersand' do
|
16
|
+
let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
|
17
|
+
it 'does NOT escape ampersand' do
|
18
|
+
expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
|
19
|
+
end
|
20
|
+
|
21
|
+
let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
|
22
|
+
it 'does escape ampersand' do
|
23
|
+
expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
15
27
|
describe '#rows' do
|
16
28
|
context 'with excel with images' do
|
17
29
|
context 'with images preloading' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: creek
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.1
|
4
|
+
version: 2.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pythonicrubyist
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -136,6 +136,8 @@ files:
|
|
136
136
|
- lib/creek/version.rb
|
137
137
|
- spec/.DS_Store
|
138
138
|
- spec/drawing_spec.rb
|
139
|
+
- spec/fixtures/escaped.xlsx
|
140
|
+
- spec/fixtures/escaped2.xlsx
|
139
141
|
- spec/fixtures/invalid.xls
|
140
142
|
- spec/fixtures/large_numbers.xlsx
|
141
143
|
- spec/fixtures/sample-as-zip.zip
|
@@ -174,13 +176,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
176
|
version: '0'
|
175
177
|
requirements: []
|
176
178
|
rubyforge_project:
|
177
|
-
rubygems_version: 2.
|
179
|
+
rubygems_version: 2.7.8
|
178
180
|
signing_key:
|
179
181
|
specification_version: 4
|
180
182
|
summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
|
181
183
|
test_files:
|
182
184
|
- spec/.DS_Store
|
183
185
|
- spec/drawing_spec.rb
|
186
|
+
- spec/fixtures/escaped.xlsx
|
187
|
+
- spec/fixtures/escaped2.xlsx
|
184
188
|
- spec/fixtures/invalid.xls
|
185
189
|
- spec/fixtures/large_numbers.xlsx
|
186
190
|
- spec/fixtures/sample-as-zip.zip
|