creek 2.4.1 → 2.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: ddab26a3269b8c9367e6e891bd9aff7e21c96310
4
- data.tar.gz: 1959b38cd75e8836c39b2e2d0f8e332a8145f7bd
2
+ SHA256:
3
+ metadata.gz: 997fbfad1c0930d6cc9afa229a7d4e43f4f157ac2a8cda3df9863994a7b4a3fe
4
+ data.tar.gz: 38988cdecc3124a3ab9f6d70a99ab43a562b826475d17d04f6b556643aa56f8f
5
5
  SHA512:
6
- metadata.gz: 35d72aab09ffc085e5869fe6a93d3249733820175e711749c7b13574f0545e9101cde0acb28dc800eb8be3900c46860095df32de728eda269724de3849a5a7d1
7
- data.tar.gz: 88eb9a761857575f87dcf45d72362e7bc4000c410ce7ccc924a54383987cf2a64fa3693a8b19a38ed4fc3544e8c6fc63b0aef540332f140aa70a15ca949bdda5
6
+ metadata.gz: f4b31ff774645765b4c7b06e314ac3dfbdcf61a164f77dbf68effefdd343fa3848373ebbcf07e82a12fd6141c39be711dc9fb127301867cb4a4eea70f21888ae
7
+ data.tar.gz: 841f1fd8bbb98db165913e1bbc3c8f3adef2b7c93e1c145c75049110a2386763a4ed2aa72a659f971297fdef95f664c561e0ded1902d7701b68c990a9e77827f
data/README.md CHANGED
@@ -1,3 +1,6 @@
1
+ [![version](https://badge.fury.io/rb/creek.svg)](https://badge.fury.io/rb/creek)
2
+ [![downloads](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)
3
+
1
4
  # Creek - Stream parser for large Excel (xlsx and xlsm) files.
2
5
 
3
6
  Creek is a Ruby gem that provides a fast, simple and efficient method of parsing large Excel (xlsx and xlsm) files.
@@ -31,9 +31,9 @@ module Creek
31
31
  xml.css('si').each_with_index do |si, idx|
32
32
  text_nodes = si.css('t')
33
33
  if text_nodes.count == 1 # plain text node
34
- dictionary[idx] = text_nodes.first.content
34
+ dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
35
35
  else # rich text nodes with text fragments
36
- dictionary[idx] = text_nodes.map(&:content).join('')
36
+ dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
37
37
  end
38
38
  end
39
39
 
@@ -116,7 +116,8 @@ module Creek
116
116
  cell = node.attributes['r']
117
117
  elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
118
118
  unless cell.nil?
119
- cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.inner_xml, cell_type, cell_style_idx)
119
+ node.read
120
+ cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
120
121
  end
121
122
  end
122
123
  end
@@ -4,6 +4,10 @@ module Creek
4
4
  class Styles
5
5
  class Converter
6
6
  include Creek::Styles::Constants
7
+
8
# Excel escape sequence for non-printable characters: _xHHHH_, where HHHH
# is exactly four hexadecimal digits (e.g. _x000D_ for carriage return).
# \h matches only [0-9A-Fa-f], so ordinary text such as "_xZZZZ_" is not
# mistaken for an escape sequence.
HEX_ESCAPE_REGEXP = /_x\h{4}_/
10
+
7
11
  ##
8
12
  # The heart of typecasting. The ruby type is determined either explicitly
9
13
  # from the cell xml or implicitly from the cell style, and this
@@ -45,9 +49,9 @@ module Creek
45
49
  when 'b'
46
50
  value.to_i == 1
47
51
  when 'str'
48
- value
52
+ unescape_string(value)
49
53
  when 'inlineStr'
50
- value
54
+ unescape_string(value)
51
55
 
52
56
  ##
53
57
  # Type can also be determined by a style,
@@ -112,6 +116,12 @@ module Creek
112
116
  end
113
117
  end
114
118
 
119
# Decodes Excel's _xHHHH_ escape sequences found in +value+.
#
# Excel encodes some non-printable characters using a hex code in the
# format _xHHHH_, e.g. Carriage Return (\r) is encoded as _x000D_.
#
# A matched sequence is left exactly as it was when it cannot be decoded:
# * its four characters are not all hexadecimal digits, or
# * the code point has no UTF-8 encoding (e.g. surrogates D800-DFFF), in
#   which case Integer#chr raises RangeError.
#
# @param value [String] raw cell or shared-string text
# @return [String] a new string with decodable escape sequences replaced
def self.unescape_string(value)
  value.gsub(HEX_ESCAPE_REGEXP) do |match|
    digits = match[2, 4] # the four characters between "_x" and the trailing "_"

    if digits =~ /\A\h{4}\z/
      begin
        digits.hex.chr(Encoding::UTF_8)
      rescue RangeError
        # Not representable in UTF-8: keep the literal text instead of aborting.
        match
      end
    else
      match
    end
  end
end
124
+
115
125
  private
116
126
 
117
127
  def self.base_date(options)
@@ -1,3 +1,3 @@
1
1
  module Creek
2
- VERSION = "2.4.1"
2
+ VERSION = "2.4.2"
3
3
  end
@@ -75,4 +75,7 @@
75
75
  <t>B2</t>
76
76
  </r>
77
77
  </si>
78
+ <si>
79
+ <t>Cell with_x000D_escaped_x000D_characters</t>
80
+ </si>
78
81
  </sst>
@@ -7,12 +7,13 @@ describe 'shared strings' do
7
7
  doc = Nokogiri::XML(shared_strings_xml_file)
8
8
  dictionary = Creek::SharedStrings.parse_shared_string_from_document(doc)
9
9
 
10
- expect(dictionary.keys.size).to eq(5)
10
+ expect(dictionary.keys.size).to eq(6)
11
11
  expect(dictionary[0]).to eq('Cell A1')
12
12
  expect(dictionary[1]).to eq('Cell B1')
13
13
  expect(dictionary[2]).to eq('My Cell')
14
14
  expect(dictionary[3]).to eq('Cell A2')
15
15
  expect(dictionary[4]).to eq('Cell B2')
16
+ expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters")
16
17
  end
17
18
 
18
19
  end
@@ -12,6 +12,18 @@ describe 'sheet' do
12
12
  cell[cell_name] if cell
13
13
  end
14
14
 
15
# Specs for reading cell text that contains an ampersand.
# Each example opens a fixture workbook with Creek::Book.new, collects the
# values of every row on the first sheet, and expects the same matrix, in
# which the ampersand appears as a literal "&" ("j&k").
# NOTE(review): the example names read as opposites ("does NOT escape" vs
# "does escape") although both assert identical output. Presumably
# escaped.xlsx and escaped2.xlsx store the ampersand differently in their
# XML; confirm against the fixtures and rename the examples accordingly.
+ context 'escaped ampersand' do
16
+ let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
17
+ it 'does NOT escape ampersand' do
18
+ expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
19
+ end
20
+
21
+ let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
22
+ it 'does escape ampersand' do
23
+ expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
24
+ end
25
+ end
26
+
15
27
  describe '#rows' do
16
28
  context 'with excel with images' do
17
29
  context 'with images preloading' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: creek
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - pythonicrubyist
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-13 00:00:00.000000000 Z
11
+ date: 2019-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -136,6 +136,8 @@ files:
136
136
  - lib/creek/version.rb
137
137
  - spec/.DS_Store
138
138
  - spec/drawing_spec.rb
139
+ - spec/fixtures/escaped.xlsx
140
+ - spec/fixtures/escaped2.xlsx
139
141
  - spec/fixtures/invalid.xls
140
142
  - spec/fixtures/large_numbers.xlsx
141
143
  - spec/fixtures/sample-as-zip.zip
@@ -174,13 +176,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
176
  version: '0'
175
177
  requirements: []
176
178
  rubyforge_project:
177
- rubygems_version: 2.6.14
179
+ rubygems_version: 2.7.8
178
180
  signing_key:
179
181
  specification_version: 4
180
182
  summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
181
183
  test_files:
182
184
  - spec/.DS_Store
183
185
  - spec/drawing_spec.rb
186
+ - spec/fixtures/escaped.xlsx
187
+ - spec/fixtures/escaped2.xlsx
184
188
  - spec/fixtures/invalid.xls
185
189
  - spec/fixtures/large_numbers.xlsx
186
190
  - spec/fixtures/sample-as-zip.zip