creek 2.4.1 → 2.4.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: ddab26a3269b8c9367e6e891bd9aff7e21c96310
4
- data.tar.gz: 1959b38cd75e8836c39b2e2d0f8e332a8145f7bd
2
+ SHA256:
3
+ metadata.gz: 997fbfad1c0930d6cc9afa229a7d4e43f4f157ac2a8cda3df9863994a7b4a3fe
4
+ data.tar.gz: 38988cdecc3124a3ab9f6d70a99ab43a562b826475d17d04f6b556643aa56f8f
5
5
  SHA512:
6
- metadata.gz: 35d72aab09ffc085e5869fe6a93d3249733820175e711749c7b13574f0545e9101cde0acb28dc800eb8be3900c46860095df32de728eda269724de3849a5a7d1
7
- data.tar.gz: 88eb9a761857575f87dcf45d72362e7bc4000c410ce7ccc924a54383987cf2a64fa3693a8b19a38ed4fc3544e8c6fc63b0aef540332f140aa70a15ca949bdda5
6
+ metadata.gz: f4b31ff774645765b4c7b06e314ac3dfbdcf61a164f77dbf68effefdd343fa3848373ebbcf07e82a12fd6141c39be711dc9fb127301867cb4a4eea70f21888ae
7
+ data.tar.gz: 841f1fd8bbb98db165913e1bbc3c8f3adef2b7c93e1c145c75049110a2386763a4ed2aa72a659f971297fdef95f664c561e0ded1902d7701b68c990a9e77827f
data/README.md CHANGED
@@ -1,3 +1,6 @@
1
+ [![version](https://badge.fury.io/rb/creek.svg)](https://badge.fury.io/rb/creek)
2
+ [![downloads](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)](https://ruby-gem-downloads-badge.herokuapp.com/creek?type=total&total_label=downloads)
3
+
1
4
  # Creek - Stream parser for large Excel (xlsx and xlsm) files.
2
5
 
3
6
  Creek is a Ruby gem that provides a fast, simple and efficient method of parsing large Excel (xlsx and xlsm) files.
@@ -31,9 +31,9 @@ module Creek
31
31
  xml.css('si').each_with_index do |si, idx|
32
32
  text_nodes = si.css('t')
33
33
  if text_nodes.count == 1 # plain text node
34
- dictionary[idx] = text_nodes.first.content
34
+ dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
35
35
  else # rich text nodes with text fragments
36
- dictionary[idx] = text_nodes.map(&:content).join('')
36
+ dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
37
37
  end
38
38
  end
39
39
 
@@ -116,7 +116,8 @@ module Creek
116
116
  cell = node.attributes['r']
117
117
  elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
118
118
  unless cell.nil?
119
- cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.inner_xml, cell_type, cell_style_idx)
119
+ node.read
120
+ cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
120
121
  end
121
122
  end
122
123
  end
@@ -4,6 +4,10 @@ module Creek
4
4
  class Styles
5
5
  class Converter
6
6
  include Creek::Styles::Constants
7
+
8
+ # Excel non-printable character escape sequence
9
+ HEX_ESCAPE_REGEXP = /_x[0-9A-Za-z]{4}_/
10
+
7
11
  ##
8
12
  # The heart of typecasting. The ruby type is determined either explicitly
9
13
  # from the cell xml or implicitly from the cell style, and this
@@ -45,9 +49,9 @@ module Creek
45
49
  when 'b'
46
50
  value.to_i == 1
47
51
  when 'str'
48
- value
52
+ unescape_string(value)
49
53
  when 'inlineStr'
50
- value
54
+ unescape_string(value)
51
55
 
52
56
  ##
53
57
  # Type can also be determined by a style,
@@ -112,6 +116,12 @@ module Creek
112
116
  end
113
117
  end
114
118
 
119
+ def self.unescape_string(value)
120
+ # excel encodes some non-printable characters using a hex code in the format _xHHHH_
121
+ # e.g. Carriage Return (\r) is encoded as _x000D_
122
+ value.gsub(HEX_ESCAPE_REGEXP) { |match| match[2, 4].to_i(16).chr(Encoding::UTF_8) }
123
+ end
124
+
115
125
  private
116
126
 
117
127
  def self.base_date(options)
@@ -1,3 +1,3 @@
1
1
  module Creek
2
- VERSION = "2.4.1"
2
+ VERSION = "2.4.2"
3
3
  end
@@ -75,4 +75,7 @@
75
75
  <t>B2</t>
76
76
  </r>
77
77
  </si>
78
+ <si>
79
+ <t>Cell with_x000D_escaped_x000D_characters</t>
80
+ </si>
78
81
  </sst>
@@ -7,12 +7,13 @@ describe 'shared strings' do
7
7
  doc = Nokogiri::XML(shared_strings_xml_file)
8
8
  dictionary = Creek::SharedStrings.parse_shared_string_from_document(doc)
9
9
 
10
- expect(dictionary.keys.size).to eq(5)
10
+ expect(dictionary.keys.size).to eq(6)
11
11
  expect(dictionary[0]).to eq('Cell A1')
12
12
  expect(dictionary[1]).to eq('Cell B1')
13
13
  expect(dictionary[2]).to eq('My Cell')
14
14
  expect(dictionary[3]).to eq('Cell A2')
15
15
  expect(dictionary[4]).to eq('Cell B2')
16
+ expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters")
16
17
  end
17
18
 
18
19
  end
@@ -12,6 +12,18 @@ describe 'sheet' do
12
12
  cell[cell_name] if cell
13
13
  end
14
14
 
15
+ context 'escaped ampersand' do
16
+ let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
17
+ it 'does NOT escape ampersand' do
18
+ expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
19
+ end
20
+
21
+ let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
22
+ it 'does escape ampersand' do
23
+ expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
24
+ end
25
+ end
26
+
15
27
  describe '#rows' do
16
28
  context 'with excel with images' do
17
29
  context 'with images preloading' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: creek
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - pythonicrubyist
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-13 00:00:00.000000000 Z
11
+ date: 2019-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -136,6 +136,8 @@ files:
136
136
  - lib/creek/version.rb
137
137
  - spec/.DS_Store
138
138
  - spec/drawing_spec.rb
139
+ - spec/fixtures/escaped.xlsx
140
+ - spec/fixtures/escaped2.xlsx
139
141
  - spec/fixtures/invalid.xls
140
142
  - spec/fixtures/large_numbers.xlsx
141
143
  - spec/fixtures/sample-as-zip.zip
@@ -174,13 +176,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
176
  version: '0'
175
177
  requirements: []
176
178
  rubyforge_project:
177
- rubygems_version: 2.6.14
179
+ rubygems_version: 2.7.8
178
180
  signing_key:
179
181
  specification_version: 4
180
182
  summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
181
183
  test_files:
182
184
  - spec/.DS_Store
183
185
  - spec/drawing_spec.rb
186
+ - spec/fixtures/escaped.xlsx
187
+ - spec/fixtures/escaped2.xlsx
184
188
  - spec/fixtures/invalid.xls
185
189
  - spec/fixtures/large_numbers.xlsx
186
190
  - spec/fixtures/sample-as-zip.zip