creek 2.5.1 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/README.md +8 -0
- data/lib/creek.rb +2 -0
- data/lib/creek/book.rb +14 -4
- data/lib/creek/sheet.rb +30 -20
- data/lib/creek/version.rb +1 -1
- data/spec/fixtures/sample-with-headers.xlsx +0 -0
- data/spec/sheet_spec.rb +32 -5
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b9680127cd771d8b0840567db333a2a32369952bbeb09eee7364a9dbcdd2284
|
4
|
+
data.tar.gz: e39694d00b142e542457cc585d9b0e027386a8a3fcdb39b0cfe670f6e8d57b81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffa323e9d0f5a9c7976750d4c802a10ca39517b7f2ed388c801c5109aa331a109128dfdfcc5a039e919a2f3e28b9099b9839face7594b1667ccf76cb0bef1bca
|
7
|
+
data.tar.gz: 6c5d6dc7b921d504490502b9e2c37def7112d23bb4a8f6d4a3a7e28d68bc6f297b0ff2864725c2186f302106c40543f4369a54c6884d3a3d908aaf0355539c6f
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -100,6 +100,14 @@ remote_url = 'http://dev-builds.libreoffice.org/tmp/test.xlsx'
|
|
100
100
|
Creek::Book.new remote_url, remote: true
|
101
101
|
```
|
102
102
|
|
103
|
+
## Mapping cells with header names
|
104
|
+
By default, Creek maps cells by their letter-and-number names (A1, B3, etc.). To get cell values by header name instead, pass ***with_headers*** when creating the book (this works only with the ***#simple_rows*** method). *(Note: the header row is the first row of the sheet.)*
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
creek = Creek::Book.new file.path, with_headers: true
|
108
|
+
```
|
109
|
+
|
110
|
+
|
103
111
|
## Contributing
|
104
112
|
|
105
113
|
Contributions are welcome. You can fork the repository, add your code changes to the forked branch, ensure all existing unit tests pass, create new unit tests that cover your changes, and finally create a pull request.
|
data/lib/creek.rb
CHANGED
data/lib/creek/book.rb
CHANGED
@@ -4,12 +4,11 @@ require 'date'
|
|
4
4
|
require 'open-uri'
|
5
5
|
|
6
6
|
module Creek
|
7
|
-
|
8
7
|
class Creek::Book
|
9
|
-
|
10
8
|
attr_reader :files,
|
11
9
|
:sheets,
|
12
|
-
:shared_strings
|
10
|
+
:shared_strings,
|
11
|
+
:with_headers
|
13
12
|
|
14
13
|
DATE_1900 = Date.new(1899, 12, 30).freeze
|
15
14
|
DATE_1904 = Date.new(1904, 1, 1).freeze
|
@@ -23,6 +22,7 @@ module Creek
|
|
23
22
|
path = download_file(path) if options[:remote]
|
24
23
|
@files = Zip::File.open(path)
|
25
24
|
@shared_strings = SharedStrings.new(self)
|
25
|
+
@with_headers = options.fetch(:with_headers, false)
|
26
26
|
end
|
27
27
|
|
28
28
|
def sheets
|
@@ -41,7 +41,17 @@ module Creek
|
|
41
41
|
rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship")
|
42
42
|
@sheets = xml.css(cssPrefix+'sheet').map do |sheet|
|
43
43
|
sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target")
|
44
|
-
|
44
|
+
sheet = Sheet.new(
|
45
|
+
self,
|
46
|
+
sheet.attr("name"),
|
47
|
+
sheet.attr("sheetid"),
|
48
|
+
sheet.attr("state"),
|
49
|
+
sheet.attr("visible"),
|
50
|
+
sheet.attr("r:id"),
|
51
|
+
sheetfile
|
52
|
+
)
|
53
|
+
sheet.with_headers = with_headers
|
54
|
+
sheet
|
45
55
|
end
|
46
56
|
end
|
47
57
|
|
data/lib/creek/sheet.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'zip/filesystem'
|
2
4
|
require 'nokogiri'
|
3
5
|
|
@@ -5,16 +7,19 @@ module Creek
|
|
5
7
|
class Creek::Sheet
|
6
8
|
include Creek::Utils
|
7
9
|
|
10
|
+
HEADERS_ROW_NUMBER = '1'
|
11
|
+
|
12
|
+
attr_accessor :with_headers
|
8
13
|
attr_reader :book,
|
9
14
|
:name,
|
10
15
|
:sheetid,
|
11
16
|
:state,
|
12
17
|
:visible,
|
13
18
|
:rid,
|
14
|
-
:index
|
15
|
-
|
19
|
+
:index,
|
20
|
+
:headers
|
16
21
|
|
17
|
-
def initialize
|
22
|
+
def initialize(book, name, sheetid, state, visible, rid, sheetfile)
|
18
23
|
@book = book
|
19
24
|
@name = name
|
20
25
|
@sheetid = sheetid
|
@@ -46,7 +51,6 @@ module Creek
|
|
46
51
|
@drawing.images_at(cell) if @images_present
|
47
52
|
end
|
48
53
|
|
49
|
-
|
50
54
|
##
|
51
55
|
# Provides an Enumerator that returns a hash representing each row.
|
52
56
|
# The key of the hash is the column ID and the value is the value of the cell.
|
@@ -89,35 +93,37 @@ module Creek
|
|
89
93
|
closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT
|
90
94
|
Enumerator.new do |y|
|
91
95
|
row, cells, cell = nil, {}, nil
|
92
|
-
cell_type
|
96
|
+
cell_type = nil
|
93
97
|
cell_style_idx = nil
|
94
98
|
@book.files.file.open(path) do |xml|
|
95
99
|
Nokogiri::XML::Reader.from_io(xml).each do |node|
|
96
|
-
if
|
100
|
+
if node.name == 'row' && node.node_type == opener
|
97
101
|
row = node.attributes
|
98
|
-
row['cells'] =
|
99
|
-
cells =
|
102
|
+
row['cells'] = {}
|
103
|
+
cells = {}
|
100
104
|
y << (include_meta_data ? row : cells) if node.self_closing?
|
101
|
-
elsif
|
105
|
+
elsif node.name == 'row' && node.node_type == closer
|
102
106
|
processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format)
|
107
|
+
@headers = processed_cells if row['r'] == HEADERS_ROW_NUMBER
|
103
108
|
|
104
109
|
if @images_present
|
105
110
|
processed_cells.each do |cell_name, cell_value|
|
106
111
|
next unless cell_value.nil?
|
112
|
+
|
107
113
|
processed_cells[cell_name] = images_at(cell_name)
|
108
114
|
end
|
109
115
|
end
|
110
116
|
|
111
117
|
row['cells'] = processed_cells
|
112
118
|
y << (include_meta_data ? row : processed_cells)
|
113
|
-
elsif
|
119
|
+
elsif node.name == 'c' && node.node_type == opener
|
114
120
|
cell_type = node.attributes['t']
|
115
121
|
cell_style_idx = node.attributes['s']
|
116
122
|
cell = node.attributes['r']
|
117
|
-
elsif
|
123
|
+
elsif %w[v t].include?(node.name) && node.node_type == opener
|
118
124
|
unless cell.nil?
|
119
125
|
node.read
|
120
|
-
cells[
|
126
|
+
cells[cell] = convert(node.value, cell_type, cell_style_idx)
|
121
127
|
end
|
122
128
|
end
|
123
129
|
end
|
@@ -142,15 +148,13 @@ module Creek
|
|
142
148
|
# The unzipped XML file does not contain any node for empty cells.
|
143
149
|
# Empty cells are being padded in using this function
|
144
150
|
def fill_in_empty_cells(cells, row_number, last_col, use_simple_rows_format)
|
145
|
-
new_cells =
|
151
|
+
new_cells = {}
|
152
|
+
return new_cells if cells.empty?
|
146
153
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
id = use_simple_rows_format ? "#{column}" : "#{column}#{row_number}"
|
152
|
-
new_cells[id] = cells[id]
|
153
|
-
end
|
154
|
+
last_col = last_col.gsub(row_number, '')
|
155
|
+
('A'..last_col).to_a.each do |column|
|
156
|
+
id = cell_id(column, use_simple_rows_format, row_number)
|
157
|
+
new_cells[id] = cells["#{column}#{row_number}"]
|
154
158
|
end
|
155
159
|
|
156
160
|
new_cells
|
@@ -172,5 +176,11 @@ module Creek
|
|
172
176
|
sheet_rels_filepath = expand_to_rels_path(sheet_filepath)
|
173
177
|
parse_xml(sheet_rels_filepath).css("Relationship[@Id='#{drawing_rid}']").first.attributes['Target'].value
|
174
178
|
end
|
179
|
+
|
180
|
+
def cell_id(column, use_simple_rows_format, row_number = '')
|
181
|
+
return "#{column}#{row_number}" unless use_simple_rows_format
|
182
|
+
|
183
|
+
with_headers && headers ? headers[column] : column
|
184
|
+
end
|
175
185
|
end
|
176
186
|
end
|
data/lib/creek/version.rb
CHANGED
Binary file
|
data/spec/sheet_spec.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require './spec/spec_helper'
|
2
4
|
|
3
5
|
describe 'sheet' do
|
4
6
|
let(:book_with_images) { Creek::Book.new('spec/fixtures/sample-with-images.xlsx') }
|
5
|
-
let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
|
6
7
|
let(:sheetfile) { 'worksheets/sheet1.xml' }
|
7
8
|
let(:sheet_with_images) { Creek::Sheet.new(book_with_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
|
8
|
-
let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
|
9
9
|
|
10
10
|
def load_cell(rows, cell_name)
|
11
|
-
cell = rows.find { |row|
|
11
|
+
cell = rows.find { |row| row[cell_name] }
|
12
12
|
cell[cell_name] if cell
|
13
13
|
end
|
14
14
|
|
15
15
|
context 'escaped ampersand' do
|
16
16
|
let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
|
17
17
|
it 'does NOT escape ampersand' do
|
18
|
-
expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([[
|
18
|
+
expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
|
19
19
|
end
|
20
20
|
|
21
21
|
let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
|
22
22
|
it 'does escape ampersand' do
|
23
|
-
expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([[
|
23
|
+
expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -66,6 +66,9 @@ describe 'sheet' do
|
|
66
66
|
end
|
67
67
|
|
68
68
|
context 'with excel without images' do
|
69
|
+
let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
|
70
|
+
let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
|
71
|
+
|
69
72
|
it 'does not break on with_images' do
|
70
73
|
rows = sheet_no_images.with_images.rows.map { |r| r }
|
71
74
|
expect(load_cell(rows, 'A10')).to eq(0.15)
|
@@ -94,4 +97,28 @@ describe 'sheet' do
|
|
94
97
|
expect(image).to eq(nil)
|
95
98
|
end
|
96
99
|
end
|
100
|
+
|
101
|
+
describe '#simple_rows' do
|
102
|
+
let(:book_with_headers) { Creek::Book.new('spec/fixtures/sample-with-headers.xlsx') }
|
103
|
+
let(:sheet) { Creek::Sheet.new(book_with_headers, 'Sheet 1', 1, '', '', '1', sheetfile) }
|
104
|
+
|
105
|
+
subject { sheet.simple_rows.to_a[1] }
|
106
|
+
|
107
|
+
it 'returns values by letters' do
|
108
|
+
expect(subject['A']).to eq 'value1'
|
109
|
+
expect(subject['B']).to eq 'value2'
|
110
|
+
end
|
111
|
+
|
112
|
+
context 'when enable with_headers property' do
|
113
|
+
before { sheet.with_headers = true }
|
114
|
+
|
115
|
+
subject { sheet.simple_rows.to_a[1] }
|
116
|
+
|
117
|
+
it 'returns values by headers name' do
|
118
|
+
expect(subject['HeaderA']).to eq 'value1'
|
119
|
+
expect(subject['HeaderB']).to eq 'value2'
|
120
|
+
expect(subject['HeaderC']).to eq 'value3'
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
97
124
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: creek
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.
|
4
|
+
version: 2.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pythonicrubyist
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -127,6 +127,7 @@ files:
|
|
127
127
|
- spec/fixtures/invalid.xls
|
128
128
|
- spec/fixtures/large_numbers.xlsx
|
129
129
|
- spec/fixtures/sample-as-zip.zip
|
130
|
+
- spec/fixtures/sample-with-headers.xlsx
|
130
131
|
- spec/fixtures/sample-with-images.xlsx
|
131
132
|
- spec/fixtures/sample.xlsx
|
132
133
|
- spec/fixtures/sample_dates.xlsx
|
@@ -161,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
162
|
- !ruby/object:Gem::Version
|
162
163
|
version: '0'
|
163
164
|
requirements: []
|
164
|
-
rubygems_version: 3.0.
|
165
|
+
rubygems_version: 3.0.8
|
165
166
|
signing_key:
|
166
167
|
specification_version: 4
|
167
168
|
summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
|
@@ -173,6 +174,7 @@ test_files:
|
|
173
174
|
- spec/fixtures/invalid.xls
|
174
175
|
- spec/fixtures/large_numbers.xlsx
|
175
176
|
- spec/fixtures/sample-as-zip.zip
|
177
|
+
- spec/fixtures/sample-with-headers.xlsx
|
176
178
|
- spec/fixtures/sample-with-images.xlsx
|
177
179
|
- spec/fixtures/sample.xlsx
|
178
180
|
- spec/fixtures/sample_dates.xlsx
|