creek 2.4.3 → 2.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e0adb7a48d121c8eaf3500f58e25d2be013ce5b24b6d647443ec3dd866fee31
4
- data.tar.gz: f92ee6b1dc6195d7f64c4ea189e8f47af5674ca36c31e667a5db16aa50446d58
3
+ metadata.gz: 0d90975898b7d260d37f6543755f8c2f4d5931d6e387a56c343bce23eee7e3b1
4
+ data.tar.gz: 0c381e483070a0e883f128e1a03147a5acf60c4a1095b42ef0d3106af7889f8b
5
5
  SHA512:
6
- metadata.gz: 64384e3b96b50047dbad96f797ad9772dc3dbb5ba2c6f2e99467a55cf6460df883d9eb77ed2dc9eaf820ef81ebb373556328e0e48b6155f4a63180e59d2b630e
7
- data.tar.gz: 5ae4f07b1eedc8ee58acaef4844f8b6abf02ef24d4e98298aa6ad842f31270ccb446908d886a55cb9b88d4e066b4c7a75cf03ab9648153243f518d6d774f7deb
6
+ metadata.gz: 1f23c318b54b45c2d1732c874e3f6519e7d528175a395b888e3352f302fb1f0b8625cdde965f16540081163282751dd410a712281651b9b3d3b91c9e89bec800
7
+ data.tar.gz: e601060be6ba35b3bc3ded42865bce8b46fc3ca92feec1439dc2e719f132bb2359d06129f1110dc6676d07f7707364fead716f8f584a5851388a1d183c6e6825
data/.gitignore CHANGED
@@ -15,3 +15,6 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+
19
+ # Mac finder artifacts
20
+ .DS_Store
data/README.md CHANGED
@@ -100,6 +100,14 @@ remote_url = 'http://dev-builds.libreoffice.org/tmp/test.xlsx'
100
100
  Creek::Book.new remote_url, remote: true
101
101
  ```
102
102
 
103
+ ## Mapping cells with header names
104
+ By default, Creek maps cells by their letter-and-number names (A1, B3, etc.). To access cell values by header column name instead, pass ***with_headers*** when creating the book (this option only works with the ***#simple_rows*** method). *(Note: the headers are taken from the first row of the sheet.)*
105
+
106
+ ```ruby
107
+ creek = Creek::Book.new file.path, with_headers: true
108
+ ```
109
+
110
+
103
111
  ## Contributing
104
112
 
105
113
  Contributions are welcome. You can fork the repository, add your code changes to the forked branch, ensure all existing unit tests pass, create new unit tests which cover your new changes and finally create a pull request.
@@ -118,6 +126,12 @@ Once this is complete, you should be able to run the test suite:
118
126
  rake
119
127
  ```
120
128
 
129
+ Some tests hit remote URLs and are excluded by default. To run them, use:
130
+
131
+ ```
132
+ bundle exec rspec --tag remote
133
+ ```
134
+
121
135
  ## Bug Reporting
122
136
 
123
137
  Please use the [Issues](https://github.com/pythonicrubyist/creek/issues) page to report bugs or suggest new enhancements.
data/creek.gemspec CHANGED
@@ -20,12 +20,11 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.required_ruby_version = '>= 2.0.0'
22
22
 
23
- spec.add_development_dependency "bundler", "~> 1.3"
23
+ spec.add_development_dependency "bundler", "~> 2.1.2"
24
24
  spec.add_development_dependency "rake"
25
25
  spec.add_development_dependency 'rspec', '~> 3.6.0'
26
- spec.add_development_dependency 'pry'
26
+ spec.add_development_dependency 'pry-byebug'
27
27
 
28
- spec.add_dependency 'nokogiri', '>= 1.7.0'
28
+ spec.add_dependency 'nokogiri', '>= 1.10.0'
29
29
  spec.add_dependency 'rubyzip', '>= 1.0.0'
30
- spec.add_dependency 'http', '~> 4.0'
31
30
  end
data/lib/creek.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'creek/version'
2
4
  require 'creek/book'
3
5
  require 'creek/styles/constants'
data/lib/creek/book.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  require 'zip/filesystem'
2
2
  require 'nokogiri'
3
3
  require 'date'
4
- require 'http'
4
+ require 'open-uri'
5
5
 
6
6
  module Creek
7
-
8
7
  class Creek::Book
9
-
10
8
  attr_reader :files,
11
9
  :sheets,
12
- :shared_strings
10
+ :shared_strings,
11
+ :with_headers
13
12
 
14
13
  DATE_1900 = Date.new(1899, 12, 30).freeze
15
14
  DATE_1904 = Date.new(1904, 1, 1).freeze
@@ -20,15 +19,10 @@ module Creek
20
19
  extension = File.extname(options[:original_filename] || path).downcase
21
20
  raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension)
22
21
  end
23
- if options[:remote]
24
- zipfile = Tempfile.new("file")
25
- zipfile.binmode
26
- zipfile.write(HTTP.get(path).to_s)
27
- zipfile.close
28
- path = zipfile.path
29
- end
22
+ path = download_file(path) if options[:remote]
30
23
  @files = Zip::File.open(path)
31
24
  @shared_strings = SharedStrings.new(self)
25
+ @with_headers = options.fetch(:with_headers, false)
32
26
  end
33
27
 
34
28
  def sheets
@@ -47,7 +41,17 @@ module Creek
47
41
  rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship")
48
42
  @sheets = xml.css(cssPrefix+'sheet').map do |sheet|
49
43
  sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target")
50
- Sheet.new(self, sheet.attr("name"), sheet.attr("sheetid"), sheet.attr("state"), sheet.attr("visible"), sheet.attr("r:id"), sheetfile)
44
+ sheet = Sheet.new(
45
+ self,
46
+ sheet.attr("name"),
47
+ sheet.attr("sheetid"),
48
+ sheet.attr("state"),
49
+ sheet.attr("visible"),
50
+ sheet.attr("r:id"),
51
+ sheetfile
52
+ )
53
+ sheet.with_headers = with_headers
54
+ sheet
51
55
  end
52
56
  end
53
57
 
@@ -79,5 +83,20 @@ module Creek
79
83
  result
80
84
  end
81
85
  end
86
+
87
+ private
88
+
89
+ def download_file(url)
90
+ # OpenUri will return a StringIO if under OpenURI::Buffer::StringMax
91
+ # threshold, and a Tempfile if over.
92
+ downloaded = URI(url).open
93
+ if downloaded.is_a? StringIO
94
+ path = Tempfile.new(['creek-file', '.xlsx']).path
95
+ File.binwrite(path, downloaded.read)
96
+ path
97
+ else
98
+ downloaded.path
99
+ end
100
+ end
82
101
  end
83
102
  end
data/lib/creek/drawing.rb CHANGED
@@ -83,7 +83,9 @@ module Creek
83
83
  col_to_selector = 'xdr:to/xdr:col'.freeze
84
84
 
85
85
  @drawings.xpath('//xdr:twoCellAnchor').each do |drawing|
86
- embed = drawing.xpath(image_selector).first.attributes['embed']
86
+ # embed = drawing.xpath(image_selector).first.attributes['embed']
87
+ temp = drawing.xpath(image_selector).first
88
+ embed = temp.attributes['embed'] if temp
87
89
  next if embed.nil?
88
90
 
89
91
  rid = embed.value
data/lib/creek/sheet.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zip/filesystem'
2
4
  require 'nokogiri'
3
5
 
@@ -5,16 +7,19 @@ module Creek
5
7
  class Creek::Sheet
6
8
  include Creek::Utils
7
9
 
10
+ HEADERS_ROW_NUMBER = '1'
11
+
12
+ attr_accessor :with_headers
8
13
  attr_reader :book,
9
14
  :name,
10
15
  :sheetid,
11
16
  :state,
12
17
  :visible,
13
18
  :rid,
14
- :index
15
-
19
+ :index,
20
+ :headers
16
21
 
17
- def initialize book, name, sheetid, state, visible, rid, sheetfile
22
+ def initialize(book, name, sheetid, state, visible, rid, sheetfile)
18
23
  @book = book
19
24
  @name = name
20
25
  @sheetid = sheetid
@@ -46,7 +51,6 @@ module Creek
46
51
  @drawing.images_at(cell) if @images_present
47
52
  end
48
53
 
49
-
50
54
  ##
51
55
  # Provides an Enumerator that returns a hash representing each row.
52
56
  # The key of the hash is the column ID and the value is the value of the cell.
@@ -88,36 +92,39 @@ module Creek
88
92
  opener = Nokogiri::XML::Reader::TYPE_ELEMENT
89
93
  closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT
90
94
  Enumerator.new do |y|
95
+ @headers = nil
91
96
  row, cells, cell = nil, {}, nil
92
- cell_type = nil
97
+ cell_type = nil
93
98
  cell_style_idx = nil
94
99
  @book.files.file.open(path) do |xml|
95
100
  Nokogiri::XML::Reader.from_io(xml).each do |node|
96
- if (node.name.eql? 'row') and (node.node_type.eql? opener)
101
+ if node.name == 'row' && node.node_type == opener
97
102
  row = node.attributes
98
- row['cells'] = Hash.new
99
- cells = Hash.new
103
+ row['cells'] = {}
104
+ cells = {}
100
105
  y << (include_meta_data ? row : cells) if node.self_closing?
101
- elsif (node.name.eql? 'row') and (node.node_type.eql? closer)
106
+ elsif node.name == 'row' && node.node_type == closer
102
107
  processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format)
108
+ @headers = processed_cells if with_headers && row['r'] == HEADERS_ROW_NUMBER
103
109
 
104
110
  if @images_present
105
111
  processed_cells.each do |cell_name, cell_value|
106
112
  next unless cell_value.nil?
113
+
107
114
  processed_cells[cell_name] = images_at(cell_name)
108
115
  end
109
116
  end
110
117
 
111
118
  row['cells'] = processed_cells
112
119
  y << (include_meta_data ? row : processed_cells)
113
- elsif (node.name.eql? 'c') and (node.node_type.eql? opener)
120
+ elsif node.name == 'c' && node.node_type == opener
114
121
  cell_type = node.attributes['t']
115
122
  cell_style_idx = node.attributes['s']
116
123
  cell = node.attributes['r']
117
- elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
124
+ elsif %w[v t].include?(node.name) && node.node_type == opener
118
125
  unless cell.nil?
119
126
  node.read
120
- cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
127
+ cells[cell] = convert(node.value, cell_type, cell_style_idx)
121
128
  end
122
129
  end
123
130
  end
@@ -142,15 +149,13 @@ module Creek
142
149
  # The unzipped XML file does not contain any node for empty cells.
143
150
  # Empty cells are being padded in using this function
144
151
  def fill_in_empty_cells(cells, row_number, last_col, use_simple_rows_format)
145
- new_cells = Hash.new
152
+ new_cells = {}
153
+ return new_cells if cells.empty?
146
154
 
147
- unless cells.empty?
148
- last_col = last_col.gsub(row_number, '')
149
-
150
- ("A"..last_col).to_a.each do |column|
151
- id = use_simple_rows_format ? "#{column}" : "#{column}#{row_number}"
152
- new_cells[id] = cells[id]
153
- end
155
+ last_col = last_col.gsub(row_number, '')
156
+ ('A'..last_col).to_a.each do |column|
157
+ id = cell_id(column, use_simple_rows_format, row_number)
158
+ new_cells[id] = cells["#{column}#{row_number}"]
154
159
  end
155
160
 
156
161
  new_cells
@@ -172,5 +177,11 @@ module Creek
172
177
  sheet_rels_filepath = expand_to_rels_path(sheet_filepath)
173
178
  parse_xml(sheet_rels_filepath).css("Relationship[@Id='#{drawing_rid}']").first.attributes['Target'].value
174
179
  end
180
+
181
+ def cell_id(column, use_simple_rows_format, row_number)
182
+ return "#{column}#{row_number}" unless use_simple_rows_format
183
+
184
+ with_headers && headers ? headers[column] : column
185
+ end
175
186
  end
176
187
  end
@@ -6,7 +6,7 @@ module Creek
6
6
  include Creek::Styles::Constants
7
7
 
8
8
  # Excel non-printable character escape sequence
9
- HEX_ESCAPE_REGEXP = /_x[0-9A-Za-z]{4}_/
9
+ HEX_ESCAPE_REGEXP = /_x[0-9A-Fa-f]{4}_/
10
10
 
11
11
  ##
12
12
  # The heart of typecasting. The ruby type is determined either explicitly
@@ -110,7 +110,7 @@ module Creek
110
110
 
111
111
  def self.convert_bignum(value)
112
112
  if defined?(BigDecimal)
113
- BigDecimal.new(value)
113
+ BigDecimal(value)
114
114
  else
115
115
  value.to_f
116
116
  end
data/lib/creek/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Creek
2
- VERSION = "2.4.3"
2
+ VERSION = "2.5.3"
3
3
  end
data/spec/sheet_spec.rb CHANGED
@@ -1,26 +1,26 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require './spec/spec_helper'
2
4
 
3
5
  describe 'sheet' do
4
6
  let(:book_with_images) { Creek::Book.new('spec/fixtures/sample-with-images.xlsx') }
5
- let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
6
7
  let(:sheetfile) { 'worksheets/sheet1.xml' }
7
8
  let(:sheet_with_images) { Creek::Sheet.new(book_with_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
8
- let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
9
9
 
10
10
  def load_cell(rows, cell_name)
11
- cell = rows.find { |row| !row[cell_name].nil? }
11
+ cell = rows.find { |row| row[cell_name] }
12
12
  cell[cell_name] if cell
13
13
  end
14
14
 
15
15
  context 'escaped ampersand' do
16
16
  let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
17
17
  it 'does NOT escape ampersand' do
18
- expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
18
+ expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
19
19
  end
20
20
 
21
21
  let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
22
22
  it 'does escape ampersand' do
23
- expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
23
+ expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
24
24
  end
25
25
  end
26
26
 
@@ -66,6 +66,9 @@ describe 'sheet' do
66
66
  end
67
67
 
68
68
  context 'with excel without images' do
69
+ let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
70
+ let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
71
+
69
72
  it 'does not break on with_images' do
70
73
  rows = sheet_no_images.with_images.rows.map { |r| r }
71
74
  expect(load_cell(rows, 'A10')).to eq(0.15)
@@ -94,4 +97,38 @@ describe 'sheet' do
94
97
  expect(image).to eq(nil)
95
98
  end
96
99
  end
100
+
101
+ describe '#simple_rows' do
102
+ let(:book_with_headers) { Creek::Book.new('spec/fixtures/sample-with-headers.xlsx') }
103
+ let(:sheet) { Creek::Sheet.new(book_with_headers, 'Sheet 1', 1, '', '', '1', sheetfile) }
104
+
105
+ subject { sheet.simple_rows.to_a[1] }
106
+
107
+ it 'returns values by letters' do
108
+ expect(subject['A']).to eq 'value1'
109
+ expect(subject['B']).to eq 'value2'
110
+ end
111
+
112
+ context 'when enable with_headers property' do
113
+ before { sheet.with_headers = true }
114
+
115
+ it 'returns values by headers name' do
116
+ expect(subject['HeaderA']).to eq 'value1'
117
+ expect(subject['HeaderB']).to eq 'value2'
118
+ expect(subject['HeaderC']).to eq 'value3'
119
+ end
120
+
121
+ it 'returns headers correctly when called multiple times' do
122
+ row = sheet.simple_rows.to_a[1]
123
+ expect(row['HeaderA']).to eq 'value1'
124
+ expect(row['HeaderB']).to eq 'value2'
125
+ expect(row['HeaderC']).to eq 'value3'
126
+
127
+ row = sheet.simple_rows.to_a[1]
128
+ expect(row['HeaderA']).to eq 'value1'
129
+ expect(row['HeaderB']).to eq 'value2'
130
+ expect(row['HeaderC']).to eq 'value3'
131
+ end
132
+ end
133
+ end
97
134
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,7 @@
1
1
  require 'creek'
2
2
  require 'pry'
3
+ require 'time'
3
4
 
5
+ RSpec.configure do |config|
6
+ config.filter_run_excluding remote: true
7
+ end
data/spec/test_spec.rb CHANGED
@@ -106,6 +106,20 @@ describe 'Creek parsing a sample XLSX file' do
106
106
  expect(@creek).not_to be_nil
107
107
  end
108
108
 
109
+ it 'opens small remote files successfully', remote: true do
110
+ url = 'https://file-examples.com/wp-content/uploads/2017/02/file_example_XLSX_10.xlsx'
111
+ @creek = Creek::Book.new(url, remote: true)
112
+
113
+ expect(@creek.sheets[0]).to be_a Creek::Sheet
114
+ end
115
+
116
+ it 'opens large remote files successfully', remote: true do
117
+ url = 'http://www.house.leg.state.mn.us/comm/docs/BanaianZooExample.xlsx'
118
+ @creek = Creek::Book.new(url, remote: true)
119
+
120
+ expect(@creek.sheets[0]).to be_a Creek::Sheet
121
+ end
122
+
109
123
  it 'find sheets successfully.' do
110
124
  expect(@creek.sheets.count).to eq(1)
111
125
  sheet = @creek.sheets.first
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: creek
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.3
4
+ version: 2.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - pythonicrubyist
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-12 00:00:00.000000000 Z
11
+ date: 2021-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: 2.1.2
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: 2.1.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -53,7 +53,7 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: 3.6.0
55
55
  - !ruby/object:Gem::Dependency
56
- name: pry
56
+ name: pry-byebug
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 1.7.0
75
+ version: 1.10.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
- version: 1.7.0
82
+ version: 1.10.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rubyzip
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.0.0
97
- - !ruby/object:Gem::Dependency
98
- name: http
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '4.0'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '4.0'
111
97
  description: A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast
112
98
  and efficiently.
113
99
  email:
@@ -141,6 +127,7 @@ files:
141
127
  - spec/fixtures/invalid.xls
142
128
  - spec/fixtures/large_numbers.xlsx
143
129
  - spec/fixtures/sample-as-zip.zip
130
+ - spec/fixtures/sample-with-headers.xlsx
144
131
  - spec/fixtures/sample-with-images.xlsx
145
132
  - spec/fixtures/sample.xlsx
146
133
  - spec/fixtures/sample_dates.xlsx
@@ -175,8 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
162
  - !ruby/object:Gem::Version
176
163
  version: '0'
177
164
  requirements: []
178
- rubyforge_project:
179
- rubygems_version: 2.7.8
165
+ rubygems_version: 3.0.3
180
166
  signing_key:
181
167
  specification_version: 4
182
168
  summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
@@ -188,6 +174,7 @@ test_files:
188
174
  - spec/fixtures/invalid.xls
189
175
  - spec/fixtures/large_numbers.xlsx
190
176
  - spec/fixtures/sample-as-zip.zip
177
+ - spec/fixtures/sample-with-headers.xlsx
191
178
  - spec/fixtures/sample-with-images.xlsx
192
179
  - spec/fixtures/sample.xlsx
193
180
  - spec/fixtures/sample_dates.xlsx