creek 2.4.3 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e0adb7a48d121c8eaf3500f58e25d2be013ce5b24b6d647443ec3dd866fee31
4
- data.tar.gz: f92ee6b1dc6195d7f64c4ea189e8f47af5674ca36c31e667a5db16aa50446d58
3
+ metadata.gz: 0d90975898b7d260d37f6543755f8c2f4d5931d6e387a56c343bce23eee7e3b1
4
+ data.tar.gz: 0c381e483070a0e883f128e1a03147a5acf60c4a1095b42ef0d3106af7889f8b
5
5
  SHA512:
6
- metadata.gz: 64384e3b96b50047dbad96f797ad9772dc3dbb5ba2c6f2e99467a55cf6460df883d9eb77ed2dc9eaf820ef81ebb373556328e0e48b6155f4a63180e59d2b630e
7
- data.tar.gz: 5ae4f07b1eedc8ee58acaef4844f8b6abf02ef24d4e98298aa6ad842f31270ccb446908d886a55cb9b88d4e066b4c7a75cf03ab9648153243f518d6d774f7deb
6
+ metadata.gz: 1f23c318b54b45c2d1732c874e3f6519e7d528175a395b888e3352f302fb1f0b8625cdde965f16540081163282751dd410a712281651b9b3d3b91c9e89bec800
7
+ data.tar.gz: e601060be6ba35b3bc3ded42865bce8b46fc3ca92feec1439dc2e719f132bb2359d06129f1110dc6676d07f7707364fead716f8f584a5851388a1d183c6e6825
data/.gitignore CHANGED
@@ -15,3 +15,6 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+
19
+ # Mac finder artifacts
20
+ .DS_Store
data/README.md CHANGED
@@ -100,6 +100,14 @@ remote_url = 'http://dev-builds.libreoffice.org/tmp/test.xlsx'
100
100
  Creek::Book.new remote_url, remote: true
101
101
  ```
102
102
 
103
+ ## Mapping cells with header names
104
+ By default, Creek maps cells by their letter-and-number names (A1, B3, etc.). To get cell values by header column name instead, pass ***with_headers*** when creating the book (this works only with the ***#simple_rows*** method). *(Note: the headers are taken from the first row of the sheet.)*
105
+
106
+ ```ruby
107
+ creek = Creek::Book.new file.path, with_headers: true
108
+ ```
109
+
110
+
103
111
  ## Contributing
104
112
 
105
113
  Contributions are welcomed. You can fork a repository, add your code changes to the forked branch, ensure all existing unit tests pass, create new unit tests which cover your new changes and finally create a pull request.
@@ -118,6 +126,12 @@ Once this is complete, you should be able to run the test suite:
118
126
  rake
119
127
  ```
120
128
 
129
+ Some remote tests are excluded by default. To run them, use:
130
+
131
+ ```
132
+ bundle exec rspec --tag remote
133
+ ```
134
+
121
135
  ## Bug Reporting
122
136
 
123
137
  Please use the [Issues](https://github.com/pythonicrubyist/creek/issues) page to report bugs or suggest new enhancements.
data/creek.gemspec CHANGED
@@ -20,12 +20,11 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.required_ruby_version = '>= 2.0.0'
22
22
 
23
- spec.add_development_dependency "bundler", "~> 1.3"
23
+ spec.add_development_dependency "bundler", "~> 2.1.2"
24
24
  spec.add_development_dependency "rake"
25
25
  spec.add_development_dependency 'rspec', '~> 3.6.0'
26
- spec.add_development_dependency 'pry'
26
+ spec.add_development_dependency 'pry-byebug'
27
27
 
28
- spec.add_dependency 'nokogiri', '>= 1.7.0'
28
+ spec.add_dependency 'nokogiri', '>= 1.10.0'
29
29
  spec.add_dependency 'rubyzip', '>= 1.0.0'
30
- spec.add_dependency 'http', '~> 4.0'
31
30
  end
data/lib/creek.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'creek/version'
2
4
  require 'creek/book'
3
5
  require 'creek/styles/constants'
data/lib/creek/book.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  require 'zip/filesystem'
2
2
  require 'nokogiri'
3
3
  require 'date'
4
- require 'http'
4
+ require 'open-uri'
5
5
 
6
6
  module Creek
7
-
8
7
  class Creek::Book
9
-
10
8
  attr_reader :files,
11
9
  :sheets,
12
- :shared_strings
10
+ :shared_strings,
11
+ :with_headers
13
12
 
14
13
  DATE_1900 = Date.new(1899, 12, 30).freeze
15
14
  DATE_1904 = Date.new(1904, 1, 1).freeze
@@ -20,15 +19,10 @@ module Creek
20
19
  extension = File.extname(options[:original_filename] || path).downcase
21
20
  raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension)
22
21
  end
23
- if options[:remote]
24
- zipfile = Tempfile.new("file")
25
- zipfile.binmode
26
- zipfile.write(HTTP.get(path).to_s)
27
- zipfile.close
28
- path = zipfile.path
29
- end
22
+ path = download_file(path) if options[:remote]
30
23
  @files = Zip::File.open(path)
31
24
  @shared_strings = SharedStrings.new(self)
25
+ @with_headers = options.fetch(:with_headers, false)
32
26
  end
33
27
 
34
28
  def sheets
@@ -47,7 +41,17 @@ module Creek
47
41
  rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship")
48
42
  @sheets = xml.css(cssPrefix+'sheet').map do |sheet|
49
43
  sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target")
50
- Sheet.new(self, sheet.attr("name"), sheet.attr("sheetid"), sheet.attr("state"), sheet.attr("visible"), sheet.attr("r:id"), sheetfile)
44
+ sheet = Sheet.new(
45
+ self,
46
+ sheet.attr("name"),
47
+ sheet.attr("sheetid"),
48
+ sheet.attr("state"),
49
+ sheet.attr("visible"),
50
+ sheet.attr("r:id"),
51
+ sheetfile
52
+ )
53
+ sheet.with_headers = with_headers
54
+ sheet
51
55
  end
52
56
  end
53
57
 
@@ -79,5 +83,20 @@ module Creek
79
83
  result
80
84
  end
81
85
  end
86
+
87
+ private
88
+
89
+ def download_file(url)
90
+ # OpenUri will return a StringIO if under OpenURI::Buffer::StringMax
91
+ # threshold, and a Tempfile if over.
92
+ downloaded = URI(url).open
93
+ if downloaded.is_a? StringIO
94
+ path = Tempfile.new(['creek-file', '.xlsx']).path
95
+ File.binwrite(path, downloaded.read)
96
+ path
97
+ else
98
+ downloaded.path
99
+ end
100
+ end
82
101
  end
83
102
  end
data/lib/creek/drawing.rb CHANGED
@@ -83,7 +83,9 @@ module Creek
83
83
  col_to_selector = 'xdr:to/xdr:col'.freeze
84
84
 
85
85
  @drawings.xpath('//xdr:twoCellAnchor').each do |drawing|
86
- embed = drawing.xpath(image_selector).first.attributes['embed']
86
+ # embed = drawing.xpath(image_selector).first.attributes['embed']
87
+ temp = drawing.xpath(image_selector).first
88
+ embed = temp.attributes['embed'] if temp
87
89
  next if embed.nil?
88
90
 
89
91
  rid = embed.value
data/lib/creek/sheet.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zip/filesystem'
2
4
  require 'nokogiri'
3
5
 
@@ -5,16 +7,19 @@ module Creek
5
7
  class Creek::Sheet
6
8
  include Creek::Utils
7
9
 
10
+ HEADERS_ROW_NUMBER = '1'
11
+
12
+ attr_accessor :with_headers
8
13
  attr_reader :book,
9
14
  :name,
10
15
  :sheetid,
11
16
  :state,
12
17
  :visible,
13
18
  :rid,
14
- :index
15
-
19
+ :index,
20
+ :headers
16
21
 
17
- def initialize book, name, sheetid, state, visible, rid, sheetfile
22
+ def initialize(book, name, sheetid, state, visible, rid, sheetfile)
18
23
  @book = book
19
24
  @name = name
20
25
  @sheetid = sheetid
@@ -46,7 +51,6 @@ module Creek
46
51
  @drawing.images_at(cell) if @images_present
47
52
  end
48
53
 
49
-
50
54
  ##
51
55
  # Provides an Enumerator that returns a hash representing each row.
52
56
  # The key of the hash is the column ID and the value is the value of the cell.
@@ -88,36 +92,39 @@ module Creek
88
92
  opener = Nokogiri::XML::Reader::TYPE_ELEMENT
89
93
  closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT
90
94
  Enumerator.new do |y|
95
+ @headers = nil
91
96
  row, cells, cell = nil, {}, nil
92
- cell_type = nil
97
+ cell_type = nil
93
98
  cell_style_idx = nil
94
99
  @book.files.file.open(path) do |xml|
95
100
  Nokogiri::XML::Reader.from_io(xml).each do |node|
96
- if (node.name.eql? 'row') and (node.node_type.eql? opener)
101
+ if node.name == 'row' && node.node_type == opener
97
102
  row = node.attributes
98
- row['cells'] = Hash.new
99
- cells = Hash.new
103
+ row['cells'] = {}
104
+ cells = {}
100
105
  y << (include_meta_data ? row : cells) if node.self_closing?
101
- elsif (node.name.eql? 'row') and (node.node_type.eql? closer)
106
+ elsif node.name == 'row' && node.node_type == closer
102
107
  processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format)
108
+ @headers = processed_cells if with_headers && row['r'] == HEADERS_ROW_NUMBER
103
109
 
104
110
  if @images_present
105
111
  processed_cells.each do |cell_name, cell_value|
106
112
  next unless cell_value.nil?
113
+
107
114
  processed_cells[cell_name] = images_at(cell_name)
108
115
  end
109
116
  end
110
117
 
111
118
  row['cells'] = processed_cells
112
119
  y << (include_meta_data ? row : processed_cells)
113
- elsif (node.name.eql? 'c') and (node.node_type.eql? opener)
120
+ elsif node.name == 'c' && node.node_type == opener
114
121
  cell_type = node.attributes['t']
115
122
  cell_style_idx = node.attributes['s']
116
123
  cell = node.attributes['r']
117
- elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
124
+ elsif %w[v t].include?(node.name) && node.node_type == opener
118
125
  unless cell.nil?
119
126
  node.read
120
- cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
127
+ cells[cell] = convert(node.value, cell_type, cell_style_idx)
121
128
  end
122
129
  end
123
130
  end
@@ -142,15 +149,13 @@ module Creek
142
149
  # The unzipped XML file does not contain any node for empty cells.
143
150
  # Empty cells are being padded in using this function
144
151
  def fill_in_empty_cells(cells, row_number, last_col, use_simple_rows_format)
145
- new_cells = Hash.new
152
+ new_cells = {}
153
+ return new_cells if cells.empty?
146
154
 
147
- unless cells.empty?
148
- last_col = last_col.gsub(row_number, '')
149
-
150
- ("A"..last_col).to_a.each do |column|
151
- id = use_simple_rows_format ? "#{column}" : "#{column}#{row_number}"
152
- new_cells[id] = cells[id]
153
- end
155
+ last_col = last_col.gsub(row_number, '')
156
+ ('A'..last_col).to_a.each do |column|
157
+ id = cell_id(column, use_simple_rows_format, row_number)
158
+ new_cells[id] = cells["#{column}#{row_number}"]
154
159
  end
155
160
 
156
161
  new_cells
@@ -172,5 +177,11 @@ module Creek
172
177
  sheet_rels_filepath = expand_to_rels_path(sheet_filepath)
173
178
  parse_xml(sheet_rels_filepath).css("Relationship[@Id='#{drawing_rid}']").first.attributes['Target'].value
174
179
  end
180
+
181
+ def cell_id(column, use_simple_rows_format, row_number)
182
+ return "#{column}#{row_number}" unless use_simple_rows_format
183
+
184
+ with_headers && headers ? headers[column] : column
185
+ end
175
186
  end
176
187
  end
@@ -6,7 +6,7 @@ module Creek
6
6
  include Creek::Styles::Constants
7
7
 
8
8
  # Excel non-printable character escape sequence
9
- HEX_ESCAPE_REGEXP = /_x[0-9A-Za-z]{4}_/
9
+ HEX_ESCAPE_REGEXP = /_x[0-9A-Fa-f]{4}_/
10
10
 
11
11
  ##
12
12
  # The heart of typecasting. The ruby type is determined either explicitly
@@ -110,7 +110,7 @@ module Creek
110
110
 
111
111
  def self.convert_bignum(value)
112
112
  if defined?(BigDecimal)
113
- BigDecimal.new(value)
113
+ BigDecimal(value)
114
114
  else
115
115
  value.to_f
116
116
  end
data/lib/creek/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Creek
2
- VERSION = "2.4.3"
2
+ VERSION = "2.5.3"
3
3
  end
data/spec/sheet_spec.rb CHANGED
@@ -1,26 +1,26 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require './spec/spec_helper'
2
4
 
3
5
  describe 'sheet' do
4
6
  let(:book_with_images) { Creek::Book.new('spec/fixtures/sample-with-images.xlsx') }
5
- let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
6
7
  let(:sheetfile) { 'worksheets/sheet1.xml' }
7
8
  let(:sheet_with_images) { Creek::Sheet.new(book_with_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
8
- let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
9
9
 
10
10
  def load_cell(rows, cell_name)
11
- cell = rows.find { |row| !row[cell_name].nil? }
11
+ cell = rows.find { |row| row[cell_name] }
12
12
  cell[cell_name] if cell
13
13
  end
14
14
 
15
15
  context 'escaped ampersand' do
16
16
  let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
17
17
  it 'does NOT escape ampersand' do
18
- expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
18
+ expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
19
19
  end
20
20
 
21
21
  let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
22
22
  it 'does escape ampersand' do
23
- expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
23
+ expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
24
24
  end
25
25
  end
26
26
 
@@ -66,6 +66,9 @@ describe 'sheet' do
66
66
  end
67
67
 
68
68
  context 'with excel without images' do
69
+ let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
70
+ let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
71
+
69
72
  it 'does not break on with_images' do
70
73
  rows = sheet_no_images.with_images.rows.map { |r| r }
71
74
  expect(load_cell(rows, 'A10')).to eq(0.15)
@@ -94,4 +97,38 @@ describe 'sheet' do
94
97
  expect(image).to eq(nil)
95
98
  end
96
99
  end
100
+
101
+ describe '#simple_rows' do
102
+ let(:book_with_headers) { Creek::Book.new('spec/fixtures/sample-with-headers.xlsx') }
103
+ let(:sheet) { Creek::Sheet.new(book_with_headers, 'Sheet 1', 1, '', '', '1', sheetfile) }
104
+
105
+ subject { sheet.simple_rows.to_a[1] }
106
+
107
+ it 'returns values by letters' do
108
+ expect(subject['A']).to eq 'value1'
109
+ expect(subject['B']).to eq 'value2'
110
+ end
111
+
112
+ context 'when enable with_headers property' do
113
+ before { sheet.with_headers = true }
114
+
115
+ it 'returns values by headers name' do
116
+ expect(subject['HeaderA']).to eq 'value1'
117
+ expect(subject['HeaderB']).to eq 'value2'
118
+ expect(subject['HeaderC']).to eq 'value3'
119
+ end
120
+
121
+ it 'returns headers correctly when called multiple times' do
122
+ row = sheet.simple_rows.to_a[1]
123
+ expect(row['HeaderA']).to eq 'value1'
124
+ expect(row['HeaderB']).to eq 'value2'
125
+ expect(row['HeaderC']).to eq 'value3'
126
+
127
+ row = sheet.simple_rows.to_a[1]
128
+ expect(row['HeaderA']).to eq 'value1'
129
+ expect(row['HeaderB']).to eq 'value2'
130
+ expect(row['HeaderC']).to eq 'value3'
131
+ end
132
+ end
133
+ end
97
134
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,7 @@
1
1
  require 'creek'
2
2
  require 'pry'
3
+ require 'time'
3
4
 
5
+ RSpec.configure do |config|
6
+ config.filter_run_excluding remote: true
7
+ end
data/spec/test_spec.rb CHANGED
@@ -106,6 +106,20 @@ describe 'Creek parsing a sample XLSX file' do
106
106
  expect(@creek).not_to be_nil
107
107
  end
108
108
 
109
+ it 'opens small remote files successfully', remote: true do
110
+ url = 'https://file-examples.com/wp-content/uploads/2017/02/file_example_XLSX_10.xlsx'
111
+ @creek = Creek::Book.new(url, remote: true)
112
+
113
+ expect(@creek.sheets[0]).to be_a Creek::Sheet
114
+ end
115
+
116
+ it 'opens large remote files successfully', remote: true do
117
+ url = 'http://www.house.leg.state.mn.us/comm/docs/BanaianZooExample.xlsx'
118
+ @creek = Creek::Book.new(url, remote: true)
119
+
120
+ expect(@creek.sheets[0]).to be_a Creek::Sheet
121
+ end
122
+
109
123
  it 'find sheets successfully.' do
110
124
  expect(@creek.sheets.count).to eq(1)
111
125
  sheet = @creek.sheets.first
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: creek
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.3
4
+ version: 2.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - pythonicrubyist
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-12 00:00:00.000000000 Z
11
+ date: 2021-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: 2.1.2
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: 2.1.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -53,7 +53,7 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: 3.6.0
55
55
  - !ruby/object:Gem::Dependency
56
- name: pry
56
+ name: pry-byebug
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 1.7.0
75
+ version: 1.10.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
- version: 1.7.0
82
+ version: 1.10.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rubyzip
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.0.0
97
- - !ruby/object:Gem::Dependency
98
- name: http
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '4.0'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '4.0'
111
97
  description: A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast
112
98
  and efficiently.
113
99
  email:
@@ -141,6 +127,7 @@ files:
141
127
  - spec/fixtures/invalid.xls
142
128
  - spec/fixtures/large_numbers.xlsx
143
129
  - spec/fixtures/sample-as-zip.zip
130
+ - spec/fixtures/sample-with-headers.xlsx
144
131
  - spec/fixtures/sample-with-images.xlsx
145
132
  - spec/fixtures/sample.xlsx
146
133
  - spec/fixtures/sample_dates.xlsx
@@ -175,8 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
162
  - !ruby/object:Gem::Version
176
163
  version: '0'
177
164
  requirements: []
178
- rubyforge_project:
179
- rubygems_version: 2.7.8
165
+ rubygems_version: 3.0.3
180
166
  signing_key:
181
167
  specification_version: 4
182
168
  summary: A Ruby gem for parsing large Excel(xlsx and xlsm) files.
@@ -188,6 +174,7 @@ test_files:
188
174
  - spec/fixtures/invalid.xls
189
175
  - spec/fixtures/large_numbers.xlsx
190
176
  - spec/fixtures/sample-as-zip.zip
177
+ - spec/fixtures/sample-with-headers.xlsx
191
178
  - spec/fixtures/sample-with-images.xlsx
192
179
  - spec/fixtures/sample.xlsx
193
180
  - spec/fixtures/sample_dates.xlsx