roo 2.0.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/CHANGELOG.md +16 -1
- data/README.md +8 -11
- data/lib/roo/base.rb +84 -84
- data/lib/roo/constants.rb +5 -0
- data/lib/roo/excelx/shared_strings.rb +10 -0
- data/lib/roo/excelx.rb +12 -13
- data/lib/roo/libre_office.rb +1 -2
- data/lib/roo/open_office.rb +454 -521
- data/lib/roo/spreadsheet.rb +3 -1
- data/lib/roo/version.rb +1 -1
- data/lib/roo.rb +5 -3
- data/spec/helpers.rb +5 -0
- data/spec/lib/roo/base_spec.rb +212 -0
- data/spec/lib/roo/excelx_spec.rb +13 -0
- data/spec/lib/roo/spreadsheet_spec.rb +20 -0
- data/spec/spec_helper.rb +6 -1
- data/test/all_ss.rb +12 -11
- data/test/test_helper.rb +0 -4
- data/test/test_roo.rb +2091 -2088
- metadata +5 -3
- data/test/test_generic_spreadsheet.rb +0 -237
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53d37d09a786f9536fd5bb00c7cacf9888c8b032
|
4
|
+
data.tar.gz: 284974b328b26668c58e3b952a12b8777439037f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b0e6d58d52600710fc5feb49e7483fc81e4bca65fa9013a3fa1004f5526107392b02f982ae981676544e301056a0246b4b85e38d3959565174c247ce49695a9
|
7
|
+
data.tar.gz: 5c3beb6580bece8aa730d1f29b6316889ebb7ce5a7003d8f654edcb1c9800a9f939021fd0d88749489ffdee6ebf496739664e5fede43aeb0067a2a16ad46f2f5
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## [2.1.1] - 2015-08-02
|
2
|
+
### Fixed invalid new lines with _x000D_ character. [#231](https://github.com/roo-rb/roo/pull/231)
|
3
|
+
### Fixed missing URI issue. [#245](https://github.com/roo-rb/roo/pull/245)
|
4
|
+
|
5
|
+
## [2.1.0] - 2015-07-18
|
6
|
+
### Added
|
7
|
+
- Added support for Excel 2007 `xlsm` files. [#232](https://github.com/roo-rb/roo/pull/232)
|
8
|
+
- Roo::Excelx returns an enumerator when calling each_row_streaming without a block. [#224](https://github.com/roo-rb/roo/pull/224)
|
9
|
+
- Returns an enumerator when calling `each` without a block. [#219](https://github.com/roo-rb/roo/pull/219)
|
10
|
+
|
11
|
+
### Fixed
|
12
|
+
- Removed tabs and windows CRLF. [#235](https://github.com/roo-rb/roo/pull/235), [#234](https://github.com/roo-rb/roo/pull/234)
|
13
|
+
- Fixed Regexp to only check for valid URIs when opening a spreadsheet. [#229](https://github.com/roo-rb/roo/pull/228)
|
14
|
+
- Open streams in Roo::Excelx correctly. [#222](https://github.com/roo-rb/roo/pull/222)
|
15
|
+
|
1
16
|
## [2.0.1] - 2015-06-01
|
2
17
|
### Added
|
3
18
|
- Return an enumerator when calling '#each' without a block [#219](https://github.com/roo-rb/roo/pull/219)
|
@@ -464,7 +479,7 @@
|
|
464
479
|
|
465
480
|
## [0.2.4] - 2007-06-16
|
466
481
|
### Fixed
|
467
|
-
- ID 11605
|
482
|
+
- ID 11605 Two cols with same value: crash roo (openoffice version only)
|
468
483
|
|
469
484
|
## [0.2.3] - 2007-06-02
|
470
485
|
### Changed / Added
|
data/README.md
CHANGED
@@ -3,16 +3,13 @@
|
|
3
3
|
[](https://travis-ci.org/roo-rb/roo) [](https://codeclimate.com/github/roo-rb/roo) [](https://coveralls.io/r/roo-rb/roo) [](https://rubygems.org/gems/roo)
|
4
4
|
|
5
5
|
Roo implements read access for all common spreadsheet types. It can handle:
|
6
|
-
|
7
|
-
*
|
8
|
-
* OpenOffice / LibreOffice
|
6
|
+
* Excel 2007 - 2013 formats (xlsx, xlsm)
|
7
|
+
* LibreOffice / OpenOffice.org formats (ods)
|
9
8
|
* CSV
|
9
|
+
* Excel 97, Excel 2002 XML, and Excel 2003 XML formats when using the [roo-xls](https://github.com/roo-rb/roo-xls) gem (xls, xml)
|
10
|
+
* Google spreadsheets with read/write access when using [roo-google](https://github.com/roo-rb/roo-google)
|
10
11
|
|
11
|
-
##
|
12
|
-
|
13
|
-
In addition, the [roo-xls](https://github.com/roo-rb/roo-xls) and [roo-google](https://github.com/roo-rb/roo-google) gems exist to extend Roo to support reading classic Excel formats (i.e. `.xls` and ``Excel2003XML``) and read/write access for Google spreadsheets.
|
14
|
-
|
15
|
-
# #Installation
|
12
|
+
## Installation
|
16
13
|
|
17
14
|
Install as a gem
|
18
15
|
|
@@ -21,7 +18,7 @@ Install as a gem
|
|
21
18
|
Or add it to your Gemfile
|
22
19
|
|
23
20
|
```ruby
|
24
|
-
gem 'roo', '~> 2.
|
21
|
+
gem 'roo', '~> 2.1.0'
|
25
22
|
```
|
26
23
|
## Usage
|
27
24
|
|
@@ -102,7 +99,7 @@ s.cell(1,'A',s.sheets[1])
|
|
102
99
|
```
|
103
100
|
|
104
101
|
#### Querying a spreadsheet
|
105
|
-
Use ``each``
|
102
|
+
Use ``each`` to iterate over each row.
|
106
103
|
|
107
104
|
If each is given a hash with the names of some columns, then each will generate a hash with the columns supplied for each row.
|
108
105
|
|
@@ -143,7 +140,7 @@ sheet.to_xml
|
|
143
140
|
sheet.to_yaml
|
144
141
|
```
|
145
142
|
|
146
|
-
### Excel (xlsx) Support
|
143
|
+
### Excel (xlsx and xlsm) Support
|
147
144
|
|
148
145
|
Stream rows from an Excelx spreadsheet.
|
149
146
|
|
data/lib/roo/base.rb
CHANGED
@@ -91,7 +91,7 @@ class Roo::Base
|
|
91
91
|
first_column = [first_column, key.last.to_i].min
|
92
92
|
last_column = [last_column, key.last.to_i].max
|
93
93
|
end if @cell[sheet]
|
94
|
-
{first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column}
|
94
|
+
{ first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column }
|
95
95
|
end
|
96
96
|
|
97
97
|
%w(first_row last_row first_column last_column).each do |key|
|
@@ -117,22 +117,23 @@ class Roo::Base
|
|
117
117
|
result = "--- \n"
|
118
118
|
from_row.upto(to_row) do |row|
|
119
119
|
from_column.upto(to_column) do |col|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
result << " value: #{value} \n"
|
120
|
+
next if empty?(row, col, sheet)
|
121
|
+
|
122
|
+
result << "cell_#{row}_#{col}: \n"
|
123
|
+
prefix.each do|k, v|
|
124
|
+
result << " #{k}: #{v} \n"
|
125
|
+
end
|
126
|
+
result << " row: #{row} \n"
|
127
|
+
result << " col: #{col} \n"
|
128
|
+
result << " celltype: #{celltype(row, col, sheet)} \n"
|
129
|
+
value = cell(row, col, sheet)
|
130
|
+
if celltype(row, col, sheet) == :time
|
131
|
+
value = integer_to_timestring(value)
|
133
132
|
end
|
133
|
+
result << " value: #{value} \n"
|
134
134
|
end
|
135
135
|
end
|
136
|
+
|
136
137
|
result
|
137
138
|
end
|
138
139
|
|
@@ -170,7 +171,7 @@ class Roo::Base
|
|
170
171
|
end
|
171
172
|
|
172
173
|
def inspect
|
173
|
-
"<##{
|
174
|
+
"<##{self.class}:#{object_id.to_s(8)} #{instance_variables.join(' ')}>"
|
174
175
|
end
|
175
176
|
|
176
177
|
# find a row either by row number or a condition
|
@@ -217,7 +218,7 @@ class Roo::Base
|
|
217
218
|
row, col = normalize(row, col)
|
218
219
|
cell_type = cell_type_by_value(value)
|
219
220
|
set_value(row, col, value, sheet)
|
220
|
-
set_type(row, col, cell_type
|
221
|
+
set_type(row, col, cell_type, sheet)
|
221
222
|
end
|
222
223
|
|
223
224
|
def cell_type_by_value(value)
|
@@ -225,7 +226,7 @@ class Roo::Base
|
|
225
226
|
when Fixnum then :float
|
226
227
|
when String, Float then :string
|
227
228
|
else
|
228
|
-
|
229
|
+
fail ArgumentError, "Type for #{value} not set"
|
229
230
|
end
|
230
231
|
end
|
231
232
|
|
@@ -256,13 +257,13 @@ class Roo::Base
|
|
256
257
|
sheets.each do|sheet|
|
257
258
|
self.default_sheet = sheet
|
258
259
|
result << 'Sheet ' + n.to_s + ":\n"
|
259
|
-
|
260
|
-
result << ' - empty -'
|
261
|
-
else
|
260
|
+
if first_row
|
262
261
|
result << " First row: #{first_row}\n"
|
263
262
|
result << " Last row: #{last_row}\n"
|
264
263
|
result << " First column: #{::Roo::Utils.number_to_letter(first_column)}\n"
|
265
264
|
result << " Last column: #{::Roo::Utils.number_to_letter(last_column)}"
|
265
|
+
else
|
266
|
+
result << ' - empty -'
|
266
267
|
end
|
267
268
|
result << "\n" if sheet != sheets.last
|
268
269
|
n += 1
|
@@ -282,12 +283,12 @@ class Roo::Base
|
|
282
283
|
# sonst gibt es Fehler bei leeren Blaettern
|
283
284
|
first_row.upto(last_row) do |row|
|
284
285
|
first_column.upto(last_column) do |col|
|
285
|
-
|
286
|
-
|
286
|
+
next if empty?(row, col)
|
287
|
+
|
288
|
+
x.cell(cell(row, col),
|
287
289
|
row: row,
|
288
290
|
column: col,
|
289
291
|
type: celltype(row, col))
|
290
|
-
end
|
291
292
|
end
|
292
293
|
end
|
293
294
|
end
|
@@ -318,7 +319,7 @@ class Roo::Base
|
|
318
319
|
# access different worksheets by calling spreadsheet.sheet(1)
|
319
320
|
# or spreadsheet.sheet('SHEETNAME')
|
320
321
|
def sheet(index, name = false)
|
321
|
-
self.default_sheet = String
|
322
|
+
self.default_sheet = index.is_a?(::String) ? index : sheets[index]
|
322
323
|
name ? [default_sheet, self] : self
|
323
324
|
end
|
324
325
|
|
@@ -352,25 +353,23 @@ class Roo::Base
|
|
352
353
|
# control characters and white spaces around columns
|
353
354
|
|
354
355
|
def each(options = {})
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
else
|
361
|
-
clean_sheet_if_need(options)
|
362
|
-
search_or_set_header(options)
|
363
|
-
headers = @headers ||
|
364
|
-
Hash[(first_column..last_column).map do |col|
|
365
|
-
[cell(@header_line, col), col]
|
366
|
-
end]
|
367
|
-
|
368
|
-
@header_line.upto(last_row) do |line|
|
369
|
-
yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
|
370
|
-
end
|
356
|
+
return to_enum(:each, options) unless block_given?
|
357
|
+
|
358
|
+
if options.empty?
|
359
|
+
1.upto(last_row) do |line|
|
360
|
+
yield row(line)
|
371
361
|
end
|
372
362
|
else
|
373
|
-
|
363
|
+
clean_sheet_if_need(options)
|
364
|
+
search_or_set_header(options)
|
365
|
+
headers = @headers ||
|
366
|
+
Hash[(first_column..last_column).map do |col|
|
367
|
+
[cell(@header_line, col), col]
|
368
|
+
end]
|
369
|
+
|
370
|
+
@header_line.upto(last_row) do |line|
|
371
|
+
yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
|
372
|
+
end
|
374
373
|
end
|
375
374
|
end
|
376
375
|
|
@@ -401,30 +400,32 @@ class Roo::Base
|
|
401
400
|
|
402
401
|
protected
|
403
402
|
|
404
|
-
def file_type_check(filename,
|
403
|
+
def file_type_check(filename, exts, name, warning_level, packed = nil)
|
405
404
|
if packed == :zip
|
406
|
-
#
|
407
|
-
#
|
408
|
-
#
|
405
|
+
# spreadsheet.ods.zip => spreadsheet.ods
|
406
|
+
# Decompression is not performed here, only the 'zip' extension
|
407
|
+
# is removed from the file.
|
409
408
|
filename = File.basename(filename, File.extname(filename))
|
410
409
|
end
|
411
410
|
|
412
|
-
if uri?(filename) && qs_begin = filename.rindex('?')
|
411
|
+
if uri?(filename) && (qs_begin = filename.rindex('?'))
|
413
412
|
filename = filename[0..qs_begin - 1]
|
414
413
|
end
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
414
|
+
exts = Array(exts)
|
415
|
+
|
416
|
+
return if exts.include?(File.extname(filename).downcase)
|
417
|
+
|
418
|
+
case warning_level
|
419
|
+
when :error
|
420
|
+
warn file_type_warning_message(filename, exts)
|
421
|
+
fail TypeError, "#{filename} is not #{name} file"
|
422
|
+
when :warning
|
423
|
+
warn "are you sure, this is #{name} spreadsheet file?"
|
424
|
+
warn file_type_warning_message(filename, exts)
|
425
|
+
when :ignore
|
426
|
+
# ignore
|
427
|
+
else
|
428
|
+
fail "#{warning_level} illegal state of file_warning"
|
428
429
|
end
|
429
430
|
end
|
430
431
|
|
@@ -475,16 +476,18 @@ class Roo::Base
|
|
475
476
|
return if is_stream?(filename)
|
476
477
|
filename = download_uri(filename, tmpdir) if uri?(filename)
|
477
478
|
filename = unzip(filename, tmpdir) if packed == :zip
|
478
|
-
|
479
|
-
|
480
|
-
|
479
|
+
|
480
|
+
fail IOError, "file #{filename} does not exist" unless File.file?(filename)
|
481
|
+
|
481
482
|
filename
|
482
483
|
end
|
483
484
|
|
484
|
-
def file_type_warning_message(filename,
|
485
|
-
|
485
|
+
def file_type_warning_message(filename, exts)
|
486
|
+
*rest, last_ext = exts
|
487
|
+
ext_list = rest.any? ? "#{rest.join(', ')} or #{last_ext}" : last_ext
|
488
|
+
"use #{Roo::CLASS_FOR_EXTENSION.fetch(last_ext.sub('.', '').to_sym)}.new to handle #{ext_list} spreadsheet files. This has #{File.extname(filename).downcase}"
|
486
489
|
rescue KeyError
|
487
|
-
raise "unknown file
|
490
|
+
raise "unknown file types: #{ext_list}"
|
488
491
|
end
|
489
492
|
|
490
493
|
def find_by_row(row_index)
|
@@ -533,11 +536,8 @@ class Roo::Base
|
|
533
536
|
end
|
534
537
|
|
535
538
|
def make_tmpdir(prefix = nil, root = nil, &block)
|
536
|
-
prefix =
|
537
|
-
|
538
|
-
else
|
539
|
-
TEMP_PREFIX
|
540
|
-
end
|
539
|
+
prefix = "#{TEMP_PREFIX}#{prefix}"
|
540
|
+
|
541
541
|
::Dir.mktmpdir(prefix, root || ENV['ROO_TMP'], &block).tap do |result|
|
542
542
|
block_given? || track_tmpdir!(result)
|
543
543
|
end
|
@@ -585,9 +585,9 @@ class Roo::Base
|
|
585
585
|
fail ArgumentError
|
586
586
|
end
|
587
587
|
end
|
588
|
-
|
589
|
-
|
590
|
-
|
588
|
+
|
589
|
+
col = ::Roo::Utils.letter_to_number(col) if col.is_a?(::String)
|
590
|
+
|
591
591
|
[row, col]
|
592
592
|
end
|
593
593
|
|
@@ -638,7 +638,7 @@ class Roo::Base
|
|
638
638
|
fail RangeError, "sheet index #{sheet} not found"
|
639
639
|
end
|
640
640
|
when String
|
641
|
-
unless sheets.include?
|
641
|
+
unless sheets.include?(sheet)
|
642
642
|
fail RangeError, "sheet '#{sheet}' not found"
|
643
643
|
end
|
644
644
|
else
|
@@ -667,14 +667,14 @@ class Roo::Base
|
|
667
667
|
# parameter is nil the output goes to STDOUT
|
668
668
|
def write_csv_content(file = nil, sheet = nil, separator = ',')
|
669
669
|
file ||= STDOUT
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
670
|
+
return unless first_row(sheet) # The sheet is empty
|
671
|
+
|
672
|
+
1.upto(last_row(sheet)) do |row|
|
673
|
+
1.upto(last_column(sheet)) do |col|
|
674
|
+
file.print(separator) if col > 1
|
675
|
+
file.print cell_to_csv(row, col, sheet)
|
676
|
+
end
|
677
|
+
file.print("\n")
|
678
678
|
end
|
679
679
|
end
|
680
680
|
|
@@ -726,9 +726,9 @@ class Roo::Base
|
|
726
726
|
# converts an integer value to a time string like '02:05:06'
|
727
727
|
def integer_to_timestring(content)
|
728
728
|
h = (content / 3600.0).floor
|
729
|
-
content
|
729
|
+
content -= h * 3600
|
730
730
|
m = (content / 60.0).floor
|
731
|
-
content
|
731
|
+
content -= m * 60
|
732
732
|
s = content
|
733
733
|
sprintf('%02d:%02d:%02d', h, m, s)
|
734
734
|
end
|
@@ -0,0 +1,5 @@
|
|
1
|
+
module Roo
|
2
|
+
ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze
|
3
|
+
ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze
|
4
|
+
ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze
|
5
|
+
end
|
@@ -13,9 +13,19 @@ module Roo
|
|
13
13
|
|
14
14
|
private
|
15
15
|
|
16
|
+
def fix_invalid_shared_strings(doc)
|
17
|
+
invalid = { '_x000D_' => "\n" }
|
18
|
+
xml = doc.to_s
|
19
|
+
|
20
|
+
if xml[/#{invalid.keys.join('|')}/]
|
21
|
+
@doc = ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
16
25
|
def extract_shared_strings
|
17
26
|
return [] unless doc_exists?
|
18
27
|
|
28
|
+
fix_invalid_shared_strings(doc)
|
19
29
|
# read the shared strings xml document
|
20
30
|
doc.xpath('/sst/si').map do |si|
|
21
31
|
shared_string = ''
|
data/lib/roo/excelx.rb
CHANGED
@@ -89,7 +89,7 @@ module Roo
|
|
89
89
|
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
90
90
|
|
91
91
|
unless is_stream?(filename_or_stream)
|
92
|
-
file_type_check(filename_or_stream,
|
92
|
+
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
|
93
93
|
basename = File.basename(filename_or_stream)
|
94
94
|
end
|
95
95
|
|
@@ -321,7 +321,12 @@ module Roo
|
|
321
321
|
# Yield an array of Excelx::Cell
|
322
322
|
# Takes options for sheet, pad_cells, and max_rows
|
323
323
|
def each_row_streaming(options = {})
|
324
|
-
sheet_for(options.delete(:sheet))
|
324
|
+
sheet = sheet_for(options.delete(:sheet))
|
325
|
+
if block_given?
|
326
|
+
sheet.each_row(options) { |row| yield row }
|
327
|
+
else
|
328
|
+
sheet.to_enum(:each_row, options)
|
329
|
+
end
|
325
330
|
end
|
326
331
|
|
327
332
|
private
|
@@ -409,19 +414,13 @@ module Roo
|
|
409
414
|
@sheet_files = []
|
410
415
|
|
411
416
|
unless is_stream?(zipfilename_or_stream)
|
412
|
-
|
417
|
+
zip_file = Zip::File.open(zipfilename_or_stream)
|
413
418
|
else
|
414
|
-
|
415
|
-
|
416
|
-
entries = []
|
417
|
-
while (entry = stream.get_next_entry)
|
418
|
-
entries << entry
|
419
|
-
end
|
420
|
-
process_zipfile_entries entries
|
421
|
-
ensure
|
422
|
-
stream.close
|
423
|
-
end
|
419
|
+
zip_file = Zip::CentralDirectory.new
|
420
|
+
zip_file.read_from_stream zipfilename_or_stream
|
424
421
|
end
|
422
|
+
|
423
|
+
process_zipfile_entries zip_file.to_a.sort_by(&:name)
|
425
424
|
end
|
426
425
|
|
427
426
|
def process_zipfile_entries(entries)
|
data/lib/roo/libre_office.rb
CHANGED