roo 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -2
- data/CHANGELOG.md +19 -0
- data/Gemfile +1 -0
- data/Gemfile_ruby2 +27 -0
- data/README.md +1 -1
- data/lib/roo.rb +2 -0
- data/lib/roo/base.rb +16 -13
- data/lib/roo/csv.rb +1 -1
- data/lib/roo/excelx.rb +8 -4
- data/lib/roo/excelx/extractor.rb +0 -1
- data/lib/roo/excelx/shared_strings.rb +12 -5
- data/lib/roo/excelx/sheet_doc.rb +15 -8
- data/lib/roo/open_office.rb +20 -14
- data/lib/roo/tempdir.rb +26 -0
- data/lib/roo/version.rb +1 -1
- data/test/excelx/cell/test_datetime.rb +0 -3
- data/test/test_roo.rb +15 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 975d999c227f6bc66a6819e0e69c3ebceb3c76c9
|
4
|
+
data.tar.gz: 9b790645ff9ad0e2a8e1a84c9deb465bf5e5154b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b197a0ae99ceeb81ff7a8bf12099944f0b2667dbc25ee85380d64bd1117309cc50e98eece758824ee9d5c0b022b354b3c64a10357035281ee7191f79cb479d1
|
7
|
+
data.tar.gz: a73ae23bc6419fcacad79d85496e6349ef0b45956382cc1e93af0f4be57614248a1e1db33dcc87774a2b8cbbb58098d8526ecc4267c79c86f242acec161815dc
|
data/.travis.yml
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
language: ruby
|
2
2
|
rvm:
|
3
|
-
- 2.0.0
|
4
|
-
- 2.1.8
|
5
3
|
- 2.2.4
|
4
|
+
- 2.3.1
|
6
5
|
- ruby-head
|
7
6
|
- jruby-19mode # JRuby in 1.9 mode
|
8
7
|
- rbx-2
|
9
8
|
matrix:
|
9
|
+
include:
|
10
|
+
- rvm: 2.0.0
|
11
|
+
gemfile: Gemfile_ruby2
|
12
|
+
- rvm: 2.1.8
|
13
|
+
gemfile: Gemfile_ruby2
|
10
14
|
allow_failures:
|
11
15
|
- rvm: ruby-head
|
12
16
|
- rvm: jruby-19mode
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
+
## Unreleased
|
2
|
+
|
3
|
+
## [2.5.0] 2016-08-21
|
4
|
+
### Fixed
|
5
|
+
- Remove tempdirs via finalizers on garbage collection. This cleans them up in all known cases, rather than just when the #close method is called. The #close method can be used to cleanup early. [329](https://github.com/roo-rb/roo/pull/329)
|
6
|
+
- Fixed README.md typo [318](https://github.com/roo-rb/roo/pull/318)
|
7
|
+
- Parse sheets in ODS files once to improve performance [320](https://github.com/roo-rb/roo/pull/320)
|
8
|
+
- Fix some Cell conversion issues [324](https://github.com/roo-rb/roo/pull/324) and [331](https://github.com/roo-rb/roo/pull/331)
|
9
|
+
- Improved memory performance [332](https://github.com/roo-rb/roo/pull/332)
|
10
|
+
- Added `no_hyperlinks` option to improve streaming performance [319](https://github.com/roo-rb/roo/pull/319) and [333](https://github.com/roo-rb/roo/pull/333)
|
11
|
+
|
12
|
+
### Deprecations
|
13
|
+
- Roo::Base::TEMP_PREFIX should be accessed via Roo::TEMP_PREFIX
|
14
|
+
- The private Roo::Base#make_tempdir is now available at the class level in
|
15
|
+
classes that use tempdirs, added via Roo::Tempdir
|
16
|
+
=======
|
17
|
+
### Added
|
18
|
+
- Discard hyperlink lookups to allow streaming parsing without loading whole files
|
19
|
+
|
1
20
|
## [2.4.0] 2016-05-14
|
2
21
|
### Fixed
|
3
22
|
- Fixed opening spreadsheets with charts [315](https://github.com/roo-rb/roo/pull/315)
|
data/Gemfile
CHANGED
data/Gemfile_ruby2
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
group :test do
|
6
|
+
# additional testing libs
|
7
|
+
gem 'webmock'
|
8
|
+
gem 'shoulda'
|
9
|
+
gem 'rspec', '>= 3.0.0'
|
10
|
+
gem 'vcr'
|
11
|
+
gem 'simplecov', '>= 0.9.0', require: false
|
12
|
+
gem 'coveralls', require: false
|
13
|
+
# gem "pry"
|
14
|
+
gem "activesupport", "~> 4.2.0"
|
15
|
+
end
|
16
|
+
|
17
|
+
group :local_development do
|
18
|
+
gem "listen", "~> 3.0.6"
|
19
|
+
gem 'terminal-notifier-guard', require: false if RUBY_PLATFORM.downcase.include?('darwin')
|
20
|
+
gem 'guard-rspec', '>= 4.3.1', require: false
|
21
|
+
gem 'guard-minitest', require: false
|
22
|
+
gem 'guard-bundler', require: false
|
23
|
+
gem 'guard-preek', require: false
|
24
|
+
gem 'guard-rubocop', require: false
|
25
|
+
gem 'guard-reek', github: 'pericles/guard-reek', require: false
|
26
|
+
gem 'pry'
|
27
|
+
end
|
data/README.md
CHANGED
data/lib/roo.rb
CHANGED
data/lib/roo/base.rb
CHANGED
@@ -9,7 +9,6 @@ require 'roo/utils'
|
|
9
9
|
class Roo::Base
|
10
10
|
include Enumerable
|
11
11
|
|
12
|
-
TEMP_PREFIX = 'roo_'.freeze
|
13
12
|
MAX_ROW_COL = 999_999.freeze
|
14
13
|
MIN_ROW_COL = 0.freeze
|
15
14
|
|
@@ -18,6 +17,11 @@ class Roo::Base
|
|
18
17
|
# sets the line with attribute names (default: 1)
|
19
18
|
attr_accessor :header_line
|
20
19
|
|
20
|
+
def self.TEMP_PREFIX
|
21
|
+
warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX'
|
22
|
+
Roo::TEMP_PREFIX
|
23
|
+
end
|
24
|
+
|
21
25
|
def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
|
22
26
|
@filename = filename
|
23
27
|
@options = options
|
@@ -32,14 +36,12 @@ class Roo::Base
|
|
32
36
|
@last_column = {}
|
33
37
|
|
34
38
|
@header_line = 1
|
35
|
-
rescue => e # clean up any temp files, but only if an error was raised
|
36
|
-
close
|
37
|
-
raise e
|
38
39
|
end
|
39
40
|
|
40
41
|
def close
|
41
|
-
|
42
|
-
|
42
|
+
if self.class.respond_to?(:finalize_tempdirs)
|
43
|
+
self.class.finalize_tempdirs(object_id)
|
44
|
+
end
|
43
45
|
nil
|
44
46
|
end
|
45
47
|
|
@@ -449,10 +451,6 @@ class Roo::Base
|
|
449
451
|
|
450
452
|
private
|
451
453
|
|
452
|
-
def track_tmpdir!(tmpdir)
|
453
|
-
(@tmpdirs ||= []) << tmpdir
|
454
|
-
end
|
455
|
-
|
456
454
|
def clean_sheet_if_need(options)
|
457
455
|
return unless options[:clean]
|
458
456
|
options.delete(:clean)
|
@@ -538,7 +536,7 @@ class Roo::Base
|
|
538
536
|
def find_basename(filename)
|
539
537
|
if uri?(filename)
|
540
538
|
require 'uri'
|
541
|
-
uri = URI
|
539
|
+
uri = URI.parse filename
|
542
540
|
File.basename(uri.path)
|
543
541
|
elsif !is_stream?(filename)
|
544
542
|
File.basename(filename)
|
@@ -546,10 +544,15 @@ class Roo::Base
|
|
546
544
|
end
|
547
545
|
|
548
546
|
def make_tmpdir(prefix = nil, root = nil, &block)
|
547
|
+
warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead'
|
549
548
|
prefix = "#{TEMP_PREFIX}#{prefix}"
|
549
|
+
root ||= ENV['ROO_TMP']
|
550
550
|
|
551
|
-
|
552
|
-
|
551
|
+
if block_given?
|
552
|
+
# folder is deleted at end of block
|
553
|
+
::Dir.mktmpdir(prefix, root, &block)
|
554
|
+
else
|
555
|
+
self.class.make_tempdir(self, prefix, root)
|
553
556
|
end
|
554
557
|
end
|
555
558
|
|
data/lib/roo/csv.rb
CHANGED
@@ -56,7 +56,7 @@ class Roo::CSV < Roo::Base
|
|
56
56
|
|
57
57
|
def each_row(options, &block)
|
58
58
|
if uri?(filename)
|
59
|
-
|
59
|
+
::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV['ROO_TMP']) do |tmpdir|
|
60
60
|
tmp_filename = download_uri(filename, tmpdir)
|
61
61
|
CSV.foreach(tmp_filename, options, &block)
|
62
62
|
end
|
data/lib/roo/excelx.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zip/filesystem'
|
3
3
|
require 'roo/link'
|
4
|
+
require 'roo/tempdir'
|
4
5
|
require 'roo/utils'
|
5
6
|
require 'forwardable'
|
6
7
|
|
7
8
|
module Roo
|
8
9
|
class Excelx < Roo::Base
|
10
|
+
extend Roo::Tempdir
|
11
|
+
|
9
12
|
require 'set'
|
10
13
|
extend Forwardable
|
11
14
|
|
@@ -36,13 +39,14 @@ module Roo
|
|
36
39
|
cell_max = options.delete(:cell_max)
|
37
40
|
sheet_options = {}
|
38
41
|
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
42
|
+
sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
|
39
43
|
|
40
44
|
unless is_stream?(filename_or_stream)
|
41
45
|
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
|
42
46
|
basename = find_basename(filename_or_stream)
|
43
47
|
end
|
44
48
|
|
45
|
-
@tmpdir =
|
49
|
+
@tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
|
46
50
|
@shared = Shared.new(@tmpdir)
|
47
51
|
@filename = local_filename(filename_or_stream, @tmpdir, packed)
|
48
52
|
process_zipfile(@filename || filename_or_stream)
|
@@ -64,9 +68,9 @@ module Roo
|
|
64
68
|
end
|
65
69
|
|
66
70
|
super
|
67
|
-
rescue
|
68
|
-
|
69
|
-
raise
|
71
|
+
rescue
|
72
|
+
self.class.finalize_tempdirs(object_id)
|
73
|
+
raise
|
70
74
|
end
|
71
75
|
|
72
76
|
def method_missing(method, *args)
|
data/lib/roo/excelx/extractor.rb
CHANGED
@@ -3,6 +3,14 @@ require 'roo/excelx/extractor'
|
|
3
3
|
module Roo
|
4
4
|
class Excelx
|
5
5
|
class SharedStrings < Excelx::Extractor
|
6
|
+
|
7
|
+
COMMON_STRINGS = {
|
8
|
+
t: "t",
|
9
|
+
r: "r",
|
10
|
+
html_tag_open: "<html>",
|
11
|
+
html_tag_closed: "</html>"
|
12
|
+
}
|
13
|
+
|
6
14
|
def [](index)
|
7
15
|
to_a[index]
|
8
16
|
end
|
@@ -26,18 +34,17 @@ module Roo
|
|
26
34
|
def fix_invalid_shared_strings(doc)
|
27
35
|
invalid = { '_x000D_' => "\n" }
|
28
36
|
xml = doc.to_s
|
37
|
+
return doc unless xml[/#{invalid.keys.join('|')}/]
|
29
38
|
|
30
|
-
|
31
|
-
@doc = ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
|
32
|
-
end
|
39
|
+
::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
|
33
40
|
end
|
34
41
|
|
35
42
|
def extract_shared_strings
|
36
43
|
return [] unless doc_exists?
|
37
44
|
|
38
|
-
fix_invalid_shared_strings(doc)
|
45
|
+
document = fix_invalid_shared_strings(doc)
|
39
46
|
# read the shared strings xml document
|
40
|
-
|
47
|
+
document.xpath('/sst/si').map do |si|
|
41
48
|
shared_string = ''
|
42
49
|
si.children.each do |elem|
|
43
50
|
case elem.name
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -39,8 +39,13 @@ module Roo
|
|
39
39
|
def each_cell(row_xml)
|
40
40
|
return [] unless row_xml
|
41
41
|
row_xml.children.each do |cell_element|
|
42
|
-
|
43
|
-
|
42
|
+
# If you're sure you're not going to need these hyperlinks you can discard them
|
43
|
+
hyperlinks = unless @options[:no_hyperlinks]
|
44
|
+
key = ::Roo::Utils.ref_to_key(cell_element['r'])
|
45
|
+
hyperlinks(@relationships)[key]
|
46
|
+
end
|
47
|
+
|
48
|
+
yield cell_from_xml(cell_element, hyperlinks)
|
44
49
|
end
|
45
50
|
end
|
46
51
|
|
@@ -90,10 +95,9 @@ module Roo
|
|
90
95
|
cell_xml.children.each do |cell|
|
91
96
|
case cell.name
|
92
97
|
when 'is'
|
93
|
-
cell.
|
94
|
-
|
95
|
-
|
96
|
-
end
|
98
|
+
content_arr = cell.search('t').map(&:content)
|
99
|
+
unless content_arr.empty?
|
100
|
+
return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
|
97
101
|
end
|
98
102
|
when 'f'
|
99
103
|
formula = cell.content
|
@@ -101,6 +105,8 @@ module Roo
|
|
101
105
|
return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
102
106
|
end
|
103
107
|
end
|
108
|
+
|
109
|
+
Excelx::Cell::Empty.new(coordinate)
|
104
110
|
end
|
105
111
|
|
106
112
|
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
@@ -157,8 +163,9 @@ module Roo
|
|
157
163
|
end
|
158
164
|
|
159
165
|
def extract_hyperlinks(relationships)
|
160
|
-
|
161
|
-
|
166
|
+
return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
|
167
|
+
|
168
|
+
Hash[hyperlinks.map do |hyperlink|
|
162
169
|
if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
|
163
170
|
[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
|
164
171
|
end
|
data/lib/roo/open_office.rb
CHANGED
@@ -3,10 +3,13 @@ require 'nokogiri'
|
|
3
3
|
require 'cgi'
|
4
4
|
require 'zip/filesystem'
|
5
5
|
require 'roo/font'
|
6
|
+
require 'roo/tempdir'
|
6
7
|
require 'base64'
|
7
8
|
|
8
9
|
module Roo
|
9
10
|
class OpenOffice < Roo::Base
|
11
|
+
extend Roo::Tempdir
|
12
|
+
|
10
13
|
ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
|
11
14
|
XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
|
12
15
|
XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
|
@@ -19,15 +22,27 @@ module Roo
|
|
19
22
|
|
20
23
|
@only_visible_sheets = options[:only_visible_sheets]
|
21
24
|
file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
|
22
|
-
@tmpdir =
|
25
|
+
@tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
|
23
26
|
@filename = local_filename(filename, @tmpdir, packed)
|
24
27
|
# TODO: @cells_read[:default] = false
|
25
28
|
open_oo_file(options)
|
26
29
|
super(filename, options)
|
27
30
|
initialize_default_variables
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
|
32
|
+
unless @table_display.any?
|
33
|
+
doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
|
34
|
+
read_table_styles(style)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
@sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
|
39
|
+
if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
|
40
|
+
sheet.attributes['name'].value
|
41
|
+
end
|
42
|
+
end.compact
|
43
|
+
rescue
|
44
|
+
self.class.finalize_tempdirs(object_id)
|
45
|
+
raise
|
31
46
|
end
|
32
47
|
|
33
48
|
def open_oo_file(options)
|
@@ -132,16 +147,7 @@ module Roo
|
|
132
147
|
end
|
133
148
|
|
134
149
|
def sheets
|
135
|
-
|
136
|
-
doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
|
137
|
-
read_table_styles(style)
|
138
|
-
end
|
139
|
-
end
|
140
|
-
doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
|
141
|
-
if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
|
142
|
-
sheet.attributes['name'].value
|
143
|
-
end
|
144
|
-
end.compact
|
150
|
+
@sheet_names
|
145
151
|
end
|
146
152
|
|
147
153
|
# version of the Roo::OpenOffice document
|
data/lib/roo/tempdir.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module Roo
|
2
|
+
module Tempdir
|
3
|
+
def finalize_tempdirs(object_id)
|
4
|
+
if @tempdirs && (dirs_to_remove = @tempdirs[object_id])
|
5
|
+
@tempdirs[object_id] = nil
|
6
|
+
dirs_to_remove.each do |dir|
|
7
|
+
::FileUtils.remove_entry(dir)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def make_tempdir(object, prefix, root)
|
13
|
+
root ||= ENV['ROO_TMP']
|
14
|
+
# folder is cleaned up in .finalize_tempdirs
|
15
|
+
::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root).tap do |tmpdir|
|
16
|
+
@tempdirs ||= {}
|
17
|
+
if @tempdirs[object.object_id]
|
18
|
+
@tempdirs[object.object_id] << tmpdir
|
19
|
+
else
|
20
|
+
@tempdirs[object.object_id] = [tmpdir]
|
21
|
+
ObjectSpace.define_finalizer(object, method(:finalize_tempdirs))
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/roo/version.rb
CHANGED
data/test/test_roo.rb
CHANGED
@@ -17,6 +17,8 @@
|
|
17
17
|
# with the wrong spreadsheet class
|
18
18
|
#STDERR.reopen "/dev/null","w"
|
19
19
|
|
20
|
+
Encoding.default_external = "UTF-8"
|
21
|
+
|
20
22
|
require 'test_helper'
|
21
23
|
require 'stringio'
|
22
24
|
|
@@ -2089,6 +2091,19 @@ where the expected result is
|
|
2089
2091
|
end
|
2090
2092
|
end
|
2091
2093
|
|
2094
|
+
def test_finalize
|
2095
|
+
tempdirs = []
|
2096
|
+
begin
|
2097
|
+
with_each_spreadsheet(:name=>'numbers1') do |oo|
|
2098
|
+
tempdirs << oo.instance_variable_get('@tmpdir')
|
2099
|
+
end
|
2100
|
+
GC.start
|
2101
|
+
end
|
2102
|
+
tempdirs.each do |tempdir|
|
2103
|
+
assert !File.exists?(tempdir), "Expected #{tempdir} to be cleaned up, but it still exists"
|
2104
|
+
end
|
2105
|
+
end
|
2106
|
+
|
2092
2107
|
def test_cleanup_on_error
|
2093
2108
|
old_temp_files = Dir.open(Dir.tmpdir).to_a
|
2094
2109
|
with_each_spreadsheet(:name=>'non_existent_file', :ignore_errors=>true) do |oo|; end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: roo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Preymesser
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2016-
|
15
|
+
date: 2016-08-21 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: nokogiri
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- ".travis.yml"
|
102
102
|
- CHANGELOG.md
|
103
103
|
- Gemfile
|
104
|
+
- Gemfile_ruby2
|
104
105
|
- Guardfile
|
105
106
|
- LICENSE
|
106
107
|
- README.md
|
@@ -139,6 +140,7 @@ files:
|
|
139
140
|
- lib/roo/link.rb
|
140
141
|
- lib/roo/open_office.rb
|
141
142
|
- lib/roo/spreadsheet.rb
|
143
|
+
- lib/roo/tempdir.rb
|
142
144
|
- lib/roo/utils.rb
|
143
145
|
- lib/roo/version.rb
|
144
146
|
- roo.gemspec
|