roo 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -2
- data/CHANGELOG.md +19 -0
- data/Gemfile +1 -0
- data/Gemfile_ruby2 +27 -0
- data/README.md +1 -1
- data/lib/roo.rb +2 -0
- data/lib/roo/base.rb +16 -13
- data/lib/roo/csv.rb +1 -1
- data/lib/roo/excelx.rb +8 -4
- data/lib/roo/excelx/extractor.rb +0 -1
- data/lib/roo/excelx/shared_strings.rb +12 -5
- data/lib/roo/excelx/sheet_doc.rb +15 -8
- data/lib/roo/open_office.rb +20 -14
- data/lib/roo/tempdir.rb +26 -0
- data/lib/roo/version.rb +1 -1
- data/test/excelx/cell/test_datetime.rb +0 -3
- data/test/test_roo.rb +15 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 975d999c227f6bc66a6819e0e69c3ebceb3c76c9
|
4
|
+
data.tar.gz: 9b790645ff9ad0e2a8e1a84c9deb465bf5e5154b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b197a0ae99ceeb81ff7a8bf12099944f0b2667dbc25ee85380d64bd1117309cc50e98eece758824ee9d5c0b022b354b3c64a10357035281ee7191f79cb479d1
|
7
|
+
data.tar.gz: a73ae23bc6419fcacad79d85496e6349ef0b45956382cc1e93af0f4be57614248a1e1db33dcc87774a2b8cbbb58098d8526ecc4267c79c86f242acec161815dc
|
data/.travis.yml
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
language: ruby
|
2
2
|
rvm:
|
3
|
-
- 2.0.0
|
4
|
-
- 2.1.8
|
5
3
|
- 2.2.4
|
4
|
+
- 2.3.1
|
6
5
|
- ruby-head
|
7
6
|
- jruby-19mode # JRuby in 1.9 mode
|
8
7
|
- rbx-2
|
9
8
|
matrix:
|
9
|
+
include:
|
10
|
+
- rvm: 2.0.0
|
11
|
+
gemfile: Gemfile_ruby2
|
12
|
+
- rvm: 2.1.8
|
13
|
+
gemfile: Gemfile_ruby2
|
10
14
|
allow_failures:
|
11
15
|
- rvm: ruby-head
|
12
16
|
- rvm: jruby-19mode
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
+
## Unreleased
|
2
|
+
|
3
|
+
## [2.5.0] 2016-08-21
|
4
|
+
### Fixed
|
5
|
+
- Remove tempdirs via finalizers on garbage collection. This cleans them up in all known cases, rather than just when the #close method is called. The #close method can be used to clean up early. [329](https://github.com/roo-rb/roo/pull/329)
|
6
|
+
- Fixed README.md typo [318](https://github.com/roo-rb/roo/pull/318)
|
7
|
+
- Parse sheets in ODS files once to improve performance [320](https://github.com/roo-rb/roo/pull/320)
|
8
|
+
- Fix some Cell conversion issues [324](https://github.com/roo-rb/roo/pull/324) and [331](https://github.com/roo-rb/roo/pull/331)
|
9
|
+
- Improved memory performance [332](https://github.com/roo-rb/roo/pull/332)
|
10
|
+
- Added `no_hyperlinks` option to improve streaming performance [319](https://github.com/roo-rb/roo/pull/319) and [333](https://github.com/roo-rb/roo/pull/333)
|
11
|
+
|
12
|
+
### Deprecations
|
13
|
+
- Roo::Base::TEMP_PREFIX should be accessed via Roo::TEMP_PREFIX
|
14
|
+
- The private Roo::Base#make_tempdir is now available at the class level in
|
15
|
+
classes that use tempdirs, added via Roo::Tempdir
|
16
|
+
=======
|
17
|
+
### Added
|
18
|
+
- Discard hyperlink lookups to allow streaming parsing without loading whole files
|
19
|
+
|
1
20
|
## [2.4.0] 2016-05-14
|
2
21
|
### Fixed
|
3
22
|
- Fixed opening spreadsheets with charts [315](https://github.com/roo-rb/roo/pull/315)
|
data/Gemfile
CHANGED
data/Gemfile_ruby2
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
group :test do
|
6
|
+
# additional testing libs
|
7
|
+
gem 'webmock'
|
8
|
+
gem 'shoulda'
|
9
|
+
gem 'rspec', '>= 3.0.0'
|
10
|
+
gem 'vcr'
|
11
|
+
gem 'simplecov', '>= 0.9.0', require: false
|
12
|
+
gem 'coveralls', require: false
|
13
|
+
# gem "pry"
|
14
|
+
gem "activesupport", "~> 4.2.0"
|
15
|
+
end
|
16
|
+
|
17
|
+
group :local_development do
|
18
|
+
gem "listen", "~> 3.0.6"
|
19
|
+
gem 'terminal-notifier-guard', require: false if RUBY_PLATFORM.downcase.include?('darwin')
|
20
|
+
gem 'guard-rspec', '>= 4.3.1', require: false
|
21
|
+
gem 'guard-minitest', require: false
|
22
|
+
gem 'guard-bundler', require: false
|
23
|
+
gem 'guard-preek', require: false
|
24
|
+
gem 'guard-rubocop', require: false
|
25
|
+
gem 'guard-reek', github: 'pericles/guard-reek', require: false
|
26
|
+
gem 'pry'
|
27
|
+
end
|
data/README.md
CHANGED
data/lib/roo.rb
CHANGED
data/lib/roo/base.rb
CHANGED
@@ -9,7 +9,6 @@ require 'roo/utils'
|
|
9
9
|
class Roo::Base
|
10
10
|
include Enumerable
|
11
11
|
|
12
|
-
TEMP_PREFIX = 'roo_'.freeze
|
13
12
|
MAX_ROW_COL = 999_999.freeze
|
14
13
|
MIN_ROW_COL = 0.freeze
|
15
14
|
|
@@ -18,6 +17,11 @@ class Roo::Base
|
|
18
17
|
# sets the line with attribute names (default: 1)
|
19
18
|
attr_accessor :header_line
|
20
19
|
|
20
|
+
def self.TEMP_PREFIX
|
21
|
+
warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX'
|
22
|
+
Roo::TEMP_PREFIX
|
23
|
+
end
|
24
|
+
|
21
25
|
def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
|
22
26
|
@filename = filename
|
23
27
|
@options = options
|
@@ -32,14 +36,12 @@ class Roo::Base
|
|
32
36
|
@last_column = {}
|
33
37
|
|
34
38
|
@header_line = 1
|
35
|
-
rescue => e # clean up any temp files, but only if an error was raised
|
36
|
-
close
|
37
|
-
raise e
|
38
39
|
end
|
39
40
|
|
40
41
|
def close
|
41
|
-
|
42
|
-
|
42
|
+
if self.class.respond_to?(:finalize_tempdirs)
|
43
|
+
self.class.finalize_tempdirs(object_id)
|
44
|
+
end
|
43
45
|
nil
|
44
46
|
end
|
45
47
|
|
@@ -449,10 +451,6 @@ class Roo::Base
|
|
449
451
|
|
450
452
|
private
|
451
453
|
|
452
|
-
def track_tmpdir!(tmpdir)
|
453
|
-
(@tmpdirs ||= []) << tmpdir
|
454
|
-
end
|
455
|
-
|
456
454
|
def clean_sheet_if_need(options)
|
457
455
|
return unless options[:clean]
|
458
456
|
options.delete(:clean)
|
@@ -538,7 +536,7 @@ class Roo::Base
|
|
538
536
|
def find_basename(filename)
|
539
537
|
if uri?(filename)
|
540
538
|
require 'uri'
|
541
|
-
uri = URI
|
539
|
+
uri = URI.parse filename
|
542
540
|
File.basename(uri.path)
|
543
541
|
elsif !is_stream?(filename)
|
544
542
|
File.basename(filename)
|
@@ -546,10 +544,15 @@ class Roo::Base
|
|
546
544
|
end
|
547
545
|
|
548
546
|
def make_tmpdir(prefix = nil, root = nil, &block)
|
547
|
+
warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead'
|
549
548
|
prefix = "#{TEMP_PREFIX}#{prefix}"
|
549
|
+
root ||= ENV['ROO_TMP']
|
550
550
|
|
551
|
-
|
552
|
-
|
551
|
+
if block_given?
|
552
|
+
# folder is deleted at end of block
|
553
|
+
::Dir.mktmpdir(prefix, root, &block)
|
554
|
+
else
|
555
|
+
self.class.make_tempdir(self, prefix, root)
|
553
556
|
end
|
554
557
|
end
|
555
558
|
|
data/lib/roo/csv.rb
CHANGED
@@ -56,7 +56,7 @@ class Roo::CSV < Roo::Base
|
|
56
56
|
|
57
57
|
def each_row(options, &block)
|
58
58
|
if uri?(filename)
|
59
|
-
|
59
|
+
::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV['ROO_TMP']) do |tmpdir|
|
60
60
|
tmp_filename = download_uri(filename, tmpdir)
|
61
61
|
CSV.foreach(tmp_filename, options, &block)
|
62
62
|
end
|
data/lib/roo/excelx.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zip/filesystem'
|
3
3
|
require 'roo/link'
|
4
|
+
require 'roo/tempdir'
|
4
5
|
require 'roo/utils'
|
5
6
|
require 'forwardable'
|
6
7
|
|
7
8
|
module Roo
|
8
9
|
class Excelx < Roo::Base
|
10
|
+
extend Roo::Tempdir
|
11
|
+
|
9
12
|
require 'set'
|
10
13
|
extend Forwardable
|
11
14
|
|
@@ -36,13 +39,14 @@ module Roo
|
|
36
39
|
cell_max = options.delete(:cell_max)
|
37
40
|
sheet_options = {}
|
38
41
|
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
42
|
+
sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
|
39
43
|
|
40
44
|
unless is_stream?(filename_or_stream)
|
41
45
|
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
|
42
46
|
basename = find_basename(filename_or_stream)
|
43
47
|
end
|
44
48
|
|
45
|
-
@tmpdir =
|
49
|
+
@tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
|
46
50
|
@shared = Shared.new(@tmpdir)
|
47
51
|
@filename = local_filename(filename_or_stream, @tmpdir, packed)
|
48
52
|
process_zipfile(@filename || filename_or_stream)
|
@@ -64,9 +68,9 @@ module Roo
|
|
64
68
|
end
|
65
69
|
|
66
70
|
super
|
67
|
-
rescue
|
68
|
-
|
69
|
-
raise
|
71
|
+
rescue
|
72
|
+
self.class.finalize_tempdirs(object_id)
|
73
|
+
raise
|
70
74
|
end
|
71
75
|
|
72
76
|
def method_missing(method, *args)
|
data/lib/roo/excelx/extractor.rb
CHANGED
@@ -3,6 +3,14 @@ require 'roo/excelx/extractor'
|
|
3
3
|
module Roo
|
4
4
|
class Excelx
|
5
5
|
class SharedStrings < Excelx::Extractor
|
6
|
+
|
7
|
+
COMMON_STRINGS = {
|
8
|
+
t: "t",
|
9
|
+
r: "r",
|
10
|
+
html_tag_open: "<html>",
|
11
|
+
html_tag_closed: "</html>"
|
12
|
+
}
|
13
|
+
|
6
14
|
def [](index)
|
7
15
|
to_a[index]
|
8
16
|
end
|
@@ -26,18 +34,17 @@ module Roo
|
|
26
34
|
def fix_invalid_shared_strings(doc)
|
27
35
|
invalid = { '_x000D_' => "\n" }
|
28
36
|
xml = doc.to_s
|
37
|
+
return doc unless xml[/#{invalid.keys.join('|')}/]
|
29
38
|
|
30
|
-
|
31
|
-
@doc = ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
|
32
|
-
end
|
39
|
+
::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
|
33
40
|
end
|
34
41
|
|
35
42
|
def extract_shared_strings
|
36
43
|
return [] unless doc_exists?
|
37
44
|
|
38
|
-
fix_invalid_shared_strings(doc)
|
45
|
+
document = fix_invalid_shared_strings(doc)
|
39
46
|
# read the shared strings xml document
|
40
|
-
|
47
|
+
document.xpath('/sst/si').map do |si|
|
41
48
|
shared_string = ''
|
42
49
|
si.children.each do |elem|
|
43
50
|
case elem.name
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -39,8 +39,13 @@ module Roo
|
|
39
39
|
def each_cell(row_xml)
|
40
40
|
return [] unless row_xml
|
41
41
|
row_xml.children.each do |cell_element|
|
42
|
-
|
43
|
-
|
42
|
+
# If you're sure you're not going to need the hyperlinks, you can skip looking them up
|
43
|
+
hyperlinks = unless @options[:no_hyperlinks]
|
44
|
+
key = ::Roo::Utils.ref_to_key(cell_element['r'])
|
45
|
+
hyperlinks(@relationships)[key]
|
46
|
+
end
|
47
|
+
|
48
|
+
yield cell_from_xml(cell_element, hyperlinks)
|
44
49
|
end
|
45
50
|
end
|
46
51
|
|
@@ -90,10 +95,9 @@ module Roo
|
|
90
95
|
cell_xml.children.each do |cell|
|
91
96
|
case cell.name
|
92
97
|
when 'is'
|
93
|
-
cell.
|
94
|
-
|
95
|
-
|
96
|
-
end
|
98
|
+
content_arr = cell.search('t').map(&:content)
|
99
|
+
unless content_arr.empty?
|
100
|
+
return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
|
97
101
|
end
|
98
102
|
when 'f'
|
99
103
|
formula = cell.content
|
@@ -101,6 +105,8 @@ module Roo
|
|
101
105
|
return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
102
106
|
end
|
103
107
|
end
|
108
|
+
|
109
|
+
Excelx::Cell::Empty.new(coordinate)
|
104
110
|
end
|
105
111
|
|
106
112
|
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
@@ -157,8 +163,9 @@ module Roo
|
|
157
163
|
end
|
158
164
|
|
159
165
|
def extract_hyperlinks(relationships)
|
160
|
-
|
161
|
-
|
166
|
+
return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
|
167
|
+
|
168
|
+
Hash[hyperlinks.map do |hyperlink|
|
162
169
|
if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
|
163
170
|
[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
|
164
171
|
end
|
data/lib/roo/open_office.rb
CHANGED
@@ -3,10 +3,13 @@ require 'nokogiri'
|
|
3
3
|
require 'cgi'
|
4
4
|
require 'zip/filesystem'
|
5
5
|
require 'roo/font'
|
6
|
+
require 'roo/tempdir'
|
6
7
|
require 'base64'
|
7
8
|
|
8
9
|
module Roo
|
9
10
|
class OpenOffice < Roo::Base
|
11
|
+
extend Roo::Tempdir
|
12
|
+
|
10
13
|
ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
|
11
14
|
XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
|
12
15
|
XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
|
@@ -19,15 +22,27 @@ module Roo
|
|
19
22
|
|
20
23
|
@only_visible_sheets = options[:only_visible_sheets]
|
21
24
|
file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
|
22
|
-
@tmpdir =
|
25
|
+
@tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
|
23
26
|
@filename = local_filename(filename, @tmpdir, packed)
|
24
27
|
# TODO: @cells_read[:default] = false
|
25
28
|
open_oo_file(options)
|
26
29
|
super(filename, options)
|
27
30
|
initialize_default_variables
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
|
32
|
+
unless @table_display.any?
|
33
|
+
doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
|
34
|
+
read_table_styles(style)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
@sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
|
39
|
+
if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
|
40
|
+
sheet.attributes['name'].value
|
41
|
+
end
|
42
|
+
end.compact
|
43
|
+
rescue
|
44
|
+
self.class.finalize_tempdirs(object_id)
|
45
|
+
raise
|
31
46
|
end
|
32
47
|
|
33
48
|
def open_oo_file(options)
|
@@ -132,16 +147,7 @@ module Roo
|
|
132
147
|
end
|
133
148
|
|
134
149
|
def sheets
|
135
|
-
|
136
|
-
doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
|
137
|
-
read_table_styles(style)
|
138
|
-
end
|
139
|
-
end
|
140
|
-
doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
|
141
|
-
if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
|
142
|
-
sheet.attributes['name'].value
|
143
|
-
end
|
144
|
-
end.compact
|
150
|
+
@sheet_names
|
145
151
|
end
|
146
152
|
|
147
153
|
# version of the Roo::OpenOffice document
|
data/lib/roo/tempdir.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module Roo
|
2
|
+
module Tempdir
|
3
|
+
def finalize_tempdirs(object_id)
|
4
|
+
if @tempdirs && (dirs_to_remove = @tempdirs[object_id])
|
5
|
+
@tempdirs[object_id] = nil
|
6
|
+
dirs_to_remove.each do |dir|
|
7
|
+
::FileUtils.remove_entry(dir)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def make_tempdir(object, prefix, root)
|
13
|
+
root ||= ENV['ROO_TMP']
|
14
|
+
# folder is cleaned up in .finalize_tempdirs
|
15
|
+
::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root).tap do |tmpdir|
|
16
|
+
@tempdirs ||= {}
|
17
|
+
if @tempdirs[object.object_id]
|
18
|
+
@tempdirs[object.object_id] << tmpdir
|
19
|
+
else
|
20
|
+
@tempdirs[object.object_id] = [tmpdir]
|
21
|
+
ObjectSpace.define_finalizer(object, method(:finalize_tempdirs))
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/roo/version.rb
CHANGED
data/test/test_roo.rb
CHANGED
@@ -17,6 +17,8 @@
|
|
17
17
|
# with the wrong spreadsheet class
|
18
18
|
#STDERR.reopen "/dev/null","w"
|
19
19
|
|
20
|
+
Encoding.default_external = "UTF-8"
|
21
|
+
|
20
22
|
require 'test_helper'
|
21
23
|
require 'stringio'
|
22
24
|
|
@@ -2089,6 +2091,19 @@ where the expected result is
|
|
2089
2091
|
end
|
2090
2092
|
end
|
2091
2093
|
|
2094
|
+
def test_finalize
|
2095
|
+
tempdirs = []
|
2096
|
+
begin
|
2097
|
+
with_each_spreadsheet(:name=>'numbers1') do |oo|
|
2098
|
+
tempdirs << oo.instance_variable_get('@tmpdir')
|
2099
|
+
end
|
2100
|
+
GC.start
|
2101
|
+
end
|
2102
|
+
tempdirs.each do |tempdir|
|
2103
|
+
assert !File.exists?(tempdir), "Expected #{tempdir} to be cleaned up, but it still exists"
|
2104
|
+
end
|
2105
|
+
end
|
2106
|
+
|
2092
2107
|
def test_cleanup_on_error
|
2093
2108
|
old_temp_files = Dir.open(Dir.tmpdir).to_a
|
2094
2109
|
with_each_spreadsheet(:name=>'non_existent_file', :ignore_errors=>true) do |oo|; end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: roo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Preymesser
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2016-
|
15
|
+
date: 2016-08-21 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: nokogiri
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- ".travis.yml"
|
102
102
|
- CHANGELOG.md
|
103
103
|
- Gemfile
|
104
|
+
- Gemfile_ruby2
|
104
105
|
- Guardfile
|
105
106
|
- LICENSE
|
106
107
|
- README.md
|
@@ -139,6 +140,7 @@ files:
|
|
139
140
|
- lib/roo/link.rb
|
140
141
|
- lib/roo/open_office.rb
|
141
142
|
- lib/roo/spreadsheet.rb
|
143
|
+
- lib/roo/tempdir.rb
|
142
144
|
- lib/roo/utils.rb
|
143
145
|
- lib/roo/version.rb
|
144
146
|
- roo.gemspec
|