roo 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d2a5a56902bf60eef9028eddb5617ae403b866f
4
- data.tar.gz: 3fad0f4206fd3058b3a420b63da7393b530f5f59
3
+ metadata.gz: 975d999c227f6bc66a6819e0e69c3ebceb3c76c9
4
+ data.tar.gz: 9b790645ff9ad0e2a8e1a84c9deb465bf5e5154b
5
5
  SHA512:
6
- metadata.gz: 91ca5af7282631e668e104e407fb6affd40bc56792809b7129e46de4dd9e4af8133aa6c1eb18f2a9ac8fc7d9470ae51bfb304bff9d920893f1b391b40f6dd162
7
- data.tar.gz: 9efee7d9ae6fec31bd965a7ce49fcd57452b0b61bb8ad0eb89a21640e4d3f0bbd5cfbb018f6f567b8475050550eb684dd6800aa80716876e2c0b747ae8adecd2
6
+ metadata.gz: 7b197a0ae99ceeb81ff7a8bf12099944f0b2667dbc25ee85380d64bd1117309cc50e98eece758824ee9d5c0b022b354b3c64a10357035281ee7191f79cb479d1
7
+ data.tar.gz: a73ae23bc6419fcacad79d85496e6349ef0b45956382cc1e93af0f4be57614248a1e1db33dcc87774a2b8cbbb58098d8526ecc4267c79c86f242acec161815dc
@@ -1,12 +1,16 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.0.0
4
- - 2.1.8
5
3
  - 2.2.4
4
+ - 2.3.1
6
5
  - ruby-head
7
6
  - jruby-19mode # JRuby in 1.9 mode
8
7
  - rbx-2
9
8
  matrix:
9
+ include:
10
+ - rvm: 2.0.0
11
+ gemfile: Gemfile_ruby2
12
+ - rvm: 2.1.8
13
+ gemfile: Gemfile_ruby2
10
14
  allow_failures:
11
15
  - rvm: ruby-head
12
16
  - rvm: jruby-19mode
@@ -1,3 +1,22 @@
1
+ ## Unreleased
2
+
3
+ ## [2.5.0] 2016-08-21
4
+ ### Fixed
5
+ - Remove tempdirs via finalizers on garbage collection. This cleans them up in all known cases, rather than just when the #close method is called. The #close method can be used to clean up early. [329](https://github.com/roo-rb/roo/pull/329)
6
+ - Fixed README.md typo [318](https://github.com/roo-rb/roo/pull/318)
7
+ - Parse sheets in ODS files once to improve performance [320](https://github.com/roo-rb/roo/pull/320)
8
+ - Fix some Cell conversion issues [324](https://github.com/roo-rb/roo/pull/324) and [331](https://github.com/roo-rb/roo/pull/331)
9
+ - Improved memory performance [332](https://github.com/roo-rb/roo/pull/332)
10
+ - Added `no_hyperlinks` option to improve streaming performance [319](https://github.com/roo-rb/roo/pull/319) and [333](https://github.com/roo-rb/roo/pull/333)
11
+
12
+ ### Deprecations
13
+ - Roo::Base::TEMP_PREFIX should be accessed via Roo::TEMP_PREFIX
14
+ - The private Roo::Base#make_tmpdir is now available at the class level (as .make_tempdir) in
15
+ classes that use tempdirs, added via Roo::Tempdir
16
+
17
+ ### Added
18
+ - Discard hyperlink lookups to allow streaming parsing without loading whole files
19
+
1
20
  ## [2.4.0] 2016-05-14
2
21
  ### Fixed
3
22
  - Fixed opening spreadsheets with charts [315](https://github.com/roo-rb/roo/pull/315)
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ group :test do
6
6
  # additional testing libs
7
7
  gem 'webmock'
8
8
  gem 'shoulda'
9
+ gem 'activesupport', '< 5.0.0'
9
10
  gem 'rspec', '>= 3.0.0'
10
11
  gem 'vcr'
11
12
  gem 'simplecov', '>= 0.9.0', require: false
@@ -0,0 +1,27 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :test do
6
+ # additional testing libs
7
+ gem 'webmock'
8
+ gem 'shoulda'
9
+ gem 'rspec', '>= 3.0.0'
10
+ gem 'vcr'
11
+ gem 'simplecov', '>= 0.9.0', require: false
12
+ gem 'coveralls', require: false
13
+ # gem "pry"
14
+ gem "activesupport", "~> 4.2.0"
15
+ end
16
+
17
+ group :local_development do
18
+ gem "listen", "~> 3.0.6"
19
+ gem 'terminal-notifier-guard', require: false if RUBY_PLATFORM.downcase.include?('darwin')
20
+ gem 'guard-rspec', '>= 4.3.1', require: false
21
+ gem 'guard-minitest', require: false
22
+ gem 'guard-bundler', require: false
23
+ gem 'guard-preek', require: false
24
+ gem 'guard-rubocop', require: false
25
+ gem 'guard-reek', github: 'pericles/guard-reek', require: false
26
+ gem 'pry'
27
+ end
data/README.md CHANGED
@@ -50,7 +50,7 @@ ods.sheet(0).row(1)
50
50
 
51
51
  # Set the last sheet as the default sheet.
52
52
  ods.default_sheet = ods.sheets.last
53
- ods.default_sheet = s.sheets[3]
53
+ ods.default_sheet = ods.sheets[2]
54
54
  ods.default_sheet = 'Sheet 3'
55
55
 
56
56
  # Iterate through each sheet
data/lib/roo.rb CHANGED
@@ -9,6 +9,8 @@ module Roo
9
9
  autoload :Excelx, 'roo/excelx'
10
10
  autoload :CSV, 'roo/csv'
11
11
 
12
+ TEMP_PREFIX = 'roo_'.freeze
13
+
12
14
  CLASS_FOR_EXTENSION = {
13
15
  ods: Roo::OpenOffice,
14
16
  xlsx: Roo::Excelx,
@@ -9,7 +9,6 @@ require 'roo/utils'
9
9
  class Roo::Base
10
10
  include Enumerable
11
11
 
12
- TEMP_PREFIX = 'roo_'.freeze
13
12
  MAX_ROW_COL = 999_999.freeze
14
13
  MIN_ROW_COL = 0.freeze
15
14
 
@@ -18,6 +17,11 @@ class Roo::Base
18
17
  # sets the line with attribute names (default: 1)
19
18
  attr_accessor :header_line
20
19
 
20
+ def self.TEMP_PREFIX
21
+ warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX'
22
+ Roo::TEMP_PREFIX
23
+ end
24
+
21
25
  def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
22
26
  @filename = filename
23
27
  @options = options
@@ -32,14 +36,12 @@ class Roo::Base
32
36
  @last_column = {}
33
37
 
34
38
  @header_line = 1
35
- rescue => e # clean up any temp files, but only if an error was raised
36
- close
37
- raise e
38
39
  end
39
40
 
40
41
  def close
41
- return nil unless @tmpdirs
42
- @tmpdirs.each { |dir| ::FileUtils.remove_entry(dir) }
42
+ if self.class.respond_to?(:finalize_tempdirs)
43
+ self.class.finalize_tempdirs(object_id)
44
+ end
43
45
  nil
44
46
  end
45
47
 
@@ -449,10 +451,6 @@ class Roo::Base
449
451
 
450
452
  private
451
453
 
452
- def track_tmpdir!(tmpdir)
453
- (@tmpdirs ||= []) << tmpdir
454
- end
455
-
456
454
  def clean_sheet_if_need(options)
457
455
  return unless options[:clean]
458
456
  options.delete(:clean)
@@ -538,7 +536,7 @@ class Roo::Base
538
536
  def find_basename(filename)
539
537
  if uri?(filename)
540
538
  require 'uri'
541
- uri = URI::parse filename
539
+ uri = URI.parse filename
542
540
  File.basename(uri.path)
543
541
  elsif !is_stream?(filename)
544
542
  File.basename(filename)
@@ -546,10 +544,15 @@ class Roo::Base
546
544
  end
547
545
 
548
546
  def make_tmpdir(prefix = nil, root = nil, &block)
547
+ warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead'
549
548
  prefix = "#{TEMP_PREFIX}#{prefix}"
549
+ root ||= ENV['ROO_TMP']
550
550
 
551
- ::Dir.mktmpdir(prefix, root || ENV['ROO_TMP'], &block).tap do |result|
552
- block_given? || track_tmpdir!(result)
551
+ if block_given?
552
+ # folder is deleted at end of block
553
+ ::Dir.mktmpdir(prefix, root, &block)
554
+ else
555
+ self.class.make_tempdir(self, prefix, root)
553
556
  end
554
557
  end
555
558
 
@@ -56,7 +56,7 @@ class Roo::CSV < Roo::Base
56
56
 
57
57
  def each_row(options, &block)
58
58
  if uri?(filename)
59
- make_tmpdir do |tmpdir|
59
+ ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV['ROO_TMP']) do |tmpdir|
60
60
  tmp_filename = download_uri(filename, tmpdir)
61
61
  CSV.foreach(tmp_filename, options, &block)
62
62
  end
@@ -1,11 +1,14 @@
1
1
  require 'nokogiri'
2
2
  require 'zip/filesystem'
3
3
  require 'roo/link'
4
+ require 'roo/tempdir'
4
5
  require 'roo/utils'
5
6
  require 'forwardable'
6
7
 
7
8
  module Roo
8
9
  class Excelx < Roo::Base
10
+ extend Roo::Tempdir
11
+
9
12
  require 'set'
10
13
  extend Forwardable
11
14
 
@@ -36,13 +39,14 @@ module Roo
36
39
  cell_max = options.delete(:cell_max)
37
40
  sheet_options = {}
38
41
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
42
+ sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
39
43
 
40
44
  unless is_stream?(filename_or_stream)
41
45
  file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
42
46
  basename = find_basename(filename_or_stream)
43
47
  end
44
48
 
45
- @tmpdir = make_tmpdir(basename, options[:tmpdir_root])
49
+ @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
46
50
  @shared = Shared.new(@tmpdir)
47
51
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
48
52
  process_zipfile(@filename || filename_or_stream)
@@ -64,9 +68,9 @@ module Roo
64
68
  end
65
69
 
66
70
  super
67
- rescue => e # clean up any temp files, but only if an error was raised
68
- close
69
- raise e
71
+ rescue
72
+ self.class.finalize_tempdirs(object_id)
73
+ raise
70
74
  end
71
75
 
72
76
  def method_missing(method, *args)
@@ -8,7 +8,6 @@ module Roo
8
8
  private
9
9
 
10
10
  def doc
11
- @doc ||=
12
11
  if doc_exists?
13
12
  ::Roo::Utils.load_xml(@path).remove_namespaces!
14
13
  end
@@ -3,6 +3,14 @@ require 'roo/excelx/extractor'
3
3
  module Roo
4
4
  class Excelx
5
5
  class SharedStrings < Excelx::Extractor
6
+
7
+ COMMON_STRINGS = {
8
+ t: "t",
9
+ r: "r",
10
+ html_tag_open: "<html>",
11
+ html_tag_closed: "</html>"
12
+ }
13
+
6
14
  def [](index)
7
15
  to_a[index]
8
16
  end
@@ -26,18 +34,17 @@ module Roo
26
34
  def fix_invalid_shared_strings(doc)
27
35
  invalid = { '_x000D_' => "\n" }
28
36
  xml = doc.to_s
37
+ return doc unless xml[/#{invalid.keys.join('|')}/]
29
38
 
30
- if xml[/#{invalid.keys.join('|')}/]
31
- @doc = ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
32
- end
39
+ ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
33
40
  end
34
41
 
35
42
  def extract_shared_strings
36
43
  return [] unless doc_exists?
37
44
 
38
- fix_invalid_shared_strings(doc)
45
+ document = fix_invalid_shared_strings(doc)
39
46
  # read the shared strings xml document
40
- doc.xpath('/sst/si').map do |si|
47
+ document.xpath('/sst/si').map do |si|
41
48
  shared_string = ''
42
49
  si.children.each do |elem|
43
50
  case elem.name
@@ -39,8 +39,13 @@ module Roo
39
39
  def each_cell(row_xml)
40
40
  return [] unless row_xml
41
41
  row_xml.children.each do |cell_element|
42
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
43
- yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
42
+ # If you're sure you're not going to need these hyperlinks, you can discard them
43
+ hyperlinks = unless @options[:no_hyperlinks]
44
+ key = ::Roo::Utils.ref_to_key(cell_element['r'])
45
+ hyperlinks(@relationships)[key]
46
+ end
47
+
48
+ yield cell_from_xml(cell_element, hyperlinks)
44
49
  end
45
50
  end
46
51
 
@@ -90,10 +95,9 @@ module Roo
90
95
  cell_xml.children.each do |cell|
91
96
  case cell.name
92
97
  when 'is'
93
- cell.children.each do |inline_str|
94
- if inline_str.name == 't'
95
- return Excelx::Cell.create_cell(:string, inline_str.content, formula, style, hyperlink, coordinate)
96
- end
98
+ content_arr = cell.search('t').map(&:content)
99
+ unless content_arr.empty?
100
+ return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
97
101
  end
98
102
  when 'f'
99
103
  formula = cell.content
@@ -101,6 +105,8 @@ module Roo
101
105
  return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
102
106
  end
103
107
  end
108
+
109
+ Excelx::Cell::Empty.new(coordinate)
104
110
  end
105
111
 
106
112
  def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
@@ -157,8 +163,9 @@ module Roo
157
163
  end
158
164
 
159
165
  def extract_hyperlinks(relationships)
160
- # FIXME: select the valid hyperlinks and then map those.
161
- Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
166
+ return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
167
+
168
+ Hash[hyperlinks.map do |hyperlink|
162
169
  if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
163
170
  [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
164
171
  end
@@ -3,10 +3,13 @@ require 'nokogiri'
3
3
  require 'cgi'
4
4
  require 'zip/filesystem'
5
5
  require 'roo/font'
6
+ require 'roo/tempdir'
6
7
  require 'base64'
7
8
 
8
9
  module Roo
9
10
  class OpenOffice < Roo::Base
11
+ extend Roo::Tempdir
12
+
10
13
  ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
11
14
  XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
12
15
  XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
@@ -19,15 +22,27 @@ module Roo
19
22
 
20
23
  @only_visible_sheets = options[:only_visible_sheets]
21
24
  file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
22
- @tmpdir = make_tmpdir(find_basename(filename), options[:tmpdir_root])
25
+ @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
23
26
  @filename = local_filename(filename, @tmpdir, packed)
24
27
  # TODO: @cells_read[:default] = false
25
28
  open_oo_file(options)
26
29
  super(filename, options)
27
30
  initialize_default_variables
28
- rescue => e # clean up any temp files, but only if an error was raised
29
- close
30
- raise e
31
+
32
+ unless @table_display.any?
33
+ doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
34
+ read_table_styles(style)
35
+ end
36
+ end
37
+
38
+ @sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
39
+ if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
40
+ sheet.attributes['name'].value
41
+ end
42
+ end.compact
43
+ rescue
44
+ self.class.finalize_tempdirs(object_id)
45
+ raise
31
46
  end
32
47
 
33
48
  def open_oo_file(options)
@@ -132,16 +147,7 @@ module Roo
132
147
  end
133
148
 
134
149
  def sheets
135
- unless @table_display.any?
136
- doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
137
- read_table_styles(style)
138
- end
139
- end
140
- doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
141
- if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
142
- sheet.attributes['name'].value
143
- end
144
- end.compact
150
+ @sheet_names
145
151
  end
146
152
 
147
153
  # version of the Roo::OpenOffice document
@@ -0,0 +1,26 @@
1
+ module Roo
2
+ module Tempdir
3
+ def finalize_tempdirs(object_id)
4
+ if @tempdirs && (dirs_to_remove = @tempdirs[object_id])
5
+ @tempdirs[object_id] = nil
6
+ dirs_to_remove.each do |dir|
7
+ ::FileUtils.remove_entry(dir)
8
+ end
9
+ end
10
+ end
11
+
12
+ def make_tempdir(object, prefix, root)
13
+ root ||= ENV['ROO_TMP']
14
+ # folder is cleaned up in .finalize_tempdirs
15
+ ::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root).tap do |tmpdir|
16
+ @tempdirs ||= {}
17
+ if @tempdirs[object.object_id]
18
+ @tempdirs[object.object_id] << tmpdir
19
+ else
20
+ @tempdirs[object.object_id] = [tmpdir]
21
+ ObjectSpace.define_finalizer(object, method(:finalize_tempdirs))
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -1,3 +1,3 @@
1
1
  module Roo
2
- VERSION = '2.4.0'
2
+ VERSION = '2.5.0'
3
3
  end
@@ -1,7 +1,4 @@
1
1
  require 'test_helper'
2
- # require 'pry'
3
- # require 'active_support'
4
- # require 'active_support/time'
5
2
 
6
3
  class TestRooExcelxCellDateTime < Minitest::Test
7
4
  def test_cell_value_is_datetime
@@ -17,6 +17,8 @@
17
17
  # with the wrong spreadsheet class
18
18
  #STDERR.reopen "/dev/null","w"
19
19
 
20
+ Encoding.default_external = "UTF-8"
21
+
20
22
  require 'test_helper'
21
23
  require 'stringio'
22
24
 
@@ -2089,6 +2091,19 @@ where the expected result is
2089
2091
  end
2090
2092
  end
2091
2093
 
2094
+ def test_finalize
2095
+ tempdirs = []
2096
+ begin
2097
+ with_each_spreadsheet(:name=>'numbers1') do |oo|
2098
+ tempdirs << oo.instance_variable_get('@tmpdir')
2099
+ end
2100
+ GC.start
2101
+ end
2102
+ tempdirs.each do |tempdir|
2103
+ assert !File.exists?(tempdir), "Expected #{tempdir} to be cleaned up, but it still exists"
2104
+ end
2105
+ end
2106
+
2092
2107
  def test_cleanup_on_error
2093
2108
  old_temp_files = Dir.open(Dir.tmpdir).to_a
2094
2109
  with_each_spreadsheet(:name=>'non_existent_file', :ignore_errors=>true) do |oo|; end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: roo
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Preymesser
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-05-14 00:00:00.000000000 Z
15
+ date: 2016-08-21 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: nokogiri
@@ -101,6 +101,7 @@ files:
101
101
  - ".travis.yml"
102
102
  - CHANGELOG.md
103
103
  - Gemfile
104
+ - Gemfile_ruby2
104
105
  - Guardfile
105
106
  - LICENSE
106
107
  - README.md
@@ -139,6 +140,7 @@ files:
139
140
  - lib/roo/link.rb
140
141
  - lib/roo/open_office.rb
141
142
  - lib/roo/spreadsheet.rb
143
+ - lib/roo/tempdir.rb
142
144
  - lib/roo/utils.rb
143
145
  - lib/roo/version.rb
144
146
  - roo.gemspec