roo 2.4.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d2a5a56902bf60eef9028eddb5617ae403b866f
4
- data.tar.gz: 3fad0f4206fd3058b3a420b63da7393b530f5f59
3
+ metadata.gz: 975d999c227f6bc66a6819e0e69c3ebceb3c76c9
4
+ data.tar.gz: 9b790645ff9ad0e2a8e1a84c9deb465bf5e5154b
5
5
  SHA512:
6
- metadata.gz: 91ca5af7282631e668e104e407fb6affd40bc56792809b7129e46de4dd9e4af8133aa6c1eb18f2a9ac8fc7d9470ae51bfb304bff9d920893f1b391b40f6dd162
7
- data.tar.gz: 9efee7d9ae6fec31bd965a7ce49fcd57452b0b61bb8ad0eb89a21640e4d3f0bbd5cfbb018f6f567b8475050550eb684dd6800aa80716876e2c0b747ae8adecd2
6
+ metadata.gz: 7b197a0ae99ceeb81ff7a8bf12099944f0b2667dbc25ee85380d64bd1117309cc50e98eece758824ee9d5c0b022b354b3c64a10357035281ee7191f79cb479d1
7
+ data.tar.gz: a73ae23bc6419fcacad79d85496e6349ef0b45956382cc1e93af0f4be57614248a1e1db33dcc87774a2b8cbbb58098d8526ecc4267c79c86f242acec161815dc
@@ -1,12 +1,16 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.0.0
4
- - 2.1.8
5
3
  - 2.2.4
4
+ - 2.3.1
6
5
  - ruby-head
7
6
  - jruby-19mode # JRuby in 1.9 mode
8
7
  - rbx-2
9
8
  matrix:
9
+ include:
10
+ - rvm: 2.0.0
11
+ gemfile: Gemfile_ruby2
12
+ - rvm: 2.1.8
13
+ gemfile: Gemfile_ruby2
10
14
  allow_failures:
11
15
  - rvm: ruby-head
12
16
  - rvm: jruby-19mode
@@ -1,3 +1,22 @@
1
+ ## Unreleased
2
+
3
+ ## [2.5.0] 2016-08-21
4
+ ### Fixed
5
+ - Remove tempdirs via finalizers on garbage collection. This cleans them up in all known cases, rather than just when the #close method is called. The #close method can be used to clean up early. [329](https://github.com/roo-rb/roo/pull/329)
6
+ - Fixed README.md typo [318](https://github.com/roo-rb/roo/pull/318)
7
+ - Parse sheets in ODS files once to improve performance [320](https://github.com/roo-rb/roo/pull/320)
8
+ - Fix some Cell conversion issues [324](https://github.com/roo-rb/roo/pull/324) and [331](https://github.com/roo-rb/roo/pull/331)
9
+ - Improved memory performance [332](https://github.com/roo-rb/roo/pull/332)
10
+ - Added `no_hyperlinks` option to improve streaming performance [319](https://github.com/roo-rb/roo/pull/319) and [333](https://github.com/roo-rb/roo/pull/333)
11
+
12
+ ### Deprecations
13
+ - Roo::Base::TEMP_PREFIX should be accessed via Roo::TEMP_PREFIX
14
+ - The private Roo::Base#make_tmpdir is deprecated; its replacement, .make_tempdir, is available at the class level in
15
+ classes that use tempdirs, added via Roo::Tempdir
16
+ =======
17
+ ### Added
18
+ - Discard hyperlink lookups to allow streaming parsing without loading whole files
19
+
1
20
  ## [2.4.0] 2016-05-14
2
21
  ### Fixed
3
22
  - Fixed opening spreadsheets with charts [315](https://github.com/roo-rb/roo/pull/315)
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ group :test do
6
6
  # additional testing libs
7
7
  gem 'webmock'
8
8
  gem 'shoulda'
9
+ gem 'activesupport', '< 5.0.0'
9
10
  gem 'rspec', '>= 3.0.0'
10
11
  gem 'vcr'
11
12
  gem 'simplecov', '>= 0.9.0', require: false
@@ -0,0 +1,27 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :test do
6
+ # additional testing libs
7
+ gem 'webmock'
8
+ gem 'shoulda'
9
+ gem 'rspec', '>= 3.0.0'
10
+ gem 'vcr'
11
+ gem 'simplecov', '>= 0.9.0', require: false
12
+ gem 'coveralls', require: false
13
+ # gem "pry"
14
+ gem "activesupport", "~> 4.2.0"
15
+ end
16
+
17
+ group :local_development do
18
+ gem "listen", "~> 3.0.6"
19
+ gem 'terminal-notifier-guard', require: false if RUBY_PLATFORM.downcase.include?('darwin')
20
+ gem 'guard-rspec', '>= 4.3.1', require: false
21
+ gem 'guard-minitest', require: false
22
+ gem 'guard-bundler', require: false
23
+ gem 'guard-preek', require: false
24
+ gem 'guard-rubocop', require: false
25
+ gem 'guard-reek', github: 'pericles/guard-reek', require: false
26
+ gem 'pry'
27
+ end
data/README.md CHANGED
@@ -50,7 +50,7 @@ ods.sheet(0).row(1)
50
50
 
51
51
  # Set the last sheet as the default sheet.
52
52
  ods.default_sheet = ods.sheets.last
53
- ods.default_sheet = s.sheets[3]
53
+ ods.default_sheet = ods.sheets[2]
54
54
  ods.default_sheet = 'Sheet 3'
55
55
 
56
56
  # Iterate through each sheet
data/lib/roo.rb CHANGED
@@ -9,6 +9,8 @@ module Roo
9
9
  autoload :Excelx, 'roo/excelx'
10
10
  autoload :CSV, 'roo/csv'
11
11
 
12
+ TEMP_PREFIX = 'roo_'.freeze
13
+
12
14
  CLASS_FOR_EXTENSION = {
13
15
  ods: Roo::OpenOffice,
14
16
  xlsx: Roo::Excelx,
@@ -9,7 +9,6 @@ require 'roo/utils'
9
9
  class Roo::Base
10
10
  include Enumerable
11
11
 
12
- TEMP_PREFIX = 'roo_'.freeze
13
12
  MAX_ROW_COL = 999_999.freeze
14
13
  MIN_ROW_COL = 0.freeze
15
14
 
@@ -18,6 +17,11 @@ class Roo::Base
18
17
  # sets the line with attribute names (default: 1)
19
18
  attr_accessor :header_line
20
19
 
20
+ def self.TEMP_PREFIX
21
+ warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX'
22
+ Roo::TEMP_PREFIX
23
+ end
24
+
21
25
  def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
22
26
  @filename = filename
23
27
  @options = options
@@ -32,14 +36,12 @@ class Roo::Base
32
36
  @last_column = {}
33
37
 
34
38
  @header_line = 1
35
- rescue => e # clean up any temp files, but only if an error was raised
36
- close
37
- raise e
38
39
  end
39
40
 
40
41
  def close
41
- return nil unless @tmpdirs
42
- @tmpdirs.each { |dir| ::FileUtils.remove_entry(dir) }
42
+ if self.class.respond_to?(:finalize_tempdirs)
43
+ self.class.finalize_tempdirs(object_id)
44
+ end
43
45
  nil
44
46
  end
45
47
 
@@ -449,10 +451,6 @@ class Roo::Base
449
451
 
450
452
  private
451
453
 
452
- def track_tmpdir!(tmpdir)
453
- (@tmpdirs ||= []) << tmpdir
454
- end
455
-
456
454
  def clean_sheet_if_need(options)
457
455
  return unless options[:clean]
458
456
  options.delete(:clean)
@@ -538,7 +536,7 @@ class Roo::Base
538
536
  def find_basename(filename)
539
537
  if uri?(filename)
540
538
  require 'uri'
541
- uri = URI::parse filename
539
+ uri = URI.parse filename
542
540
  File.basename(uri.path)
543
541
  elsif !is_stream?(filename)
544
542
  File.basename(filename)
@@ -546,10 +544,15 @@ class Roo::Base
546
544
  end
547
545
 
548
546
  def make_tmpdir(prefix = nil, root = nil, &block)
547
+ warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead'
549
548
  prefix = "#{TEMP_PREFIX}#{prefix}"
549
+ root ||= ENV['ROO_TMP']
550
550
 
551
- ::Dir.mktmpdir(prefix, root || ENV['ROO_TMP'], &block).tap do |result|
552
- block_given? || track_tmpdir!(result)
551
+ if block_given?
552
+ # folder is deleted at end of block
553
+ ::Dir.mktmpdir(prefix, root, &block)
554
+ else
555
+ self.class.make_tempdir(self, prefix, root)
553
556
  end
554
557
  end
555
558
 
@@ -56,7 +56,7 @@ class Roo::CSV < Roo::Base
56
56
 
57
57
  def each_row(options, &block)
58
58
  if uri?(filename)
59
- make_tmpdir do |tmpdir|
59
+ ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV['ROO_TMP']) do |tmpdir|
60
60
  tmp_filename = download_uri(filename, tmpdir)
61
61
  CSV.foreach(tmp_filename, options, &block)
62
62
  end
@@ -1,11 +1,14 @@
1
1
  require 'nokogiri'
2
2
  require 'zip/filesystem'
3
3
  require 'roo/link'
4
+ require 'roo/tempdir'
4
5
  require 'roo/utils'
5
6
  require 'forwardable'
6
7
 
7
8
  module Roo
8
9
  class Excelx < Roo::Base
10
+ extend Roo::Tempdir
11
+
9
12
  require 'set'
10
13
  extend Forwardable
11
14
 
@@ -36,13 +39,14 @@ module Roo
36
39
  cell_max = options.delete(:cell_max)
37
40
  sheet_options = {}
38
41
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
42
+ sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
39
43
 
40
44
  unless is_stream?(filename_or_stream)
41
45
  file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
42
46
  basename = find_basename(filename_or_stream)
43
47
  end
44
48
 
45
- @tmpdir = make_tmpdir(basename, options[:tmpdir_root])
49
+ @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
46
50
  @shared = Shared.new(@tmpdir)
47
51
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
48
52
  process_zipfile(@filename || filename_or_stream)
@@ -64,9 +68,9 @@ module Roo
64
68
  end
65
69
 
66
70
  super
67
- rescue => e # clean up any temp files, but only if an error was raised
68
- close
69
- raise e
71
+ rescue
72
+ self.class.finalize_tempdirs(object_id)
73
+ raise
70
74
  end
71
75
 
72
76
  def method_missing(method, *args)
@@ -8,7 +8,6 @@ module Roo
8
8
  private
9
9
 
10
10
  def doc
11
- @doc ||=
12
11
  if doc_exists?
13
12
  ::Roo::Utils.load_xml(@path).remove_namespaces!
14
13
  end
@@ -3,6 +3,14 @@ require 'roo/excelx/extractor'
3
3
  module Roo
4
4
  class Excelx
5
5
  class SharedStrings < Excelx::Extractor
6
+
7
+ COMMON_STRINGS = {
8
+ t: "t",
9
+ r: "r",
10
+ html_tag_open: "<html>",
11
+ html_tag_closed: "</html>"
12
+ }
13
+
6
14
  def [](index)
7
15
  to_a[index]
8
16
  end
@@ -26,18 +34,17 @@ module Roo
26
34
  def fix_invalid_shared_strings(doc)
27
35
  invalid = { '_x000D_' => "\n" }
28
36
  xml = doc.to_s
37
+ return doc unless xml[/#{invalid.keys.join('|')}/]
29
38
 
30
- if xml[/#{invalid.keys.join('|')}/]
31
- @doc = ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
32
- end
39
+ ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
33
40
  end
34
41
 
35
42
  def extract_shared_strings
36
43
  return [] unless doc_exists?
37
44
 
38
- fix_invalid_shared_strings(doc)
45
+ document = fix_invalid_shared_strings(doc)
39
46
  # read the shared strings xml document
40
- doc.xpath('/sst/si').map do |si|
47
+ document.xpath('/sst/si').map do |si|
41
48
  shared_string = ''
42
49
  si.children.each do |elem|
43
50
  case elem.name
@@ -39,8 +39,13 @@ module Roo
39
39
  def each_cell(row_xml)
40
40
  return [] unless row_xml
41
41
  row_xml.children.each do |cell_element|
42
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
43
- yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
42
+ # If you're sure you're not going to need this hyperlinks you can discard it
43
+ hyperlinks = unless @options[:no_hyperlinks]
44
+ key = ::Roo::Utils.ref_to_key(cell_element['r'])
45
+ hyperlinks(@relationships)[key]
46
+ end
47
+
48
+ yield cell_from_xml(cell_element, hyperlinks)
44
49
  end
45
50
  end
46
51
 
@@ -90,10 +95,9 @@ module Roo
90
95
  cell_xml.children.each do |cell|
91
96
  case cell.name
92
97
  when 'is'
93
- cell.children.each do |inline_str|
94
- if inline_str.name == 't'
95
- return Excelx::Cell.create_cell(:string, inline_str.content, formula, style, hyperlink, coordinate)
96
- end
98
+ content_arr = cell.search('t').map(&:content)
99
+ unless content_arr.empty?
100
+ return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
97
101
  end
98
102
  when 'f'
99
103
  formula = cell.content
@@ -101,6 +105,8 @@ module Roo
101
105
  return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
102
106
  end
103
107
  end
108
+
109
+ Excelx::Cell::Empty.new(coordinate)
104
110
  end
105
111
 
106
112
  def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
@@ -157,8 +163,9 @@ module Roo
157
163
  end
158
164
 
159
165
  def extract_hyperlinks(relationships)
160
- # FIXME: select the valid hyperlinks and then map those.
161
- Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
166
+ return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
167
+
168
+ Hash[hyperlinks.map do |hyperlink|
162
169
  if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
163
170
  [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
164
171
  end
@@ -3,10 +3,13 @@ require 'nokogiri'
3
3
  require 'cgi'
4
4
  require 'zip/filesystem'
5
5
  require 'roo/font'
6
+ require 'roo/tempdir'
6
7
  require 'base64'
7
8
 
8
9
  module Roo
9
10
  class OpenOffice < Roo::Base
11
+ extend Roo::Tempdir
12
+
10
13
  ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
11
14
  XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
12
15
  XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
@@ -19,15 +22,27 @@ module Roo
19
22
 
20
23
  @only_visible_sheets = options[:only_visible_sheets]
21
24
  file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
22
- @tmpdir = make_tmpdir(find_basename(filename), options[:tmpdir_root])
25
+ @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
23
26
  @filename = local_filename(filename, @tmpdir, packed)
24
27
  # TODO: @cells_read[:default] = false
25
28
  open_oo_file(options)
26
29
  super(filename, options)
27
30
  initialize_default_variables
28
- rescue => e # clean up any temp files, but only if an error was raised
29
- close
30
- raise e
31
+
32
+ unless @table_display.any?
33
+ doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
34
+ read_table_styles(style)
35
+ end
36
+ end
37
+
38
+ @sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
39
+ if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
40
+ sheet.attributes['name'].value
41
+ end
42
+ end.compact
43
+ rescue
44
+ self.class.finalize_tempdirs(object_id)
45
+ raise
31
46
  end
32
47
 
33
48
  def open_oo_file(options)
@@ -132,16 +147,7 @@ module Roo
132
147
  end
133
148
 
134
149
  def sheets
135
- unless @table_display.any?
136
- doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
137
- read_table_styles(style)
138
- end
139
- end
140
- doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
141
- if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
142
- sheet.attributes['name'].value
143
- end
144
- end.compact
150
+ @sheet_names
145
151
  end
146
152
 
147
153
  # version of the Roo::OpenOffice document
@@ -0,0 +1,26 @@
1
+ module Roo
2
+ module Tempdir
3
+ def finalize_tempdirs(object_id)
4
+ if @tempdirs && (dirs_to_remove = @tempdirs[object_id])
5
+ @tempdirs[object_id] = nil
6
+ dirs_to_remove.each do |dir|
7
+ ::FileUtils.remove_entry(dir)
8
+ end
9
+ end
10
+ end
11
+
12
+ def make_tempdir(object, prefix, root)
13
+ root ||= ENV['ROO_TMP']
14
+ # folder is cleaned up in .finalize_tempdirs
15
+ ::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root).tap do |tmpdir|
16
+ @tempdirs ||= {}
17
+ if @tempdirs[object.object_id]
18
+ @tempdirs[object.object_id] << tmpdir
19
+ else
20
+ @tempdirs[object.object_id] = [tmpdir]
21
+ ObjectSpace.define_finalizer(object, method(:finalize_tempdirs))
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -1,3 +1,3 @@
1
1
  module Roo
2
- VERSION = '2.4.0'
2
+ VERSION = '2.5.0'
3
3
  end
@@ -1,7 +1,4 @@
1
1
  require 'test_helper'
2
- # require 'pry'
3
- # require 'active_support'
4
- # require 'active_support/time'
5
2
 
6
3
  class TestRooExcelxCellDateTime < Minitest::Test
7
4
  def test_cell_value_is_datetime
@@ -17,6 +17,8 @@
17
17
  # with the wrong spreadsheet class
18
18
  #STDERR.reopen "/dev/null","w"
19
19
 
20
+ Encoding.default_external = "UTF-8"
21
+
20
22
  require 'test_helper'
21
23
  require 'stringio'
22
24
 
@@ -2089,6 +2091,19 @@ where the expected result is
2089
2091
  end
2090
2092
  end
2091
2093
 
2094
+ def test_finalize
2095
+ tempdirs = []
2096
+ begin
2097
+ with_each_spreadsheet(:name=>'numbers1') do |oo|
2098
+ tempdirs << oo.instance_variable_get('@tmpdir')
2099
+ end
2100
+ GC.start
2101
+ end
2102
+ tempdirs.each do |tempdir|
2103
+ assert !File.exists?(tempdir), "Expected #{tempdir} to be cleaned up, but it still exists"
2104
+ end
2105
+ end
2106
+
2092
2107
  def test_cleanup_on_error
2093
2108
  old_temp_files = Dir.open(Dir.tmpdir).to_a
2094
2109
  with_each_spreadsheet(:name=>'non_existent_file', :ignore_errors=>true) do |oo|; end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: roo
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Preymesser
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-05-14 00:00:00.000000000 Z
15
+ date: 2016-08-21 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: nokogiri
@@ -101,6 +101,7 @@ files:
101
101
  - ".travis.yml"
102
102
  - CHANGELOG.md
103
103
  - Gemfile
104
+ - Gemfile_ruby2
104
105
  - Guardfile
105
106
  - LICENSE
106
107
  - README.md
@@ -139,6 +140,7 @@ files:
139
140
  - lib/roo/link.rb
140
141
  - lib/roo/open_office.rb
141
142
  - lib/roo/spreadsheet.rb
143
+ - lib/roo/tempdir.rb
142
144
  - lib/roo/utils.rb
143
145
  - lib/roo/version.rb
144
146
  - roo.gemspec