culturecode-roo 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. data/.gitignore +7 -0
  2. data/.simplecov +4 -0
  3. data/.travis.yml +13 -0
  4. data/CHANGELOG +438 -0
  5. data/Gemfile +24 -0
  6. data/Guardfile +24 -0
  7. data/LICENSE +22 -0
  8. data/README.md +121 -0
  9. data/Rakefile +23 -0
  10. data/examples/roo_soap_client.rb +50 -0
  11. data/examples/roo_soap_server.rb +26 -0
  12. data/examples/write_me.rb +31 -0
  13. data/lib/roo.rb +28 -0
  14. data/lib/roo/base.rb +717 -0
  15. data/lib/roo/csv.rb +110 -0
  16. data/lib/roo/excelx.rb +542 -0
  17. data/lib/roo/excelx/comments.rb +23 -0
  18. data/lib/roo/excelx/extractor.rb +20 -0
  19. data/lib/roo/excelx/relationships.rb +26 -0
  20. data/lib/roo/excelx/shared_strings.rb +40 -0
  21. data/lib/roo/excelx/sheet_doc.rb +175 -0
  22. data/lib/roo/excelx/styles.rb +62 -0
  23. data/lib/roo/excelx/workbook.rb +59 -0
  24. data/lib/roo/font.rb +17 -0
  25. data/lib/roo/libre_office.rb +5 -0
  26. data/lib/roo/link.rb +15 -0
  27. data/lib/roo/open_office.rb +652 -0
  28. data/lib/roo/spreadsheet.rb +31 -0
  29. data/lib/roo/utils.rb +81 -0
  30. data/lib/roo/version.rb +3 -0
  31. data/roo.gemspec +27 -0
  32. data/scripts/txt2html +67 -0
  33. data/spec/fixtures/vcr_cassettes/google_drive.yml +165 -0
  34. data/spec/fixtures/vcr_cassettes/google_drive_access_token.yml +73 -0
  35. data/spec/fixtures/vcr_cassettes/google_drive_set.yml +857 -0
  36. data/spec/lib/roo/base_spec.rb +4 -0
  37. data/spec/lib/roo/csv_spec.rb +48 -0
  38. data/spec/lib/roo/excelx/format_spec.rb +51 -0
  39. data/spec/lib/roo/excelx_spec.rb +363 -0
  40. data/spec/lib/roo/libreoffice_spec.rb +13 -0
  41. data/spec/lib/roo/openoffice_spec.rb +15 -0
  42. data/spec/lib/roo/spreadsheet_spec.rb +88 -0
  43. data/spec/lib/roo/utils_spec.rb +105 -0
  44. data/spec/spec_helper.rb +9 -0
  45. data/test/all_ss.rb +11 -0
  46. data/test/files/1900_base.xlsx +0 -0
  47. data/test/files/1904_base.xlsx +0 -0
  48. data/test/files/Bibelbund.csv +3741 -0
  49. data/test/files/Bibelbund.ods +0 -0
  50. data/test/files/Bibelbund.xlsx +0 -0
  51. data/test/files/Bibelbund1.ods +0 -0
  52. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  53. data/test/files/advanced_header.ods +0 -0
  54. data/test/files/bbu.ods +0 -0
  55. data/test/files/bbu.xlsx +0 -0
  56. data/test/files/bode-v1.ods.zip +0 -0
  57. data/test/files/bode-v1.xls.zip +0 -0
  58. data/test/files/boolean.csv +2 -0
  59. data/test/files/boolean.ods +0 -0
  60. data/test/files/boolean.xlsx +0 -0
  61. data/test/files/borders.ods +0 -0
  62. data/test/files/borders.xlsx +0 -0
  63. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  64. data/test/files/comments.ods +0 -0
  65. data/test/files/comments.xlsx +0 -0
  66. data/test/files/csvtypes.csv +1 -0
  67. data/test/files/datetime.ods +0 -0
  68. data/test/files/datetime.xlsx +0 -0
  69. data/test/files/dreimalvier.ods +0 -0
  70. data/test/files/emptysheets.ods +0 -0
  71. data/test/files/emptysheets.xlsx +0 -0
  72. data/test/files/encrypted-letmein.ods +0 -0
  73. data/test/files/file_item_error.xlsx +0 -0
  74. data/test/files/formula.ods +0 -0
  75. data/test/files/formula.xlsx +0 -0
  76. data/test/files/formula_string_error.xlsx +0 -0
  77. data/test/files/html-escape.ods +0 -0
  78. data/test/files/link.csv +1 -0
  79. data/test/files/link.xlsx +0 -0
  80. data/test/files/matrix.ods +0 -0
  81. data/test/files/named_cells.ods +0 -0
  82. data/test/files/named_cells.xlsx +0 -0
  83. data/test/files/no_spreadsheet_file.txt +1 -0
  84. data/test/files/numbers-export.xlsx +0 -0
  85. data/test/files/numbers1.csv +18 -0
  86. data/test/files/numbers1.ods +0 -0
  87. data/test/files/numbers1.xlsx +0 -0
  88. data/test/files/numbers1withnull.xlsx +0 -0
  89. data/test/files/numeric-link.xlsx +0 -0
  90. data/test/files/only_one_sheet.ods +0 -0
  91. data/test/files/only_one_sheet.xlsx +0 -0
  92. data/test/files/paragraph.ods +0 -0
  93. data/test/files/paragraph.xlsx +0 -0
  94. data/test/files/ric.ods +0 -0
  95. data/test/files/sheet1.xml +109 -0
  96. data/test/files/simple_spreadsheet.ods +0 -0
  97. data/test/files/simple_spreadsheet.xlsx +0 -0
  98. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  99. data/test/files/so_datetime.csv +8 -0
  100. data/test/files/style.ods +0 -0
  101. data/test/files/style.xlsx +0 -0
  102. data/test/files/time-test.csv +2 -0
  103. data/test/files/time-test.ods +0 -0
  104. data/test/files/time-test.xlsx +0 -0
  105. data/test/files/type_excel.ods +0 -0
  106. data/test/files/type_excel.xlsx +0 -0
  107. data/test/files/type_excelx.ods +0 -0
  108. data/test/files/type_openoffice.xlsx +0 -0
  109. data/test/files/whitespace.ods +0 -0
  110. data/test/files/whitespace.xlsx +0 -0
  111. data/test/test_generic_spreadsheet.rb +211 -0
  112. data/test/test_helper.rb +58 -0
  113. data/test/test_roo.rb +1977 -0
  114. metadata +329 -0
data/Gemfile ADDED
@@ -0,0 +1,24 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :test do
6
+ # additional testing libs
7
+ gem 'webmock'
8
+ gem 'shoulda'
9
+ gem 'rspec', '>= 3.0.0'
10
+ gem 'vcr'
11
+ gem 'simplecov', '>= 0.9.0', require: false
12
+ gem 'coveralls', require: false
13
+ end
14
+
15
+ group :local_development do
16
+ gem 'terminal-notifier-guard', require: false if RUBY_PLATFORM.downcase.include?('darwin')
17
+ gem 'guard-rspec', '>= 4.3.1', require: false
18
+ gem 'guard-minitest', require: false
19
+ gem 'guard-bundler', require: false
20
+ gem 'guard-preek', require: false
21
+ gem 'guard-rubocop', require: false
22
+ gem 'guard-reek', github: 'pericles/guard-reek', require: false
23
+ gem 'pry'
24
+ end
data/Guardfile ADDED
@@ -0,0 +1,24 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :minitest, test_folders: ['test'] do
5
+ watch(%r{^test/(.*)\/?test_(.*)\.rb$})
6
+ watch(%r{^lib/(.*/)?([^/]+)\.rb$}) { |m| "test/#{m[1]}test_#{m[2]}.rb" }
7
+ watch(%r{^test/test_helper\.rb$}) { 'test' }
8
+ end
9
+
10
+ # Note: The cmd option is now required due to the increasing number of ways
11
+ # rspec may be run, below are examples of the most common uses.
12
+ # * bundler: 'bundle exec rspec'
13
+ # * bundler binstubs: 'bin/rspec'
14
+ # * spring: 'bin/rspec' (This will use spring if running and you have
15
+ # installed the spring binstubs per the docs)
16
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
17
+ # * 'just' rspec: 'rspec'
18
+ guard :rspec, cmd: 'bundle exec rspec' do
19
+ watch(%r{^spec/.+_spec\.rb$})
20
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
21
+ watch('spec/spec_helper.rb') { "spec" }
22
+ watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
23
+ end
24
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2008-2014 Thomas Preymesser, Ben Woosley
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,121 @@
1
+ # Roo [![Build Status](https://travis-ci.org/roo-rb/roo.svg)](https://travis-ci.org/roo-rb/roo)[![Code Climate](https://codeclimate.com/github/roo-rb/roo/badges/gpa.svg)](https://codeclimate.com/github/roo-rb/roo)[![Coverage Status](https://coveralls.io/repos/roo-rb/roo/badge.png)](https://coveralls.io/r/roo-rb/roo)
2
+
3
+ Roo implements read access for all spreadsheet types and read/write access for
4
+ Google spreadsheets. It can handle
5
+ * Excelx
6
+ * OpenOffice / LibreOffice
7
+ * CSV
8
+
9
+ ## Additional libraries
10
+
11
+ In addition, the roo-xls and roo-google gems exist to add classic Excel and
12
+ Google Spreadsheet handling capabilities to roo.
13
+
14
+ ## Usage:
15
+
16
+ ```ruby
17
+ require 'roo'
18
+
19
+ s = Roo::OpenOffice.new("myspreadsheet.ods") # loads an OpenOffice Spreadsheet
20
+ s = Roo::OpenOffice.new("myspreadsheet.ods", :password => "password") # loads an encrypted OpenOffice Spreadsheet
21
+ s = Roo::Excelx.new("myspreadsheet.xlsx") # loads an Excel Spreadsheet for Excel .xlsx files
22
+ s = Roo::CSV.new("mycsv.csv") # loads a CSV file
23
+
24
+ # You can use CSV to load TSV files, or files of a certain encoding by passing
25
+ # in options under the :csv_options key
26
+ s = Roo::CSV.new("mytsv.tsv", csv_options: {col_sep: "\t"}) # TSV
27
+ s = Roo::CSV.new("mycsv.csv", csv_options: {encoding: Encoding::ISO_8859_1}) # csv with explicit encoding
28
+
29
+ s.default_sheet = s.sheets.first # first sheet in the spreadsheet file will be used
30
+
31
+ # s.sheets is an array which holds the names of the sheets within
32
+ # a spreadsheet.
33
+ # you can also write
34
+ # s.default_sheet = s.sheets[3] or
35
+ # s.default_sheet = 'Sheet 3'
36
+
37
+ s.cell(1,1) # returns the content of the first row/first cell in the sheet
38
+ s.cell('A',1) # same cell
39
+ s.cell(1,'A') # same cell
40
+ s.cell(1,'A',s.sheets[0]) # same cell
41
+
42
+ # almost all methods have an optional argument 'sheet'.
43
+ # If this parameter is omitted, the default_sheet will be used.
44
+
45
+ s.info # Returns basic info about the spreadsheet file
46
+
47
+ s.first_row # the number of the first row
48
+ s.last_row # the number of the last row
49
+ s.first_column # the number of the first column
50
+ s.last_column # the number of the last column
51
+
52
+ # limited font information is available
53
+
54
+ s.font(1,1).bold?
55
+ s.font(1,1).italic?
56
+ s.font(1,1).underline?
57
+
58
+
59
+ # Roo::Spreadsheet requires spreadsheet gem
60
+ require 'spreadsheet'
61
+
62
+ # Spreadsheet.open can accept both files and paths
63
+
64
+ xls = Roo::Spreadsheet.open('./new_prices.xls')
65
+
66
+ # If the File.path or provided path string does not have an extension, you can optionally
67
+ # provide one as a string or symbol
68
+
69
+ xls = Roo::Spreadsheet.open('./rails_temp_upload', extension: :xls)
70
+
71
+ # no more setting xls.default_sheet, just use this
72
+
73
+ xls.sheet('Info').row(1)
74
+ xls.sheet(0).row(1)
75
+
76
+ # excel likes to create random "Data01" sheets for macros
77
+ # use this to find the sheet with the most data to parse
78
+
79
+ xls.longest_sheet
80
+
81
+ # this excel file has multiple worksheets, let's iterate through each of them and process
82
+
83
+ xls.each_with_pagename do |name, sheet|
84
+ p sheet.row(1)
85
+ end
86
+
87
+ # pull out a hash of exclusive column data (get rid of useless columns and save memory)
88
+
89
+ xls.each(:id => 'UPC',:qty => 'ATS') {|hash| arr << hash}
90
+ #=> hash will appear like {:id=>727880013358, :qty => 12}
91
+
92
+ # NOTE: .parse does the same as .each, except it returns an array (similar to each vs. map)
93
+
94
+ # not sure exactly what a column will be named? try a wildcard search with a regex
95
+
96
+ xls.parse(:id => /UPC|SKU/,:qty => /ATS*\sATP\s*QTY\z/)
97
+
98
+ # if you need to locate the header row and assign the header names themselves,
99
+ # use the :header_search option
100
+
101
+ xls.parse(:header_search => [/UPC*SKU/,/ATS*\sATP\s*QTY\z/])
102
+ #=> each element will appear in this fashion:
103
+ #=> {"UPC" => 123456789012, "STYLE" => "987B0", "COLOR" => "blue", "QTY" => 78}
104
+
105
+ # want to strip out annoying unicode characters and surrounding white space?
106
+
107
+ xls.parse(:clean => true)
108
+
109
+ # another bonus feature is a patch to prevent the Spreadsheet gem from parsing
110
+ # thousands and thousands of blank lines. i got fed up after watching my computer
111
+ # nearly catch fire for 4 hours for a spreadsheet with only 200 ACTUAL lines
112
+ # - located in lib/roo/worksheet.rb
113
+
114
+ # if you want to load and stream .xlsx rows
115
+
116
+ s = Roo::Excelx.new("./test_data/test_small.xlsx")
117
+ s.each_row_streaming do |row|
118
+ puts row.inspect # Array of Excelx::Cell objects
119
+ end
120
+
121
+ ```
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
+ require 'bundler/gem_tasks'
2
+
3
+ require 'rake/testtask'
4
+ require 'rspec/core/rake_task'
5
+ require 'coveralls/rake/task'
6
+
7
+ # Test unit
8
+ Rake::TestTask.new do |t|
9
+ t.libs << 'test'
10
+ t.test_files = FileList['test/test*.rb']
11
+ t.verbose = true
12
+ end
13
+
14
+ # RSpec
15
+ RSpec::Core::RakeTask.new(:spec)
16
+
17
+ # Coveralls
18
+ Coveralls::RakeTask.new
19
+
20
+ default_task = [:test, :spec]
21
+ default_task << 'coveralls:push' if ENV['TRAVIS']
22
+
23
+ task default: default_task
@@ -0,0 +1,50 @@
1
+ require 'soap/rpc/driver'
2
+
3
# Prints one line per holiday row of +region+ found through +proxy+.
#
# proxy  - object answering first_row, last_row and cell(row, col)
# region - value that column 2 must equal for a row to be printed
# year   - optional year; when given, only rows whose column 1 (as an
#          integer) equals it are printed
#
# A row whose end date (column 5) lies in the past is wrapped in parentheses.
def ferien_fuer_region(proxy, region, year = nil)
  proxy.first_row.upto(proxy.last_row) do |row|
    next unless proxy.cell(row, 2) == region

    jahr = proxy.cell(row, 1).to_i
    next unless year.nil? || jahr == year

    bis_datum = proxy.cell(row, 5)
    # evaluate "already over" once so '(' and ')' can never disagree
    vorbei = DateTime.now > bis_datum

    print '(' if vorbei
    print jahr.to_s + ' '
    print proxy.cell(row, 2) + ' '
    print proxy.cell(row, 3) + ' '
    print proxy.cell(row, 4).to_s + ' '
    print bis_datum.to_s + ' '
    print((proxy.cell(row, 6) || '') + ' ')
    print ')' if vorbei
    puts
  end
end
26
+
27
+ proxy = SOAP::RPC::Driver.new('http://localhost:12321', 'spreadsheetserver')
28
+ proxy.add_method('cell', 'row', 'col')
29
+ proxy.add_method('officeversion')
30
+ proxy.add_method('last_row')
31
+ proxy.add_method('last_column')
32
+ proxy.add_method('first_row')
33
+ proxy.add_method('first_column')
34
+ proxy.add_method('sheets')
35
+ proxy.add_method('set_default_sheet', 's')
36
+ proxy.add_method('ferien_fuer_region', 'region')
37
+
38
+ sheets = proxy.sheets
39
+ proxy.set_default_sheet(sheets.first)
40
+
41
+ puts "first row: #{proxy.first_row}"
42
+ puts "first column: #{proxy.first_column}"
43
+ puts "last row: #{proxy.last_row}"
44
+ puts "last column: #{proxy.last_column}"
45
+ puts "cell: #{proxy.cell('C', 8)}"
46
+ puts "cell: #{proxy.cell('F', 12)}"
47
+ puts "officeversion: #{proxy.officeversion}"
48
+ puts 'Berlin:'
49
+
50
+ ferien_fuer_region(proxy, 'Berlin')
@@ -0,0 +1,26 @@
1
+ require 'roo'
2
+ require 'soap/rpc/standaloneServer'
3
+
4
+ NS = 'spreadsheetserver' # name of your service = namespace
5
# Standalone SOAP server that publishes a handful of spreadsheet methods.
class Server2 < SOAP::RPC::StandaloneServer
  # Opens the spreadsheet and registers the methods exposed over SOAP.
  def on_init
    # the class lives in the Roo namespace; a bare OpenOffice constant is not defined
    spreadsheet = Roo::OpenOffice.new('./Ferien-de.ods')
    add_method(spreadsheet, 'cell', 'row', 'col')
    add_method(spreadsheet, 'officeversion')
    add_method(spreadsheet, 'first_row')
    add_method(spreadsheet, 'last_row')
    add_method(spreadsheet, 'first_column')
    add_method(spreadsheet, 'last_column')
    add_method(spreadsheet, 'sheets')
    # add_method(spreadsheet, 'default_sheet=', 's')
    # method with '...=' did not work? alias method 'set_default_sheet' created
    add_method(spreadsheet, 'set_default_sheet', 's')
  end
end
20
+
21
+ PORT = 12_321
22
+ puts "serving at port #{PORT}"
23
+ svr = Server2.new('Roo', NS, '0.0.0.0', PORT)
24
+
25
+ trap('INT') { svr.shutdown }
26
+ svr.start
@@ -0,0 +1,31 @@
1
require 'roo'

#-- create a new spreadsheet within your google-spreadsheets and paste
#-- the 'key' parameter in the spreadsheet URL
MAXTRIES = 1000

# ask the user what to write
print "what's your name? "
my_name = gets.chomp
print 'where do you live? '
my_location = gets.chomp
print 'your message? (if left blank, only your name and location will be inserted) '
my_message = gets.chomp

# the Google class lives in the Roo namespace; a bare Google constant is not defined
spreadsheet = Roo::Google.new('ptu6bbahNZpY0N0RrxQbWdw')
spreadsheet.default_sheet = 'Sheet1'

# probe random cells of the 10x10 top-left area until a free one is found
success = false
MAXTRIES.times do
  col = rand(10) + 1
  row = rand(10) + 1
  if spreadsheet.empty?(row, col)
    if my_message.empty?
      text = Time.now.to_s + ' ' + "Greetings from #{my_name} (#{my_location})"
    else
      text = Time.now.to_s + ' ' + "#{my_message} from #{my_name} (#{my_location})"
    end
    spreadsheet.set_value(row, col, text)
    puts "message written to row #{row}, column #{col}"
    success = true
    break
  end
  puts "Row #{row}, column #{col} already occupied, trying again..."
end
puts "no empty cell found within #{MAXTRIES} tries" unless success
data/lib/roo.rb ADDED
@@ -0,0 +1,28 @@
1
# Top-level namespace: lazily loads the spreadsheet classes and maps file
# extensions to the class that reads them.
module Roo
  autoload :Spreadsheet, 'roo/spreadsheet'
  autoload :Base, 'roo/base'

  autoload :OpenOffice, 'roo/open_office'
  autoload :LibreOffice, 'roo/libre_office'
  autoload :Excelx, 'roo/excelx'
  autoload :CSV, 'roo/csv'

  # extension (as a symbol, without the dot) => reader class
  CLASS_FOR_EXTENSION = {
    ods: Roo::OpenOffice,
    xlsx: Roo::Excelx,
    csv: Roo::CSV
  }

  # Raises an explanatory error for reader classes that were extracted into
  # separate gems; every other unknown constant gets the default handling.
  def self.const_missing(const_name)
    hint =
      case const_name
      when :Excel
        "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel."
      when :Excel2003XML
        "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML."
      when :Google
        "Google support has been extracted to roo-google. Install roo-google to use Roo::Google."
      end
    raise hint if hint

    super
  end
end
data/lib/roo/base.rb ADDED
@@ -0,0 +1,717 @@
1
+ # encoding: utf-8
2
+
3
+ require 'tmpdir'
4
+ require 'stringio'
5
+ require 'nokogiri'
6
+ require 'roo/utils'
7
+
8
+ # Base class for all other types of spreadsheets
9
+ class Roo::Base
10
+ include Enumerable
11
+
12
+ TEMP_PREFIX = 'roo_'
13
+ MAX_ROW_COL = 999_999.freeze
14
+ MIN_ROW_COL = 0.freeze
15
+
16
+ attr_reader :headers
17
+
18
+ # sets the line with attribute names (default: 1)
19
+ attr_accessor :header_line
20
+
21
# Remembers the file name and options and starts with empty per-sheet caches.
# The third and fourth positional parameters are accepted but unused here.
def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
  @filename, @options = filename, options

  # cell data, each hash keyed by sheet
  @cell, @cell_type, @cells_read = {}, {}, {}

  # cached sheet bounds, each hash keyed by sheet
  @first_row, @last_row = {}, {}
  @first_column, @last_column = {}, {}

  @header_line = 1
end
36
+
37
# The sheet used when a method is called without a sheet argument; until one
# has been set it falls back to (and remembers) the first entry of #sheets.
def default_sheet
  return @default_sheet if @default_sheet

  @default_sheet = sheets.first
end
40
+
41
# sets the working sheet in the document
# 'sheet' can be a number (1 = first sheet) or the name of a sheet.
# The sheet is validated first; its cached bounds are cleared and it is
# marked as not yet read.
def default_sheet=(sheet)
  validate_sheet!(sheet)
  @default_sheet = sheet
  [@last_column, @first_column, @last_row, @first_row].each do |bounds|
    bounds[sheet] = nil
  end
  @cells_read[sheet] = false
end
49
+
50
# first non-empty column of +sheet+, converted from its number to a letter
def first_column_as_letter(sheet = default_sheet)
  column_number = first_column(sheet)
  ::Roo::Utils.number_to_letter(column_number)
end
54
+
55
# last non-empty column of +sheet+, converted from its number to a letter
def last_column_as_letter(sheet = default_sheet)
  column_number = last_column(sheet)
  ::Roo::Utils.number_to_letter(column_number)
end
59
+
60
# Returns (and caches per sheet) a hash with :first_row, :first_column,
# :last_row and :last_column. A bound that still equals its sentinel
# (MAX_ROW_COL for "first", MIN_ROW_COL for "last") is reported as nil.
def first_last_row_col_for_sheet(sheet)
  @first_last_row_cols ||= {}
  cached = @first_last_row_cols[sheet]
  return cached if cached

  raw = collect_last_row_col_for_sheet(sheet)
  sentinels = {
    first_row: MAX_ROW_COL,
    first_column: MAX_ROW_COL,
    last_row: MIN_ROW_COL,
    last_column: MIN_ROW_COL
  }
  bounds = {}
  sentinels.each do |name, untouched|
    bounds[name] = raw[name] == untouched ? nil : raw[name]
  end
  @first_last_row_cols[sheet] = bounds
end
73
+
74
# Scans the stored cells of +sheet+ and returns the smallest/largest row and
# column that hold a truthy value. Bounds that were never touched keep their
# start values (MAX_ROW_COL for "first", MIN_ROW_COL for "last").
def collect_last_row_col_for_sheet(sheet)
  bounds = {
    first_row: MAX_ROW_COL,
    first_column: MAX_ROW_COL,
    last_row: MIN_ROW_COL,
    last_column: MIN_ROW_COL
  }
  cells = @cell[sheet]
  return bounds unless cells

  cells.each_pair do |key, value|
    next unless value

    row = key.first.to_i
    col = key.last.to_i
    bounds[:first_row] = row if row < bounds[:first_row]
    bounds[:last_row] = row if row > bounds[:last_row]
    bounds[:first_column] = col if col < bounds[:first_column]
    bounds[:last_column] = col if col > bounds[:last_column]
  end
  bounds
end
87
+
88
# Generates #first_row, #last_row, #first_column and #last_column.
# Each generated method reads the cells of the sheet and then memoizes the
# matching entry of #first_last_row_col_for_sheet in the instance variable of
# the same name, keyed by sheet. The trailing comments inside the heredoc
# show the code as generated for "first_row".
%w(first_row last_row first_column last_column).each do |key|
  class_eval <<-EOS, __FILE__, __LINE__ + 1
    def #{key}(sheet = default_sheet) # def first_row(sheet = default_sheet)
    read_cells(sheet) # read_cells(sheet)
    @#{key}[sheet] ||= first_last_row_col_for_sheet(sheet)[:#{key}] # @first_row[sheet] ||= first_last_row_col_for_sheet(sheet)[:first_row]
    end # end
  EOS
end
96
+
97
# returns a rectangular area (default: all cells) as yaml-output
# you can add additional attributes with the prefix parameter like:
# oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
#
# Bounds passed as nil default to the first/last row and column of +sheet+.
# Returns '' when +sheet+ has no first row. Values of :time cells are
# rendered through #integer_to_timestring.
def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
  # the emptiness check must look at the requested sheet, not the default one
  return '' unless first_row(sheet)

  from_row ||= first_row(sheet)
  to_row ||= last_row(sheet)
  from_column ||= first_column(sheet)
  to_column ||= last_column(sheet)

  result = "--- \n"
  from_row.upto(to_row) do |row|
    from_column.upto(to_column) do |col|
      next if empty?(row, col, sheet)

      result << "cell_#{row}_#{col}: \n"
      prefix.each do |k, v|
        result << " #{k}: #{v} \n"
      end
      result << " row: #{row} \n"
      result << " col: #{col} \n"
      result << " celltype: #{celltype(row, col, sheet)} \n"
      value = cell(row, col, sheet)
      value = integer_to_timestring(value) if celltype(row, col, sheet) == :time
      result << " value: #{value} \n"
    end
  end
  result
end
129
+
130
# Writes +sheet+ as CSV. With a +filename+ the content goes into that file
# and true is returned; without one the CSV text itself is returned.
def to_csv(filename = nil, separator = ',', sheet = default_sheet)
  unless filename
    buffer = ::StringIO.new
    write_csv_content(buffer, sheet, separator)
    buffer.rewind
    return buffer.read
  end

  File.open(filename, 'w') { |file| write_csv_content(file, sheet, separator) }
  true
end
144
+
145
# returns a matrix object from the whole sheet or a rectangular area of a sheet
#
# Bounds passed as nil default to the first/last row and column of +sheet+.
# Returns an empty Matrix when +sheet+ has no first row.
def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
  require 'matrix'

  # the emptiness check must look at the requested sheet, not the default one
  return Matrix.empty unless first_row(sheet)

  from_row ||= first_row(sheet)
  to_row ||= last_row(sheet)
  from_column ||= first_column(sheet)
  to_column ||= last_column(sheet)

  Matrix.rows(from_row.upto(to_row).map do |row|
    from_column.upto(to_column).map do |col|
      cell(row, col, sheet)
    end
  end)
end
162
+
163
# call to_s method defined on subclasses
# (the inspect output is therefore whatever #to_s returns)
def inspect
  to_s
end
167
+
168
# find a row either by row number or a condition
# Caution: this works only within the default sheet -> set default_sheet before you call this method
# (experimental. see examples in the test_roo.rb file)
#
# find(n)             - delegates to #find_by_row for the integer n
# find(:all, options) - delegates to #find_by_conditions with the options hash
# Anything else raises ArgumentError.
def find(*args) # :nodoc
  options = (args.last.is_a?(Hash) ? args.pop : {})

  case args[0]
  when Integer # Fixnum no longer exists on current Rubies
    find_by_row(args[0])
  when :all
    find_by_conditions(options)
  else
    fail ArgumentError, "unexpected arg #{args[0].inspect}, pass a row index or :all"
  end
end
183
+
184
# returns all values in this row as an array
# row numbers are 1,2,3,... like in the spreadsheet
# (covers the columns from the sheet's first to its last column)
def row(row_number, sheet = default_sheet)
  read_cells(sheet)
  columns = first_column(sheet).upto(last_column(sheet))
  columns.map { |column_number| cell(row_number, column_number, sheet) }
end
192
+
193
# returns all values in this column as an array
# column numbers are 1,2,3,... like in the spreadsheet
# (a String column is first converted from its letter to a number)
def column(column_number, sheet = default_sheet)
  column_number = ::Roo::Utils.letter_to_number(column_number) if column_number.is_a?(::String)
  read_cells(sheet)
  rows = first_row(sheet).upto(last_row(sheet))
  rows.map { |row_number| cell(row_number, column_number, sheet) }
end
204
+
205
# set a cell to a certain value
# (this will not be saved back to the spreadsheet file!)
def set(row, col, value, sheet = default_sheet) #:nodoc:
  read_cells(sheet)
  row, col = normalize(row, col)
  # determine the type first so an unsupported value raises before anything is stored
  type_of_value = cell_type_by_value(value)
  set_value(row, col, value, sheet)
  set_type(row, col, type_of_value, sheet)
end
214
+
215
# Maps a Ruby value to the cell type symbol stored for it:
# integers become :float, strings and floats become :string.
# Raises ArgumentError for every other kind of value.
def cell_type_by_value(value)
  case value
  when Integer then :float # Fixnum no longer exists on current Rubies
  when String, Float then :string
  else
    raise ArgumentError, "Type for #{value} not set"
  end
end
223
+
224
# reopens and rereads the spreadsheet document, then selects the sheet that
# was the default before the reload again
def reload
  previous_sheet = default_sheet
  reinitialize
  self.default_sheet = previous_sheet
end
230
+
231
# true if cell is empty: it has no content, it is a :string cell holding an
# empty string, or it lies outside the used area of the sheet
def empty?(row, col, sheet = default_sheet)
  read_cells(sheet)
  row, col = normalize(row, col)
  contents = cell(row, col, sheet)
  return true unless contents
  return true if celltype(row, col, sheet) == :string && contents.empty?

  row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet)
end
239
+
240
# returns information of the spreadsheet document and all sheets within
# this document: file name, sheet names and, per sheet, either its
# first/last row and column or an "empty" marker.
def info
  without_changing_default_sheet do
    summary = "File: #{File.basename(@filename)}\n"\
              "Number of sheets: #{sheets.size}\n"\
              "Sheets: #{sheets.join(', ')}\n"
    sheets.each_with_index do |sheet, index|
      self.default_sheet = sheet
      summary << "Sheet #{index + 1}:\n"
      if first_row
        summary << " First row: #{first_row}\n"
        summary << " Last row: #{last_row}\n"
        summary << " First column: #{::Roo::Utils.number_to_letter(first_column)}\n"
        summary << " Last column: #{::Roo::Utils.number_to_letter(last_column)}"
      else
        summary << ' - empty -'
      end
      # separate the sheets, but do not end the text with a newline
      summary << "\n" if sheet != sheets.last
    end
    summary
  end
end
265
+
266
# returns an XML representation of all sheets of a spreadsheet file
# Each sheet is made the default sheet in turn while its non-empty cells are
# written as <cell> elements carrying row, column and type attributes.
def to_xml
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.spreadsheet do
      sheets.each do |sheet|
        self.default_sheet = sheet
        xml.sheet(name: sheet) do |x|
          # otherwise empty sheets would raise errors
          next unless first_row && last_row && first_column && last_column

          first_row.upto(last_row) do |row|
            first_column.upto(last_column) do |col|
              next if empty?(row, col)

              x.cell(cell(row, col),
                     row: row,
                     column: col,
                     type: celltype(row, col))
            end
          end
        end
      end
    end
  end
  builder.to_xml
end
291
+
292
# when a method like spreadsheet.a42 is called
# convert it to a call of spreadsheet.cell('a',42)
# An optional first argument is passed on as the sheet. Names that are not
# "letters followed by digits" get the default method_missing handling.
def method_missing(m, *args)
  # #aa42 => #cell('aa',42)
  # #aa42('Sheet1') => #cell('aa',42,'Sheet1')
  # \d+ so that multi-digit rows such as a42 match, not only a1..a9
  reference = /\A([a-z]+)(\d+)\z/.match(m.to_s)
  return super unless reference

  col = ::Roo::Utils.letter_to_number(reference[1])
  row = reference[2].to_i
  if args.empty?
    cell(row, col)
  else
    cell(row, col, args.first)
  end
end
309
+
310
# access different worksheets by calling spreadsheet.sheet(1)
# or spreadsheet.sheet('SHEETNAME')
# The chosen sheet becomes the default sheet. Returns self, or
# [sheet name, self] when +name+ is truthy.
def sheet(index, name = false)
  self.default_sheet = index.is_a?(String) ? index : sheets[index]
  return self unless name

  [default_sheet, self]
end
316
+
317
# iterate through all worksheets of a document, yielding what
# sheet(name, true) returns for each of them
def each_with_pagename
  sheets.each { |sheet_name| yield sheet(sheet_name, true) }
end
323
+
324
+
325
# When options[:clean] is set, removes that key from +options+ and cleans the
# default sheet unless it is already recorded as cleaned.
def clean_sheet_if_need(options)
  if options[:clean]
    options.delete(:clean)
    @cleaned ||= {}
    clean_sheet(default_sheet) unless @cleaned[default_sheet]
  end
end
331
+
332
# Prepares the header information used by #each:
# * :header_search given  - clears @headers and locates the header line
# * :headers is :first_row or true - uses the first row's values, paired with
#   their 1-based position
# * otherwise - hands the options to #set_headers
def search_or_set_header(options)
  header_query = options[:header_search]
  if header_query
    @headers = nil
    @header_line = row_with(header_query)
  elsif [:first_row, true].include?(options[:headers])
    @headers = []
    row(first_row).each.with_index(1) { |title, position| @headers << [title, position] }
  else
    set_headers(options)
  end
end
343
+
344
# Iterates over the rows of the default sheet.
#
# Without options every row from line 1 to the last row is yielded as an
# array. Without a block an Enumerator is returned.
#
# by passing in headers as options, this method returns
# specific columns from your header assignment
# for example:
# xls.sheet('New Prices').parse(:upc => 'UPC', :price => 'Price') would return:
# [{:upc => 123456789012, :price => 35.42},..]
#
# the queries are matched with regex, so regex options can be passed in
# such as :price => '^(Cost|Price)'
# case insensitive by default
#
# by using the :header_search option, you can query for headers
# and return a hash of every row with the keys set to the header result
# for example:
# xls.sheet('New Prices').parse(:header_search => ['UPC*SKU','^Price*\sCost\s'])
#
# that example searches for a column titled either UPC or SKU and another
# column titled either Price or Cost (regex characters allowed)
# * is the wildcard character
#
# you can also pass in a :clean => true option to strip the sheet of
# odd unicode characters and white spaces around columns
def each(options = {})
  # without a block, hand back an enumerator instead of raising LocalJumpError
  return to_enum(:each, options) unless block_given?

  if options.empty?
    1.upto(last_row) do |line|
      yield row(line)
    end
  else
    clean_sheet_if_need(options)
    search_or_set_header(options)
    # fall back to the cells of the header line when no explicit headers exist
    headers = @headers ||
              Hash[(first_column..last_column).map do |col|
                [cell(@header_line, col), col]
              end]

    @header_line.upto(last_row) do |line|
      yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
    end
  end
end
384
+
385
# Same iteration as #each, but collects every row and returns them as an
# array. A given block additionally receives each row.
def parse(options = {})
  collected = []
  each(options) do |record|
    yield(record) if block_given?
    collected.push(record)
  end
  collected
end
393
+
394
# Searches the rows for the first one in which every entry of +query+ matches
# some cell (via grep) and stores its line number in @header_line.
# Returns that line number, or the matched header values when
# +return_headers+ is truthy. Fails if no such row exists or once more than
# 100 lines were scanned without success.
def row_with(query, return_headers = false)
  scanned = 0
  each do |candidate|
    scanned += 1
    matches = query.map { |pattern| candidate.grep(pattern)[0] }.compact

    if matches.length == query.length
      @header_line = scanned
      return return_headers ? matches : scanned
    end
    fail "Couldn't find header row." if scanned > 100
  end
  fail "Couldn't find header row."
end
409
+
410
+ protected
411
+
412
# Compares the extension of +filename+ with +ext+ and reacts according to
# +warning_level+ when they differ:
#   :error   - prints a hint and raises TypeError
#   :warning - prints two warnings
#   :ignore  - does nothing
# Any other level raises a RuntimeError.
def file_type_check(filename, ext, name, warning_level, packed = nil)
  # lalala.ods.zip => lalala.ods
  # nothing is unzipped here; for a packed file only the name of the
  # contained file is checked
  filename = File.basename(filename, File.extname(filename)) if packed == :zip

  # drop a query string from URIs before looking at the extension
  if uri?(filename)
    qs_begin = filename.rindex('?')
    filename = filename[0..qs_begin - 1] if qs_begin
  end
  return if File.extname(filename).downcase == ext

  case warning_level
  when :error
    warn file_type_warning_message(filename, ext)
    fail TypeError, "#{filename} is not #{name} file"
  when :warning
    warn "are you sure, this is #{name} spreadsheet file?"
    warn file_type_warning_message(filename, ext)
  when :ignore
    # ignore
  else
    fail "#{warning_level} illegal state of file_warning"
  end
end
438
+
439
# Converts a key of the form "12,45" (= row,column) into an Array of
# numeric values ([12, 45]).
# This method is a temporary solution to find out whether access with
# numeric keys is faster.
def key_to_num(str)
  row_part, col_part = str.split(',')
  [row_part, col_part].map(&:to_i)
end
447
+
448
# Inverse of key_to_num: joins the first two elements of +arr+ with a comma,
# e.g. [12, 45] => "12,45".
def key_to_string(arr)
  format('%s,%s', arr[0], arr[1])
end
452
+
453
+ private
454
+
455
# Resolves +filename+ to a file on the local disk: URLs are downloaded into
# +tmpdir+ first, and archives (packed == :zip) are unpacked there.
# Returns the resulting path; raises IOError if it is not an existing file.
def local_filename(filename, tmpdir, packed)
  filename = download_uri(filename, tmpdir) if uri?(filename)
  filename = unzip(filename, tmpdir) if packed == :zip
  return filename if File.file?(filename)

  fail IOError, "file #{filename} does not exist"
end
463
+
464
# Builds the hint shown when a file has an unexpected extension: it names
# the class registered for +ext+ in Roo::CLASS_FOR_EXTENSION and the
# extension the file actually has.
# Raises RuntimeError when +ext+ is not registered.
def file_type_warning_message(filename, ext)
  handler = Roo::CLASS_FOR_EXTENSION.fetch(ext.sub('.', '').to_sym)
  actual_ext = File.extname(filename).downcase
  "use #{handler}.new to handle #{ext} spreadsheet files. This has #{actual_ext}"
rescue KeyError
  raise "unknown file type: #{ext}"
end
469
+
470
# Returns the cells of one row as an Array. When a header line is set,
# +row_index+ is shifted by (header_line - 1) before the lookup.
def find_by_row(row_index)
  row_index += (header_line - 1) if @header_line

  width = row(row_index).size
  (1..width).map { |col| cell(row_index, col) }
end
477
+
478
# Returns the rows between first_row and last_row, optionally filtered.
#
# options[:conditions] - Hash of header name => required cell value; a row
#                        is kept only if all of them match.
# options[:array]      - when truthy each row is returned as an Array
#                        (via #row), otherwise as a Hash keyed by the
#                        header cells found on @header_line.
def find_by_conditions(options)
  row_numbers = first_row.upto(last_row)
  header_for = {}
  1.upto(last_column) { |col| header_for[col] = cell(@header_line, col) }

  # are all conditions met?
  criteria = options[:conditions]
  if criteria && !criteria.empty?
    column_with = header_for.invert
    row_numbers = row_numbers.select do |line|
      criteria.all? { |name, expected| cell(line, column_with[name]) == expected }
    end
  end

  return row_numbers.map { |line| row(line) } if options[:array]

  row_numbers.map do |line|
    pairs = 1.upto(row(line).size).map do |col|
      [header_for.fetch(col), cell(line, col)]
    end
    Hash[pairs]
  end
end
503
+
504
# Runs the given block and afterwards restores default_sheet to the value it
# had on entry, even if the block raises. Returns the block's result.
def without_changing_default_sheet
  sheet_on_entry = default_sheet
  yield
ensure
  self.default_sheet = sheet_on_entry
end
510
+
511
# Re-runs #initialize on this object with the stored @filename.
def reinitialize
  initialize(@filename)
end
514
+
515
# Creates a temporary directory via Dir.mktmpdir. The directory name starts
# with TEMP_PREFIX, followed by +prefix+ when one is given. It is created
# under +root+, or under ENV['ROO_TMP'] when no root is passed. The block,
# if any, is forwarded to Dir.mktmpdir.
def make_tmpdir(prefix = nil, root = nil, &block)
  full_prefix = prefix ? TEMP_PREFIX + prefix : TEMP_PREFIX
  base_dir = root || ENV['ROO_TMP']
  Dir.mktmpdir(full_prefix, base_dir, &block)
end
523
+
524
# Loads the cells of +sheet+, replaces every String cell value with its
# sanitized form (see #sanitize_value) and marks the sheet as cleaned.
def clean_sheet(sheet)
  read_cells(sheet)
  cells = @cell[sheet]
  cells.each_pair do |coord, value|
    next unless value.is_a?(::String)

    cells[coord] = sanitize_value(value)
  end
  @cleaned[sheet] = true
end
533
+
534
# Removes every character whose code point is 127 or above from +v+ and
# strips leading/trailing whitespace from the result.
def sanitize_value(v)
  kept = v.unpack('U*').reject { |code_point| code_point >= 127 }
  kept.pack('U*').strip
end
537
+
538
# Locates the header row containing all values of +hash+ (raising if it
# cannot be found) and stores in @headers a Hash that maps each key of
# +hash+ to the column index of its matching header cell.
def set_headers(hash = {})
  @headers = row_with(hash.values, true)
  column_indexes = @headers.map { |found| header_index(found) }
  @headers = Hash[hash.keys.zip(column_indexes)]
end
544
+
545
# Returns the column number of +query+ within the header row: its position
# in row(@header_line) offset by first_column.
def header_index(query)
  position = row(@header_line).index(query)
  position + first_column
end
548
+
549
# Stores +value+ for the cell at (row, col) of +sheet+ in @cell.
def set_value(row, col, value, sheet = default_sheet)
  coordinate = [row, col]
  @cell[sheet][coordinate] = value
end
552
+
553
# Stores +type+ for the cell at (row, col) of +sheet+ in @cell_type.
def set_type(row, col, type, sheet = default_sheet)
  coordinate = [row, col]
  @cell_type[sheet][coordinate] = type
end
556
+
557
# Converts a cell coordinate to numeric [row, col] values.
#
# Accepts (row, col) with an Integer row and an Integer or letter column,
# or the spreadsheet-style order (letter, number), e.g. ('B', 5) -> [5, 2].
# Raises ArgumentError when the first argument is a String and the second
# is not an Integer.
def normalize(row, col)
  if row.is_a?(::String)
    # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and
    # no longer exists on Ruby 3.2+.
    fail ArgumentError unless col.is_a?(::Integer)

    # ('A',1):
    # ('B', 5) -> (5, 2)
    row, col = col, row
  end
  col = ::Roo::Utils.letter_to_number(col) if col.is_a?(::String)
  [row, col]
end
573
+
574
# True when +filename+ starts with 'http://' or 'https://'. Any error raised
# while checking (e.g. +filename+ is not a String) yields false.
def uri?(filename)
  %w(http:// https://).any? { |scheme| filename.start_with?(scheme) }
rescue
  false
end
579
+
580
# Downloads +uri+ into +tmpdir+ and returns the path of the local copy. The
# local file is named after File.basename(uri).
# Raises RuntimeError when the server answers with an HTTP error.
def download_uri(uri, tmpdir)
  require 'open-uri'
  tempfilename = File.join(tmpdir, File.basename(uri))
  begin
    File.open(tempfilename, 'wb') do |file|
      # Open through the parsed URI object: Kernel#open stopped handling
      # URLs in Ruby 3.0, while OpenURI::OpenRead#open works on old and new
      # Rubies alike.
      URI.parse(uri).open('User-Agent' => "Ruby/#{RUBY_VERSION}") do |net|
        file.write(net.read)
      end
    end
  rescue OpenURI::HTTPError
    raise "could not open #{uri}"
  end
  tempfilename
end
594
+
595
# Writes +stream+, minus its first 7 characters, to a file named
# 'spreadsheet' inside +tmpdir+ and returns that file's path.
# NOTE(review): the 7 skipped characters are presumably a marker prefix on
# the stream data - confirm against the caller.
def open_from_stream(stream, tmpdir)
  tempfilename = File.join(tmpdir, 'spreadsheet')
  File.open(tempfilename, 'wb') { |file| file.write(stream[7..-1]) }
  tempfilename
end
602
+
603
# Opens the zip archive +filename+ and extracts it into +tmpdir+ via
# #process_zipfile_packed, returning whatever that method returns.
def unzip(filename, tmpdir)
  require 'zip/filesystem'

  Zip::File.open(filename) { |archive| process_zipfile_packed(archive, tmpdir) }
end
610
+
611
# Checks that +sheet+ refers to an existing sheet.
#
# An Integer is treated as a 1-based index into #sheets, a String as a sheet
# name. Raises ArgumentError for nil, RangeError when the sheet does not
# exist and TypeError for any other kind of argument.
# NOTE(review): an index of 0 or below is passed to Array#fetch as a
# non-positive offset and therefore counts from the end - confirm whether
# that is intended.
def validate_sheet!(sheet)
  case sheet
  when nil
    fail ArgumentError, "Error: sheet 'nil' not valid"
  when Integer # Fixnum is deprecated since Ruby 2.4 and removed in 3.2
    sheets.fetch(sheet - 1) do
      fail RangeError, "sheet index #{sheet} not found"
    end
  when String
    unless sheets.include? sheet
      fail RangeError, "sheet '#{sheet}' not found"
    end
  else
    fail TypeError, "not a valid sheet type: #{sheet.inspect}"
  end
end
628
+
629
# Recursively walks the opened archive +zip+ starting at +path+ and extracts
# every file entry below it into +tmpdir+.
#
# Returns the local path of the extracted file when +path+ is a file entry;
# for a directory, the result for the last entry visited (nil if the
# directory is empty).
# Raises IOError when an entry name would resolve to a location outside of
# +tmpdir+ (e.g. names containing '..').
def process_zipfile_packed(zip, tmpdir, path = '')
  if zip.file.file? path
    require 'fileutils'

    # Entry names come from the archive and cannot be trusted: refuse
    # anything that would escape the extraction directory.
    root = File.expand_path(tmpdir)
    target = File.expand_path(File.join(tmpdir, path))
    unless target.start_with?(root + File::SEPARATOR)
      fail IOError, "zip entry #{path} is outside of #{tmpdir}"
    end

    # Entries inside sub-directories need their parent directory first.
    FileUtils.mkdir_p(File.dirname(target))

    # extract and return filename
    File.open(target, 'wb') do |file|
      file.write(zip.read(path))
    end
    File.join(tmpdir, path)
  else
    ret = nil
    path += '/' unless path.empty?
    zip.dir.foreach(path) do |filename|
      ret = process_zipfile_packed(zip, tmpdir, path + filename)
    end
    ret
  end
end
645
+
646
# Writes all cells of +sheet+ as CSV to +file+, one line per row from row 1
# to last_row and one field per column from column 1 to last_column, joined
# by +separator+. +file+ is anything responding to #print; when it is nil
# the output goes to STDOUT. Nothing is written for an empty sheet.
def write_csv_content(file = nil, sheet = nil, separator = ',')
  file ||= STDOUT
  return unless first_row(sheet) # sheet is empty

  1.upto(last_row(sheet)) do |row|
    1.upto(last_column(sheet)) do |col|
      file.print(separator) unless col == 1
      file.print cell_to_csv(row, col, sheet)
    end
    file.print("\n")
  end
end
660
+
661
# Returns the content of a cell as it should appear in the CSV output.
#
# Empty cells give ''. Strings, booleans and link URLs are wrapped in double
# quotes with embedded quotes doubled; floats and percentages with an
# integral value are written without decimals; dates use #to_s; times are
# formatted by #integer_to_timestring.
# Raises RuntimeError for an unknown cell type or an unsupported formula
# result class.
def cell_to_csv(row, col, sheet)
  return '' if empty?(row, col, sheet)

  onecell = cell(row, col, sheet)
  type = celltype(row, col, sheet)

  text =
    case type
    when :string
      csv_quote(onecell) unless onecell.empty?
    when :boolean
      csv_quote(onecell.to_s.downcase)
    when :float, :percentage
      csv_number(onecell)
    when :formula
      case onecell
      when String
        csv_quote(onecell) unless onecell.empty?
      when Float
        csv_number(onecell)
      when DateTime
        onecell.to_s
      else
        fail "unhandled onecell-class #{onecell.class}"
      end
    when :date, :datetime
      onecell.to_s
    when :time
      integer_to_timestring(onecell)
    when :link
      csv_quote(onecell.url)
    else
      fail "unhandled celltype #{type}"
    end

  text || ''
end

# Wraps +text+ in double quotes, doubling every embedded double quote.
# gsub is required here: String#tr maps characters one-to-one and therefore
# cannot turn one quote into two.
def csv_quote(text)
  %("#{text.gsub('"', '""')}")
end

# Formats a number for CSV, dropping the fraction when the value is integral.
def csv_number(value)
  value == value.to_i ? value.to_i.to_s : value.to_s
end
705
+
706
+ private
707
+
708
# Converts a number of seconds into a time string like '02:05:06'
# (hours:minutes:seconds, each zero-padded to two digits).
def integer_to_timestring(content)
  hours = (content / 3600.0).floor
  remainder = content - hours * 3600
  minutes = (remainder / 60.0).floor
  seconds = remainder - minutes * 60
  format('%02d:%02d:%02d', hours, minutes, seconds)
end
717
+ end