roo-andyw8 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG +438 -0
  6. data/Gemfile +24 -0
  7. data/Guardfile +24 -0
  8. data/LICENSE +22 -0
  9. data/README.md +121 -0
  10. data/Rakefile +23 -0
  11. data/examples/roo_soap_client.rb +50 -0
  12. data/examples/roo_soap_server.rb +26 -0
  13. data/examples/write_me.rb +31 -0
  14. data/lib/roo.rb +28 -0
  15. data/lib/roo/base.rb +717 -0
  16. data/lib/roo/csv.rb +110 -0
  17. data/lib/roo/excelx.rb +540 -0
  18. data/lib/roo/excelx/comments.rb +23 -0
  19. data/lib/roo/excelx/extractor.rb +20 -0
  20. data/lib/roo/excelx/relationships.rb +26 -0
  21. data/lib/roo/excelx/shared_strings.rb +40 -0
  22. data/lib/roo/excelx/sheet_doc.rb +175 -0
  23. data/lib/roo/excelx/styles.rb +62 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/font.rb +17 -0
  26. data/lib/roo/libre_office.rb +5 -0
  27. data/lib/roo/link.rb +15 -0
  28. data/lib/roo/open_office.rb +652 -0
  29. data/lib/roo/spreadsheet.rb +31 -0
  30. data/lib/roo/utils.rb +81 -0
  31. data/lib/roo/version.rb +3 -0
  32. data/roo.gemspec +27 -0
  33. data/scripts/txt2html +67 -0
  34. data/spec/fixtures/vcr_cassettes/google_drive.yml +165 -0
  35. data/spec/fixtures/vcr_cassettes/google_drive_access_token.yml +73 -0
  36. data/spec/fixtures/vcr_cassettes/google_drive_set.yml +857 -0
  37. data/spec/lib/roo/base_spec.rb +4 -0
  38. data/spec/lib/roo/csv_spec.rb +48 -0
  39. data/spec/lib/roo/excelx/format_spec.rb +51 -0
  40. data/spec/lib/roo/excelx_spec.rb +363 -0
  41. data/spec/lib/roo/libreoffice_spec.rb +13 -0
  42. data/spec/lib/roo/openoffice_spec.rb +15 -0
  43. data/spec/lib/roo/spreadsheet_spec.rb +88 -0
  44. data/spec/lib/roo/utils_spec.rb +105 -0
  45. data/spec/spec_helper.rb +9 -0
  46. data/test/all_ss.rb +11 -0
  47. data/test/files/1900_base.xlsx +0 -0
  48. data/test/files/1904_base.xlsx +0 -0
  49. data/test/files/Bibelbund.csv +3741 -0
  50. data/test/files/Bibelbund.ods +0 -0
  51. data/test/files/Bibelbund.xlsx +0 -0
  52. data/test/files/Bibelbund1.ods +0 -0
  53. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  54. data/test/files/advanced_header.ods +0 -0
  55. data/test/files/bbu.ods +0 -0
  56. data/test/files/bbu.xlsx +0 -0
  57. data/test/files/bode-v1.ods.zip +0 -0
  58. data/test/files/bode-v1.xls.zip +0 -0
  59. data/test/files/boolean.csv +2 -0
  60. data/test/files/boolean.ods +0 -0
  61. data/test/files/boolean.xlsx +0 -0
  62. data/test/files/borders.ods +0 -0
  63. data/test/files/borders.xlsx +0 -0
  64. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  65. data/test/files/comments.ods +0 -0
  66. data/test/files/comments.xlsx +0 -0
  67. data/test/files/csvtypes.csv +1 -0
  68. data/test/files/datetime.ods +0 -0
  69. data/test/files/datetime.xlsx +0 -0
  70. data/test/files/dreimalvier.ods +0 -0
  71. data/test/files/emptysheets.ods +0 -0
  72. data/test/files/emptysheets.xlsx +0 -0
  73. data/test/files/encrypted-letmein.ods +0 -0
  74. data/test/files/file_item_error.xlsx +0 -0
  75. data/test/files/formula.ods +0 -0
  76. data/test/files/formula.xlsx +0 -0
  77. data/test/files/formula_string_error.xlsx +0 -0
  78. data/test/files/html-escape.ods +0 -0
  79. data/test/files/link.csv +1 -0
  80. data/test/files/link.xlsx +0 -0
  81. data/test/files/matrix.ods +0 -0
  82. data/test/files/named_cells.ods +0 -0
  83. data/test/files/named_cells.xlsx +0 -0
  84. data/test/files/no_spreadsheet_file.txt +1 -0
  85. data/test/files/numbers-export.xlsx +0 -0
  86. data/test/files/numbers1.csv +18 -0
  87. data/test/files/numbers1.ods +0 -0
  88. data/test/files/numbers1.xlsx +0 -0
  89. data/test/files/numbers1withnull.xlsx +0 -0
  90. data/test/files/numeric-link.xlsx +0 -0
  91. data/test/files/only_one_sheet.ods +0 -0
  92. data/test/files/only_one_sheet.xlsx +0 -0
  93. data/test/files/paragraph.ods +0 -0
  94. data/test/files/paragraph.xlsx +0 -0
  95. data/test/files/ric.ods +0 -0
  96. data/test/files/sheet1.xml +109 -0
  97. data/test/files/simple_spreadsheet.ods +0 -0
  98. data/test/files/simple_spreadsheet.xlsx +0 -0
  99. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  100. data/test/files/so_datetime.csv +8 -0
  101. data/test/files/style.ods +0 -0
  102. data/test/files/style.xlsx +0 -0
  103. data/test/files/time-test.csv +2 -0
  104. data/test/files/time-test.ods +0 -0
  105. data/test/files/time-test.xlsx +0 -0
  106. data/test/files/type_excel.ods +0 -0
  107. data/test/files/type_excel.xlsx +0 -0
  108. data/test/files/type_excelx.ods +0 -0
  109. data/test/files/type_openoffice.xlsx +0 -0
  110. data/test/files/whitespace.ods +0 -0
  111. data/test/files/whitespace.xlsx +0 -0
  112. data/test/test_generic_spreadsheet.rb +211 -0
  113. data/test/test_helper.rb +58 -0
  114. data/test/test_roo.rb +1977 -0
  115. metadata +318 -0
data/Gemfile ADDED
@@ -0,0 +1,24 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :test do
6
+ # additional testing libs
7
+ gem 'webmock'
8
+ gem 'shoulda'
9
+ gem 'rspec', '>= 3.0.0'
10
+ gem 'vcr'
11
+ gem 'simplecov', '>= 0.9.0', require: false
12
+ gem 'coveralls', require: false
13
+ end
14
+
15
+ group :local_development do
16
+ gem 'terminal-notifier-guard', require: false if RUBY_PLATFORM.downcase.include?('darwin')
17
+ gem 'guard-rspec', '>= 4.3.1', require: false
18
+ gem 'guard-minitest', require: false
19
+ gem 'guard-bundler', require: false
20
+ gem 'guard-preek', require: false
21
+ gem 'guard-rubocop', require: false
22
+ gem 'guard-reek', github: 'pericles/guard-reek', require: false
23
+ gem 'pry'
24
+ end
@@ -0,0 +1,24 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :minitest, test_folders: ['test'] do
5
+ watch(%r{^test/(.*)\/?test_(.*)\.rb$})
6
+ watch(%r{^lib/(.*/)?([^/]+)\.rb$}) { |m| "test/#{m[1]}test_#{m[2]}.rb" }
7
+ watch(%r{^test/test_helper\.rb$}) { 'test' }
8
+ end
9
+
10
+ # Note: The cmd option is now required due to the increasing number of ways
11
+ # rspec may be run, below are examples of the most common uses.
12
+ # * bundler: 'bundle exec rspec'
13
+ # * bundler binstubs: 'bin/rspec'
14
+ # * spring: 'bin/rspec' (This will use spring if running and you have
15
+ # installed the spring binstubs per the docs)
16
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
17
+ # * 'just' rspec: 'rspec'
18
+ guard :rspec, cmd: 'bundle exec rspec' do
19
+ watch(%r{^spec/.+_spec\.rb$})
20
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
21
+ watch('spec/spec_helper.rb') { "spec" }
22
+ watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
23
+ end
24
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2008-2014 Thomas Preymesser, Ben Woosley
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,121 @@
1
+ # Roo [![Build Status](https://travis-ci.org/roo-rb/roo.svg)](https://travis-ci.org/roo-rb/roo)[![Code Climate](https://codeclimate.com/github/roo-rb/roo/badges/gpa.svg)](https://codeclimate.com/github/roo-rb/roo)[![Coverage Status](https://coveralls.io/repos/roo-rb/roo/badge.png)](https://coveralls.io/r/roo-rb/roo)
2
+
3
+ Roo implements read access for all spreadsheet types and read/write access for
4
+ Google spreadsheets. It can handle
5
+ * Excelx
6
+ * OpenOffice / LibreOffice
7
+ * CSV
8
+
9
+ ## Additional libraries
10
+
11
+ In addition, the roo-xls and roo-google gems exist to add classic Excel and
12
+ Google Spreadsheet handling capabilities to roo.
13
+
14
+ ## Usage:
15
+
16
+ ```ruby
17
+ require 'roo'
18
+
19
+ s = Roo::OpenOffice.new("myspreadsheet.ods") # loads an OpenOffice Spreadsheet
20
+ s = Roo::OpenOffice.new("myspreadsheet.ods", :password => "password") # loads an encrypted OpenOffice Spreadsheet
21
+ s = Roo::Excelx.new("myspreadsheet.xlsx") # loads an Excel Spreadsheet for Excel .xlsx files
22
+ s = Roo::CSV.new("mycsv.csv") # loads a CSV file
23
+
24
+ # You can use CSV to load TSV files, or files of a certain encoding by passing
25
+ # in options under the :csv_options key
26
+ s = Roo::CSV.new("mytsv.tsv", csv_options: {col_sep: "\t"}) # TSV
27
+ s = Roo::CSV.new("mycsv.csv", csv_options: {encoding: Encoding::ISO_8859_1}) # csv with explicit encoding
28
+
29
+ s.default_sheet = s.sheets.first # first sheet in the spreadsheet file will be used
30
+
31
+ # s.sheets is an array which holds the names of the sheets within
32
+ # a spreadsheet.
33
+ # you can also write
34
+ # s.default_sheet = s.sheets[3] or
35
+ # s.default_sheet = 'Sheet 3'
36
+
37
+ s.cell(1,1) # returns the content of the first row/first cell in the sheet
38
+ s.cell('A',1) # same cell
39
+ s.cell(1,'A') # same cell
40
+ s.cell(1,'A',s.sheets[0]) # same cell
41
+
42
+ # almost all methods have an optional argument 'sheet'.
43
+ # If this parameter is omitted, the default_sheet will be used.
44
+
45
+ s.info # Returns basic info about the spreadsheet file
46
+
47
+ s.first_row # the number of the first row
48
+ s.last_row # the number of the last row
49
+ s.first_column # the number of the first column
50
+ s.last_column # the number of the last column
51
+
52
+ # limited font information is available
53
+
54
+ s.font(1,1).bold?
55
+ s.font(1,1).italic?
56
+ s.font(1,1).underline?
57
+
58
+
59
+ # Roo::Spreadsheet requires spreadsheet gem
60
+ require 'spreadsheet'
61
+
62
+ # Spreadsheet.open can accept both files and paths
63
+
64
+ xls = Roo::Spreadsheet.open('./new_prices.xls')
65
+
66
+ # If the File.path or provided path string does not have an extension, you can optionally
67
+ # provide one as a string or symbol
68
+
69
+ xls = Roo::Spreadsheet.open('./rails_temp_upload', extension: :xls)
70
+
71
+ # no more setting xls.default_sheet, just use this
72
+
73
+ xls.sheet('Info').row(1)
74
+ xls.sheet(0).row(1)
75
+
76
+ # excel likes to create random "Data01" sheets for macros
77
+ # use this to find the sheet with the most data to parse
78
+
79
+ xls.longest_sheet
80
+
81
+ # this excel file has multiple worksheets, let's iterate through each of them and process
82
+
83
+ xls.each_with_pagename do |name, sheet|
84
+ p sheet.row(1)
85
+ end
86
+
87
+ # pull out a hash of exclusive column data (get rid of useless columns and save memory)
88
+
89
+ xls.each(:id => 'UPC',:qty => 'ATS') {|hash| arr << hash}
90
+ #=> hash will appear like {:id=>727880013358, :qty => 12}
91
+
92
+ # NOTE: .parse does the same as .each, except it returns an array (similar to each vs. map)
93
+
94
+ # not sure exactly what a column will be named? try a wildcard search with a regex
95
+
96
+ xls.parse(:id => /UPC|SKU/,:qty => /ATS*\sATP\s*QTY\z/)
97
+
98
+ # if you need to locate the header row and assign the header names themselves,
99
+ # use the :header_search option
100
+
101
+ xls.parse(:header_search => [/UPC*SKU/,/ATS*\sATP\s*QTY\z/])
102
+ #=> each element will appear in this fashion:
103
+ #=> {"UPC" => 123456789012, "STYLE" => "987B0", "COLOR" => "blue", "QTY" => 78}
104
+
105
+ # want to strip out annoying unicode characters and surrounding white space?
106
+
107
+ xls.parse(:clean => true)
108
+
109
+ # another bonus feature is a patch to prevent the Spreadsheet gem from parsing
110
+ # thousands and thousands of blank lines. i got fed up after watching my computer
111
+ # nearly catch fire for 4 hours for a spreadsheet with only 200 ACTUAL lines
112
+ # - located in lib/roo/worksheet.rb
113
+
114
+ # if you want to load and stream .xlsx rows
115
+
116
+ s = Roo::Excelx.new("./test_data/test_small.xlsx")
117
+ s.each_row_streaming do |row|
118
+ puts row.inspect # Array of Excelx::Cell objects
119
+ end
120
+
121
+ ```
@@ -0,0 +1,23 @@
1
+ require 'bundler/gem_tasks'
2
+
3
+ require 'rake/testtask'
4
+ require 'rspec/core/rake_task'
5
+ require 'coveralls/rake/task'
6
+
7
+ # Test unit
8
+ Rake::TestTask.new do |t|
9
+ t.libs << 'test'
10
+ t.test_files = FileList['test/test*.rb']
11
+ t.verbose = true
12
+ end
13
+
14
+ # RSpec
15
+ RSpec::Core::RakeTask.new(:spec)
16
+
17
+ # Coveralls
18
+ Coveralls::RakeTask.new
19
+
20
+ default_task = [:test, :spec]
21
+ default_task << 'coveralls:push' if ENV['TRAVIS']
22
+
23
+ task default: default_task
@@ -0,0 +1,50 @@
1
+ require 'soap/rpc/driver'
2
+
3
+ def ferien_fuer_region(proxy, region, year = nil)
4
+ proxy.first_row.upto(proxy.last_row) do |row|
5
+ if proxy.cell(row, 2) == region
6
+ jahr = proxy.cell(row, 1).to_i
7
+ if year.nil? || jahr == year
8
+ bis_datum = proxy.cell(row, 5)
9
+ if DateTime.now > bis_datum
10
+ print '('
11
+ end
12
+ print jahr.to_s + ' '
13
+ print proxy.cell(row, 2) + ' '
14
+ print proxy.cell(row, 3) + ' '
15
+ print proxy.cell(row, 4).to_s + ' '
16
+ print bis_datum.to_s + ' '
17
+ print (proxy.cell(row, 6) || '') + ' '
18
+ if DateTime.now > bis_datum
19
+ print ')'
20
+ end
21
+ puts
22
+ end
23
+ end
24
+ end
25
+ end
26
+
27
+ proxy = SOAP::RPC::Driver.new('http://localhost:12321', 'spreadsheetserver')
28
+ proxy.add_method('cell', 'row', 'col')
29
+ proxy.add_method('officeversion')
30
+ proxy.add_method('last_row')
31
+ proxy.add_method('last_column')
32
+ proxy.add_method('first_row')
33
+ proxy.add_method('first_column')
34
+ proxy.add_method('sheets')
35
+ proxy.add_method('set_default_sheet', 's')
36
+ proxy.add_method('ferien_fuer_region', 'region')
37
+
38
+ sheets = proxy.sheets
39
+ proxy.set_default_sheet(sheets.first)
40
+
41
+ puts "first row: #{proxy.first_row}"
42
+ puts "first column: #{proxy.first_column}"
43
+ puts "last row: #{proxy.last_row}"
44
+ puts "last column: #{proxy.last_column}"
45
+ puts "cell: #{proxy.cell('C', 8)}"
46
+ puts "cell: #{proxy.cell('F', 12)}"
47
+ puts "officeversion: #{proxy.officeversion}"
48
+ puts 'Berlin:'
49
+
50
+ ferien_fuer_region(proxy, 'Berlin')
@@ -0,0 +1,26 @@
1
+ require 'roo'
2
+ require 'soap/rpc/standaloneServer'
3
+
4
+ NS = 'spreadsheetserver' # name of your service = namespace
5
+ class Server2 < SOAP::RPC::StandaloneServer
6
+ def on_init
7
+ spreadsheet = OpenOffice.new('./Ferien-de.ods')
8
+ add_method(spreadsheet, 'cell', 'row', 'col')
9
+ add_method(spreadsheet, 'officeversion')
10
+ add_method(spreadsheet, 'first_row')
11
+ add_method(spreadsheet, 'last_row')
12
+ add_method(spreadsheet, 'first_column')
13
+ add_method(spreadsheet, 'last_column')
14
+ add_method(spreadsheet, 'sheets')
15
+ # add_method(spreadsheet, 'default_sheet=', 's')
16
+ # method with '...=' did not work? alias method 'set_default_sheet' created
17
+ add_method(spreadsheet, 'set_default_sheet', 's')
18
+ end
19
+ end
20
+
21
+ PORT = 12_321
22
+ puts "serving at port #{PORT}"
23
+ svr = Server2.new('Roo', NS, '0.0.0.0', PORT)
24
+
25
+ trap('INT') { svr.shutdown }
26
+ svr.start
@@ -0,0 +1,31 @@
1
+ require 'roo'
2
+
3
+ #-- create a new spreadsheet within your google-spreadsheets and paste
4
+ #-- the 'key' parameter in the spreadsheet URL
5
+ MAXTRIES = 1000
6
+ print "what's your name? "
7
+ my_name = gets.chomp
8
+ print 'where do you live? '
9
+ my_location = gets.chomp
10
+ print 'your message? (if left blank, only your name and location will be inserted) '
11
+ my_message = gets.chomp
12
+ spreadsheet = Google.new('ptu6bbahNZpY0N0RrxQbWdw')
13
+ spreadsheet.default_sheet = 'Sheet1'
14
+ success = false
15
+ MAXTRIES.times do
16
+ col = rand(10) + 1
17
+ row = rand(10) + 1
18
+ if spreadsheet.empty?(row, col)
19
+ if my_message.empty?
20
+ text = Time.now.to_s + ' ' + "Greetings from #{my_name} (#{my_location})"
21
+ else
22
+ text = Time.now.to_s + ' ' + "#{my_message} from #{my_name} (#{my_location})"
23
+ end
24
+ spreadsheet.set_value(row, col, text)
25
+ puts "message written to row #{row}, column #{col}"
26
+ success = true
27
+ break
28
+ end
29
+ puts "Row #{row}, column #{col} already occupied, trying again..."
30
+ end
31
+ puts "no empty cell found within #{MAXTRIES} tries" unless success
@@ -0,0 +1,28 @@
1
+ module Roo
2
+ autoload :Spreadsheet, 'roo/spreadsheet'
3
+ autoload :Base, 'roo/base'
4
+
5
+ autoload :OpenOffice, 'roo/open_office'
6
+ autoload :LibreOffice, 'roo/libre_office'
7
+ autoload :Excelx, 'roo/excelx'
8
+ autoload :CSV, 'roo/csv'
9
+
10
+ CLASS_FOR_EXTENSION = {
11
+ ods: Roo::OpenOffice,
12
+ xlsx: Roo::Excelx,
13
+ csv: Roo::CSV
14
+ }
15
+
16
+ def self.const_missing(const_name)
17
+ case const_name
18
+ when :Excel
19
+ raise "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel."
20
+ when :Excel2003XML
21
+ raise "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML."
22
+ when :Google
23
+ raise "Google support has been extracted to roo-google. Install roo-google to use Roo::Google."
24
+ else
25
+ super
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,717 @@
1
+ # encoding: utf-8
2
+
3
+ require 'tmpdir'
4
+ require 'stringio'
5
+ require 'nokogiri'
6
+ require 'roo/utils'
7
+
8
+ # Base class for all other types of spreadsheets
9
+ class Roo::Base
10
+ include Enumerable
11
+
12
+ TEMP_PREFIX = 'roo_'
13
+ MAX_ROW_COL = 999_999.freeze
14
+ MIN_ROW_COL = 0.freeze
15
+
16
+ attr_reader :headers
17
+
18
+ # sets the line with attribute names (default: 1)
19
+ attr_accessor :header_line
20
+
21
+ def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
22
+ @filename = filename
23
+ @options = options
24
+
25
+ @cell = {}
26
+ @cell_type = {}
27
+ @cells_read = {}
28
+
29
+ @first_row = {}
30
+ @last_row = {}
31
+ @first_column = {}
32
+ @last_column = {}
33
+
34
+ @header_line = 1
35
+ end
36
+
37
+ def default_sheet
38
+ @default_sheet ||= sheets.first
39
+ end
40
+
41
+ # sets the working sheet in the document
42
+ # 'sheet' can be a number (1 = first sheet) or the name of a sheet.
43
+ def default_sheet=(sheet)
44
+ validate_sheet!(sheet)
45
+ @default_sheet = sheet
46
+ @first_row[sheet] = @last_row[sheet] = @first_column[sheet] = @last_column[sheet] = nil
47
+ @cells_read[sheet] = false
48
+ end
49
+
50
+ # first non-empty column as a letter
51
+ def first_column_as_letter(sheet = default_sheet)
52
+ ::Roo::Utils.number_to_letter(first_column(sheet))
53
+ end
54
+
55
+ # last non-empty column as a letter
56
+ def last_column_as_letter(sheet = default_sheet)
57
+ ::Roo::Utils.number_to_letter(last_column(sheet))
58
+ end
59
+
60
+ # Set first/last row/column for sheet
61
+ def first_last_row_col_for_sheet(sheet)
62
+ @first_last_row_cols ||= {}
63
+ @first_last_row_cols[sheet] ||= begin
64
+ result = collect_last_row_col_for_sheet(sheet)
65
+ {
66
+ first_row: result[:first_row] == MAX_ROW_COL ? nil : result[:first_row],
67
+ first_column: result[:first_column] == MAX_ROW_COL ? nil : result[:first_column],
68
+ last_row: result[:last_row] == MIN_ROW_COL ? nil : result[:last_row],
69
+ last_column: result[:last_column] == MIN_ROW_COL ? nil : result[:last_column]
70
+ }
71
+ end
72
+ end
73
+
74
+ # Collect first/last row/column from sheet
75
+ def collect_last_row_col_for_sheet(sheet)
76
+ first_row = first_column = MAX_ROW_COL
77
+ last_row = last_column = MIN_ROW_COL
78
+ @cell[sheet].each_pair do|key, value|
79
+ next unless value
80
+ first_row = [first_row, key.first.to_i].min
81
+ last_row = [last_row, key.first.to_i].max
82
+ first_column = [first_column, key.last.to_i].min
83
+ last_column = [last_column, key.last.to_i].max
84
+ end if @cell[sheet]
85
+ {first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column}
86
+ end
87
+
88
+ %w(first_row last_row first_column last_column).each do |key|
89
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
90
+ def #{key}(sheet = default_sheet) # def first_row(sheet = default_sheet)
91
+ read_cells(sheet) # read_cells(sheet)
92
+ @#{key}[sheet] ||= first_last_row_col_for_sheet(sheet)[:#{key}] # @first_row[sheet] ||= first_last_row_col_for_sheet(sheet)[:first_row]
93
+ end # end
94
+ EOS
95
+ end
96
+
97
+ # returns a rectangular area (default: all cells) as yaml-output
98
+ # you can add additional attributes with the prefix parameter like:
99
+ # oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
100
+ def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
101
+ return '' unless first_row # empty result if there is no first_row in a sheet
102
+
103
+ from_row ||= first_row(sheet)
104
+ to_row ||= last_row(sheet)
105
+ from_column ||= first_column(sheet)
106
+ to_column ||= last_column(sheet)
107
+
108
+ result = "--- \n"
109
+ from_row.upto(to_row) do |row|
110
+ from_column.upto(to_column) do |col|
111
+ unless empty?(row, col, sheet)
112
+ result << "cell_#{row}_#{col}: \n"
113
+ prefix.each do|k, v|
114
+ result << " #{k}: #{v} \n"
115
+ end
116
+ result << " row: #{row} \n"
117
+ result << " col: #{col} \n"
118
+ result << " celltype: #{celltype(row, col, sheet)} \n"
119
+ value = cell(row, col, sheet)
120
+ if celltype(row, col, sheet) == :time
121
+ value = integer_to_timestring(value)
122
+ end
123
+ result << " value: #{value} \n"
124
+ end
125
+ end
126
+ end
127
+ result
128
+ end
129
+
130
+ # write the current spreadsheet to stdout or into a file
131
+ def to_csv(filename = nil, separator = ',', sheet = default_sheet)
132
+ if filename
133
+ File.open(filename, 'w') do |file|
134
+ write_csv_content(file, sheet, separator)
135
+ end
136
+ true
137
+ else
138
+ sio = ::StringIO.new
139
+ write_csv_content(sio, sheet, separator)
140
+ sio.rewind
141
+ sio.read
142
+ end
143
+ end
144
+
145
+ # returns a matrix object from the whole sheet or a rectangular area of a sheet
146
+ def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
147
+ require 'matrix'
148
+
149
+ return Matrix.empty unless first_row
150
+
151
+ from_row ||= first_row(sheet)
152
+ to_row ||= last_row(sheet)
153
+ from_column ||= first_column(sheet)
154
+ to_column ||= last_column(sheet)
155
+
156
+ Matrix.rows(from_row.upto(to_row).map do |row|
157
+ from_column.upto(to_column).map do |col|
158
+ cell(row, col, sheet)
159
+ end
160
+ end)
161
+ end
162
+
163
+ # call to_s method defined on subclasses
164
+ def inspect
165
+ to_s
166
+ end
167
+
168
+ # find a row either by row number or a condition
169
+ # Caution: this works only within the default sheet -> set default_sheet before you call this method
170
+ # (experimental. see examples in the test_roo.rb file)
171
+ def find(*args) # :nodoc
172
+ options = (args.last.is_a?(Hash) ? args.pop : {})
173
+
174
+ case args[0]
175
+ when Fixnum
176
+ find_by_row(args[0])
177
+ when :all
178
+ find_by_conditions(options)
179
+ else
180
+ fail ArgumentError, "unexpected arg #{args[0].inspect}, pass a row index or :all"
181
+ end
182
+ end
183
+
184
+ # returns all values in this row as an array
185
+ # row numbers are 1,2,3,... like in the spreadsheet
186
+ def row(row_number, sheet = default_sheet)
187
+ read_cells(sheet)
188
+ first_column(sheet).upto(last_column(sheet)).map do |col|
189
+ cell(row_number, col, sheet)
190
+ end
191
+ end
192
+
193
+ # returns all values in this column as an array
194
+ # column numbers are 1,2,3,... like in the spreadsheet
195
+ def column(column_number, sheet = default_sheet)
196
+ if column_number.is_a?(::String)
197
+ column_number = ::Roo::Utils.letter_to_number(column_number)
198
+ end
199
+ read_cells(sheet)
200
+ first_row(sheet).upto(last_row(sheet)).map do |row|
201
+ cell(row, column_number, sheet)
202
+ end
203
+ end
204
+
205
+ # set a cell to a certain value
206
+ # (this will not be saved back to the spreadsheet file!)
207
+ def set(row, col, value, sheet = default_sheet) #:nodoc:
208
+ read_cells(sheet)
209
+ row, col = normalize(row, col)
210
+ cell_type = cell_type_by_value(value)
211
+ set_value(row, col, value, sheet)
212
+ set_type(row, col, cell_type , sheet)
213
+ end
214
+
215
+ def cell_type_by_value(value)
216
+ case value
217
+ when Fixnum then :float
218
+ when String, Float then :string
219
+ else
220
+ raise ArgumentError, "Type for #{value} not set"
221
+ end
222
+ end
223
+
224
+ # reopens and rereads a spreadsheet document
225
+ def reload
226
+ ds = default_sheet
227
+ reinitialize
228
+ self.default_sheet = ds
229
+ end
230
+
231
+ # true if cell is empty
232
+ def empty?(row, col, sheet = default_sheet)
233
+ read_cells(sheet)
234
+ row, col = normalize(row, col)
235
+ contents = cell(row, col, sheet)
236
+ !contents || (celltype(row, col, sheet) == :string && contents.empty?) \
237
+ || (row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet))
238
+ end
239
+
240
+ # returns information of the spreadsheet document and all sheets within
241
+ # this document.
242
+ def info
243
+ without_changing_default_sheet do
244
+ result = "File: #{File.basename(@filename)}\n"\
245
+ "Number of sheets: #{sheets.size}\n"\
246
+ "Sheets: #{sheets.join(', ')}\n"
247
+ n = 1
248
+ sheets.each do|sheet|
249
+ self.default_sheet = sheet
250
+ result << 'Sheet ' + n.to_s + ":\n"
251
+ unless first_row
252
+ result << ' - empty -'
253
+ else
254
+ result << " First row: #{first_row}\n"
255
+ result << " Last row: #{last_row}\n"
256
+ result << " First column: #{::Roo::Utils.number_to_letter(first_column)}\n"
257
+ result << " Last column: #{::Roo::Utils.number_to_letter(last_column)}"
258
+ end
259
+ result << "\n" if sheet != sheets.last
260
+ n += 1
261
+ end
262
+ result
263
+ end
264
+ end
265
+
266
+ # returns an XML representation of all sheets of a spreadsheet file
267
+ def to_xml
268
+ Nokogiri::XML::Builder.new do |xml|
269
+ xml.spreadsheet do
270
+ sheets.each do |sheet|
271
+ self.default_sheet = sheet
272
+ xml.sheet(name: sheet) do |x|
273
+ if first_row && last_row && first_column && last_column
274
+ # otherwise empty sheets would cause errors
275
+ first_row.upto(last_row) do |row|
276
+ first_column.upto(last_column) do |col|
277
+ unless empty?(row, col)
278
+ x.cell(cell(row, col),
279
+ row: row,
280
+ column: col,
281
+ type: celltype(row, col))
282
+ end
283
+ end
284
+ end
285
+ end
286
+ end
287
+ end
288
+ end
289
+ end.to_xml
290
+ end
291
+
292
+ # when a method like spreadsheet.a42 is called
293
+ # convert it to a call of spreadsheet.cell('a',42)
294
+ def method_missing(m, *args)
295
+ # #aa42 => #cell('aa',42)
296
+ # #aa42('Sheet1') => #cell('aa',42,'Sheet1')
297
+ if m =~ /^([a-z]+)(\d)$/
298
+ col = ::Roo::Utils.letter_to_number(Regexp.last_match[1])
299
+ row = Regexp.last_match[2].to_i
300
+ if args.empty?
301
+ cell(row, col)
302
+ else
303
+ cell(row, col, args.first)
304
+ end
305
+ else
306
+ super
307
+ end
308
+ end
309
+
310
+ # access different worksheets by calling spreadsheet.sheet(1)
311
+ # or spreadsheet.sheet('SHEETNAME')
312
+ def sheet(index, name = false)
313
+ self.default_sheet = String === index ? index : sheets[index]
314
+ name ? [default_sheet, self] : self
315
+ end
316
+
317
+ # iterate through all worksheets of a document
318
+ def each_with_pagename
319
+ sheets.each do |s|
320
+ yield sheet(s, true)
321
+ end
322
+ end
323
+
324
+
325
+ def clean_sheet_if_need(options)
326
+ return unless options[:clean]
327
+ options.delete(:clean)
328
+ @cleaned ||= {}
329
+ clean_sheet(default_sheet) unless @cleaned[default_sheet]
330
+ end
331
+
332
+ def search_or_set_header(options)
333
+ if options[:header_search]
334
+ @headers = nil
335
+ @header_line = row_with(options[:header_search])
336
+ elsif [:first_row, true].include?(options[:headers])
337
+ @headers = []
338
+ row(first_row).each_with_index { |x, i| @headers << [x, i + 1] }
339
+ else
340
+ set_headers(options)
341
+ end
342
+ end
343
+
344
+ # by passing in headers as options, this method returns
345
+ # specific columns from your header assignment
346
+ # for example:
347
+ # xls.sheet('New Prices').parse(:upc => 'UPC', :price => 'Price') would return:
348
+ # [{:upc => 123456789012, :price => 35.42},..]
349
+
350
+ # the queries are matched with regex, so regex options can be passed in
351
+ # such as :price => '^(Cost|Price)'
352
+ # case insensitive by default
353
+
354
+ # by using the :header_search option, you can query for headers
355
+ # and return a hash of every row with the keys set to the header result
356
+ # for example:
357
+ # xls.sheet('New Prices').parse(:header_search => ['UPC*SKU','^Price*\sCost\s'])
358
+
359
+ # that example searches for a column titled either UPC or SKU and another
360
+ # column titled either Price or Cost (regex characters allowed)
361
+ # * is the wildcard character
362
+
363
+ # you can also pass in a :clean => true option to strip the sheet of
364
+ # odd unicode characters and white spaces around columns
365
+
366
+ def each(options = {})
367
+ if options.empty?
368
+ 1.upto(last_row) do |line|
369
+ yield row(line)
370
+ end
371
+ else
372
+ clean_sheet_if_need(options)
373
+ search_or_set_header(options)
374
+ headers = @headers ||
375
+ Hash[(first_column..last_column).map do |col|
376
+ [cell(@header_line, col), col]
377
+ end]
378
+
379
+ @header_line.upto(last_row) do |line|
380
+ yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
381
+ end
382
+ end
383
+ end
384
+
385
+ def parse(options = {})
386
+ ary = []
387
+ each(options) do |row|
388
+ yield(row) if block_given?
389
+ ary << row
390
+ end
391
+ ary
392
+ end
393
+
394
+ def row_with(query, return_headers = false)
395
+ line_no = 0
396
+ each do |row|
397
+ line_no += 1
398
+ headers = query.map { |q| row.grep(q)[0] }.compact
399
+
400
+ if headers.length == query.length
401
+ @header_line = line_no
402
+ return return_headers ? headers : line_no
403
+ elsif line_no > 100
404
+ fail "Couldn't find header row."
405
+ end
406
+ end
407
+ fail "Couldn't find header row."
408
+ end
409
+
410
+ protected
411
+
412
+ def file_type_check(filename, ext, name, warning_level, packed = nil)
413
+ if packed == :zip
414
+ # lalala.ods.zip => lalala.ods
415
+ # NO unzip is performed here; only the name of the file is
416
+ # checked, in case it is a packed file.
417
+ filename = File.basename(filename, File.extname(filename))
418
+ end
419
+
420
+ if uri?(filename) && qs_begin = filename.rindex('?')
421
+ filename = filename[0..qs_begin - 1]
422
+ end
423
+ if File.extname(filename).downcase != ext
424
+ case warning_level
425
+ when :error
426
+ warn file_type_warning_message(filename, ext)
427
+ fail TypeError, "#{filename} is not #{name} file"
428
+ when :warning
429
+ warn "are you sure, this is #{name} spreadsheet file?"
430
+ warn file_type_warning_message(filename, ext)
431
+ when :ignore
432
+ # ignore
433
+ else
434
+ fail "#{warning_level} illegal state of file_warning"
435
+ end
436
+ end
437
+ end
438
+
439
+ # converts a key of the form "12,45" (=row,column) into
440
+ # an array of numeric values ([12,45])
441
+ # This method is a temporary solution to explore whether
442
+ # access with numeric keys is faster.
443
+ def key_to_num(str)
444
+ r, c = str.split(',')
445
+ [r.to_i, c.to_i]
446
+ end
447
+
448
# Inverse of key_to_num: joins the first two elements of +arr+ with a
# comma, e.g. [12, 45] => "12,45".
def key_to_string(arr)
  format('%s,%s', arr[0], arr[1])
end
452
+
453
+ private
454
+
455
# Resolves +filename+ to a file on the local disk: a URI is downloaded
# into +tmpdir+ first, and when +packed+ is :zip the (downloaded) file is
# unzipped into +tmpdir+. Returns the resulting path.
# Raises IOError when the resulting path is not an existing file.
def local_filename(filename, tmpdir, packed)
  path = uri?(filename) ? download_uri(filename, tmpdir) : filename
  path = unzip(path, tmpdir) if packed == :zip
  fail IOError, "file #{path} does not exist" unless File.file?(path)
  path
end
463
+
464
# Builds the hint shown when a file has an unexpected extension: it names
# the class registered for +ext+ in Roo::CLASS_FOR_EXTENSION and the
# (downcased) extension the file actually has.
# Raises RuntimeError when +ext+ is not registered.
def file_type_warning_message(filename, ext)
  handler = Roo::CLASS_FOR_EXTENSION.fetch(ext.sub('.', '').to_sym)
  actual_ext = File.extname(filename).downcase
  "use #{handler}.new to handle #{ext} spreadsheet files. This has #{actual_ext}"
rescue KeyError
  raise "unknown file type: #{ext}"
end
469
+
470
# Returns the cells of one row as an array. When a header line has been
# set, +row_index+ is shifted by (header_line - 1) before the lookup.
def find_by_row(row_index)
  target = @header_line ? row_index + (header_line - 1) : row_index

  # columns are 1-based
  (1..row(target).size).map { |col| cell(target, col) }
end
477
+
478
# Returns the rows between first_row and last_row, optionally filtered.
#
# options[:conditions] - hash of header title => expected value; only rows
#                        where every listed column equals its value are kept
# options[:array]      - when truthy each row is returned as an array
#                        (via #row); otherwise as a hash keyed by the
#                        header titles read from @header_line
def find_by_conditions(options)
  candidate_rows = first_row.upto(last_row)
  header_for = {}
  1.upto(last_column) { |col| header_for[col] = cell(@header_line, col) }

  # are all conditions met?
  criteria = options[:conditions]
  if criteria && !criteria.empty?
    column_with = header_for.invert
    candidate_rows = candidate_rows.select do |line|
      criteria.all? { |title, expected| cell(line, column_with[title]) == expected }
    end
  end

  return candidate_rows.map { |line| row(line) } if options[:array]

  candidate_rows.map do |line|
    (1..row(line).size).each_with_object({}) do |col, record|
      record[header_for.fetch(col)] = cell(line, col)
    end
  end
end
503
+
504
# Runs the given block and afterwards puts default_sheet back to the value
# it had on entry, even when the block raises. Returns the block's result.
def without_changing_default_sheet
  sheet_on_entry = default_sheet
  yield
ensure
  self.default_sheet = sheet_on_entry
end
510
+
511
# Runs the constructor again on this object, passing the stored @filename.
def reinitialize
  initialize(@filename)
end
514
+
515
# Creates a temporary directory via Dir.mktmpdir. The directory name
# starts with TEMP_PREFIX, followed by +prefix+ when one is given. It is
# created below +root+, or below ENV['ROO_TMP'] when +root+ is nil.
# The block, if any, is forwarded to Dir.mktmpdir.
def make_tmpdir(prefix = nil, root = nil, &block)
  dir_prefix = prefix ? TEMP_PREFIX + prefix : TEMP_PREFIX
  Dir.mktmpdir(dir_prefix, root || ENV['ROO_TMP'], &block)
end
523
+
524
# Loads the cells of +sheet+, passes every String cell value through
# sanitize_value, and marks the sheet as cleaned in @cleaned.
def clean_sheet(sheet)
  read_cells(sheet)
  cells = @cell[sheet]
  cells.each_pair do |coord, value|
    next unless value.is_a?(::String)
    cells[coord] = sanitize_value(value)
  end
  @cleaned[sheet] = true
end
533
+
534
# Drops every code point >= 127 from +v+ (decoded as UTF-8) and strips
# leading/trailing whitespace from what is left.
def sanitize_value(v)
  kept = v.unpack('U*').reject { |code_point| code_point >= 127 }
  kept.pack('U*').strip
end
537
+
538
# Finds the header row containing all values of +hash+ (row_with raises
# if there is none) and stores in @headers a new hash that maps each key
# of +hash+ to the column index of its matched header.
def set_headers(hash = {})
  @headers = row_with(hash.values, true)
  columns = @headers.map { |title| header_index(title) }
  @headers = Hash[hash.keys.zip(columns)]
end
544
+
545
# Column number of the header cell equal to +query+: its position within
# the header row (@header_line) offset by first_column.
def header_index(query)
  position = row(@header_line).index(query)
  position + first_column
end
548
+
549
# Stores +value+ for the cell at (row, col) in the @cell table of +sheet+.
def set_value(row, col, value, sheet = default_sheet)
  coordinate = [row, col]
  @cell[sheet][coordinate] = value
end
552
+
553
# Stores +type+ for the cell at (row, col) in the @cell_type table of
# +sheet+.
def set_type(row, col, type, sheet = default_sheet)
  coordinate = [row, col]
  @cell_type[sheet][coordinate] = type
end
556
+
557
# Converts a cell coordinate to numeric [row, col] values.
#
# Accepted forms:
#   (5, 2)   -> [5, 2]
#   (5, 'B') -> [5, 2]   column letter converted by Roo::Utils.letter_to_number
#   ('B', 5) -> [5, 2]   letter-first form; the arguments are swapped
#
# Raises ArgumentError when +row+ is a String and +col+ is not an Integer.
def normalize(row, col)
  if row.is_a?(::String)
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # later removed; Integer matches the same values on every version.
    fail ArgumentError unless col.is_a?(::Integer)
    # ('B', 5) -> (5, 'B')
    row, col = col, row
  end
  col = ::Roo::Utils.letter_to_number(col) if col.is_a?(::String)
  [row, col]
end
573
+
574
# True when +filename+ starts with 'http://' or 'https://'. Anything that
# cannot answer start_with? (e.g. nil) counts as "not a URI".
def uri?(filename)
  filename.start_with?(*%w(http:// https://))
rescue
  false
end
579
+
580
# Downloads +uri+ into +tmpdir+ and returns the path of the local copy.
# The local file is named after the last path segment of the URI.
# Raises RuntimeError ("could not open ...") when the server answers with
# an HTTP error.
def download_uri(uri, tmpdir)
  require 'open-uri'
  tempfilename = File.join(tmpdir, File.basename(uri))
  begin
    File.open(tempfilename, 'wb') do |file|
      # Go through the URI object rather than Kernel#open: since Ruby 3.0
      # Kernel#open no longer fetches URLs (it would look for a local file
      # of that name). URI#open is provided by open-uri on old and new
      # Rubies alike.
      URI.parse(uri).open('User-Agent' => "Ruby/#{RUBY_VERSION}") do |net|
        file.write(net.read)
      end
    end
  rescue OpenURI::HTTPError
    raise "could not open #{uri}"
  end
  tempfilename
end
594
+
595
# Writes +stream+, minus its first 7 characters, to a file named
# 'spreadsheet' inside +tmpdir+ (binary mode) and returns that file's path.
# NOTE(review): the 7 skipped characters are presumably a "stream:"
# marker - confirm against the callers.
def open_from_stream(stream, tmpdir)
  File.join(tmpdir, 'spreadsheet').tap do |target|
    File.open(target, 'wb') { |out| out.write(stream[7..-1]) }
  end
end
602
+
603
# Opens the zip archive +filename+ and extracts it into +tmpdir+ via
# process_zipfile_packed, returning whatever that method returns.
def unzip(filename, tmpdir)
  require 'zip/filesystem'

  Zip::File.open(filename) { |archive| process_zipfile_packed(archive, tmpdir) }
end
610
+
611
# Checks that +sheet+ refers to an existing entry of #sheets.
#
# sheet - a 1-based Integer index or a String sheet name
#
# Raises ArgumentError for nil, RangeError when the index or name is not
# found, and TypeError for any other kind of object.
def validate_sheet!(sheet)
  case sheet
  when nil
    fail ArgumentError, "Error: sheet 'nil' not valid"
  when ::Integer
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # later removed; Integer matches the same values on every version.
    sheets.fetch(sheet - 1) do
      fail RangeError, "sheet index #{sheet} not found"
    end
  when String
    unless sheets.include? sheet
      fail RangeError, "sheet '#{sheet}' not found"
    end
  else
    fail TypeError, "not a valid sheet type: #{sheet.inspect}"
  end
end
628
+
629
# Recursively extracts the entries of +zip+ found below +path+ into
# +tmpdir+. A file entry is written to tmpdir/path and that location is
# returned; for a directory every child is processed in turn and the
# result of the last child is returned (nil when there are no children).
def process_zipfile_packed(zip, tmpdir, path = '')
  unless zip.file.file?(path)
    dir_path = path.empty? ? path : path + '/'
    last_extracted = nil
    zip.dir.foreach(dir_path) do |entry|
      last_extracted = process_zipfile_packed(zip, tmpdir, dir_path + entry)
    end
    return last_extracted
  end

  # extract and return filename
  target = File.join(tmpdir, path)
  File.open(target, 'wb') { |out| out.write(zip.read(path)) }
  target
end
645
+
646
# Writes all cells of +sheet+ as CSV, one line per row, columns joined by
# +separator+. +file+ must respond to #print; when it is nil the output
# goes to STDOUT. Nothing is written for an empty sheet.
def write_csv_content(file = nil, sheet = nil, separator = ',')
  file ||= STDOUT
  return unless first_row(sheet) # sheet is empty

  1.upto(last_row(sheet)) do |row|
    (1..last_column(sheet)).each do |col|
      file.print(separator) unless col == 1
      file.print cell_to_csv(row, col, sheet)
    end
    file.print("\n")
  end
end
660
+
661
# The content of a cell in the csv output.
#
# Returns '' for an empty cell. Strings, booleans and links are wrapped
# in double quotes with embedded double quotes doubled; whole-valued
# floats are written without a fractional part.
# Raises RuntimeError for a cell type (or formula result class) that is
# not handled.
def cell_to_csv(row, col, sheet)
  return '' if empty?(row, col, sheet)

  onecell = cell(row, col, sheet)
  type = celltype(row, col, sheet)

  csv =
    case type
    when :string
      quote_for_csv(onecell) unless onecell.empty?
    when :boolean
      quote_for_csv(onecell).downcase
    when :float, :percentage
      number_to_csv(onecell)
    when :formula
      case onecell
      when String
        quote_for_csv(onecell) unless onecell.empty?
      when Float
        number_to_csv(onecell)
      when DateTime
        onecell.to_s
      else
        fail "unhandled onecell-class #{onecell.class}"
      end
    when :date, :datetime
      onecell.to_s
    when :time
      integer_to_timestring(onecell)
    when :link
      quote_for_csv(onecell.url)
    else
      fail "unhandled celltype #{type}"
    end
  # an empty string value yields nil above; emit an empty field for it
  csv || ''
end

# Wraps +text+ in double quotes, doubling every embedded double quote.
# gsub is required here: tr('"', '""') maps the quote onto itself and
# leaves embedded quotes unescaped.
def quote_for_csv(text)
  %("#{text.gsub('"', '""')}")
end

# Renders a number for CSV: whole values without a fractional part.
def number_to_csv(value)
  value == value.to_i ? value.to_i.to_s : value.to_s
end
705
+
706
+ private
707
+
708
# Converts a number of seconds to a time string like '02:05:06'
# (hours:minutes:seconds, each zero-padded to two digits).
def integer_to_timestring(content)
  hours = (content / 3600.0).floor
  remainder = content - hours * 3600
  minutes = (remainder / 60.0).floor
  seconds = remainder - minutes * 60
  format('%02d:%02d:%02d', hours, minutes, seconds)
end
717
+ end