roo 1.13.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +7 -0
- data/.simplecov +4 -0
- data/.travis.yml +13 -0
- data/CHANGELOG.md +500 -0
- data/Gemfile +16 -10
- data/Guardfile +24 -0
- data/LICENSE +3 -1
- data/README.md +254 -0
- data/Rakefile +23 -23
- data/examples/roo_soap_client.rb +28 -31
- data/examples/roo_soap_server.rb +4 -6
- data/examples/write_me.rb +9 -10
- data/lib/roo/base.rb +303 -388
- data/lib/roo/csv.rb +120 -113
- data/lib/roo/excelx/comments.rb +24 -0
- data/lib/roo/excelx/extractor.rb +20 -0
- data/lib/roo/excelx/relationships.rb +26 -0
- data/lib/roo/excelx/shared_strings.rb +40 -0
- data/lib/roo/excelx/sheet_doc.rb +202 -0
- data/lib/roo/excelx/styles.rb +62 -0
- data/lib/roo/excelx/workbook.rb +59 -0
- data/lib/roo/excelx.rb +452 -484
- data/lib/roo/font.rb +17 -0
- data/lib/roo/libre_office.rb +5 -0
- data/lib/roo/link.rb +15 -0
- data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
- data/lib/roo/spreadsheet.rb +20 -23
- data/lib/roo/utils.rb +78 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +18 -24
- data/roo.gemspec +20 -204
- data/spec/lib/roo/base_spec.rb +1 -4
- data/spec/lib/roo/csv_spec.rb +21 -13
- data/spec/lib/roo/excelx/format_spec.rb +7 -6
- data/spec/lib/roo/excelx_spec.rb +388 -11
- data/spec/lib/roo/libreoffice_spec.rb +16 -6
- data/spec/lib/roo/openoffice_spec.rb +2 -8
- data/spec/lib/roo/spreadsheet_spec.rb +40 -12
- data/spec/lib/roo/utils_spec.rb +106 -0
- data/spec/spec_helper.rb +2 -1
- data/test/test_generic_spreadsheet.rb +19 -67
- data/test/test_helper.rb +9 -56
- data/test/test_roo.rb +252 -477
- metadata +63 -302
- data/CHANGELOG +0 -417
- data/Gemfile.lock +0 -78
- data/README.markdown +0 -126
- data/VERSION +0 -1
- data/lib/roo/excel.rb +0 -355
- data/lib/roo/excel2003xml.rb +0 -300
- data/lib/roo/google.rb +0 -292
- data/lib/roo/roo_rails_helper.rb +0 -83
- data/lib/roo/worksheet.rb +0 -18
- data/spec/lib/roo/excel2003xml_spec.rb +0 -15
- data/spec/lib/roo/excel_spec.rb +0 -17
- data/spec/lib/roo/google_spec.rb +0 -64
- data/test/files/1900_base.xls +0 -0
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xls +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +0 -3741
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xls +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund.xml +0 -62518
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/bad_excel_date.xls +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xls +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bbu.xml +0 -152
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +0 -2
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xls +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/boolean.xml +0 -112
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xls +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/borders.xml +0 -144
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/bug-row-column-fixnum-float.xls +0 -0
- data/test/files/bug-row-column-fixnum-float.xml +0 -127
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xls +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +0 -1
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xls +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/datetime.xml +0 -142
- data/test/files/datetime_floatconv.xls +0 -0
- data/test/files/datetime_floatconv.xml +0 -148
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xls +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/emptysheets.xml +0 -105
- data/test/files/excel2003.xml +0 -21140
- data/test/files/false_encoding.xls +0 -0
- data/test/files/false_encoding.xml +0 -132
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xls +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula.xml +0 -134
- data/test/files/formula_parse_error.xls +0 -0
- data/test/files/formula_parse_error.xml +0 -1833
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.xls +0 -0
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/matrix.xls +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xls +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +0 -1
- data/test/files/numbers1.csv +0 -18
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xls +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1.xml +0 -312
- data/test/files/numeric-link.xlsx +0 -0
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xls +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/only_one_sheet.xml +0 -67
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xls +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/paragraph.xml +0 -127
- data/test/files/prova.xls +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xls +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet.xml +0 -225
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/simple_spreadsheet_from_italo.xls +0 -0
- data/test/files/simple_spreadsheet_from_italo.xml +0 -242
- data/test/files/so_datetime.csv +0 -7
- data/test/files/style.ods +0 -0
- data/test/files/style.xls +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/style.xml +0 -154
- data/test/files/time-test.csv +0 -2
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xls +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/time-test.xml +0 -131
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_excelx.xls +0 -0
- data/test/files/type_openoffice.xls +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xls +0 -0
- data/test/files/whitespace.xlsx +0 -0
- data/test/files/whitespace.xml +0 -184
- data/test/rm_sub_test.rb +0 -12
- data/test/rm_test.rb +0 -7
- data/website/index.html +0 -385
- data/website/index.txt +0 -423
- data/website/javascripts/rounded_corners_lite.inc.js +0 -285
- data/website/stylesheets/screen.css +0 -130
- data/website/template.rhtml +0 -48
data/lib/roo/csv.rb
CHANGED
|
@@ -1,113 +1,120 @@
|
|
|
1
|
-
require 'csv'
|
|
2
|
-
require 'time'
|
|
3
|
-
|
|
4
|
-
# The CSV class can read csv files (must be separated with commas) which then
|
|
5
|
-
# can be handled like spreadsheets. This means you can access cells like A5
|
|
6
|
-
# within these files.
|
|
7
|
-
# The CSV class provides only string objects. If you want conversions to other
|
|
8
|
-
# types you have to do it yourself.
|
|
9
|
-
#
|
|
10
|
-
# You can pass options to the underlying CSV parse operation, via the
|
|
11
|
-
# :csv_options option.
|
|
12
|
-
#
|
|
13
|
-
|
|
14
|
-
class Roo::CSV < Roo::Base
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
sheet
|
|
73
|
-
|
|
74
|
-
@
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
1
|
+
require 'csv'
|
|
2
|
+
require 'time'
|
|
3
|
+
|
|
4
|
+
# The CSV class can read csv files (must be separated with commas) which then
|
|
5
|
+
# can be handled like spreadsheets. This means you can access cells like A5
|
|
6
|
+
# within these files.
|
|
7
|
+
# The CSV class provides only string objects. If you want conversions to other
|
|
8
|
+
# types you have to do it yourself.
|
|
9
|
+
#
|
|
10
|
+
# You can pass options to the underlying CSV parse operation, via the
|
|
11
|
+
# :csv_options option.
|
|
12
|
+
#
|
|
13
|
+
|
|
14
|
+
class Roo::CSV < Roo::Base
  # Maps the Ruby class of a cell value to the type symbol reported by
  # #celltype. Values of any other class yield nil.
  TYPE_MAP = {
    String => :string,
    Float => :float,
    Date => :date,
    DateTime => :datetime
  }.freeze

  attr_reader :filename

  # Returns an array with the names of the sheets. In CSV class there is only
  # one dummy sheet, because a csv file cannot have more than one sheet.
  def sheets
    ['default']
  end

  # Returns the value stored at (row, col). The file is parsed on first access.
  def cell(row, col, sheet = nil)
    sheet ||= default_sheet
    read_cells(sheet)
    @cell[normalize(row, col)]
  end

  # Returns the type symbol (see TYPE_MAP) of the value stored at (row, col).
  def celltype(row, col, sheet = nil)
    sheet ||= default_sheet
    read_cells(sheet)
    @cell_type[normalize(row, col)]
  end

  # Hook applied to every parsed field before it is stored. This
  # implementation returns the value unchanged.
  def cell_postprocessing(row, col, value)
    value
  end

  # Options handed to the CSV parser, taken from the :csv_options entry of
  # @options. Defaults to an empty Hash.
  def csv_options
    @options[:csv_options] || {}
  end

  private

  def celltype_class(value)
    TYPE_MAP[value.class]
  end

  # Yields every parsed row of the file. When the filename is a URI the file
  # is first downloaded into a temporary directory.
  #
  # NOTE(review): options are passed to CSV.foreach as a positional Hash;
  # newer csv releases take them as keywords -- confirm against the Ruby
  # versions this gem has to support before changing the call.
  def each_row(options, &block)
    if uri?(filename)
      make_tmpdir do |tmpdir|
        tmp_filename = download_uri(filename, tmpdir)
        CSV.foreach(tmp_filename, options, &block)
      end
    else
      CSV.foreach(filename, options, &block)
    end
  end

  # Parses the file once per sheet, filling @cell and @cell_type (keyed by
  # [row, col], both 1-based) and recording the first/last row and column.
  def read_cells(sheet = default_sheet)
    sheet ||= default_sheet # callers may pass nil explicitly
    return if @cells_read[sheet]

    @first_row[sheet] = 1
    @last_row[sheet] = 0
    @first_column[sheet] = 1
    @last_column[sheet] = 1

    rownum = 1
    each_row csv_options do |fields|
      fields.each_with_index do |elem, i|
        col = i + 1
        @cell[[rownum, col]] = cell_postprocessing(rownum, col, elem)
        @cell_type[[rownum, col]] = celltype_class(@cell[[rownum, col]])
        @last_column[sheet] = col if col > @last_column[sheet]
      end
      rownum += 1
      @last_row[sheet] += 1
    end

    # Must be set before trimming: #row and #column are called below and the
    # sheet must count as already read by then.
    @cells_read[sheet] = true
    trim_blank_edges(sheet)
  end

  # Moves the recorded bounds inwards while the outermost row or column
  # contains no truthy value, never letting first and last cross.
  def trim_blank_edges(sheet)
    # adjust @first_row if necessary
    while row(@first_row[sheet]).none? && @first_row[sheet] < @last_row[sheet]
      @first_row[sheet] += 1
    end
    # adjust @last_row if necessary
    while row(@last_row[sheet]).none? && @last_row[sheet] > @first_row[sheet]
      @last_row[sheet] -= 1
    end
    # adjust @first_column if necessary
    while column(@first_column[sheet]).none? && @first_column[sheet] < @last_column[sheet]
      @first_column[sheet] += 1
    end
    # adjust @last_column if necessary
    while column(@last_column[sheet]).none? && @last_column[sheet] > @first_column[sheet]
      @last_column[sheet] -= 1
    end
  end

  # Runs sanitize_value over every String cell and marks the sheet as cleaned.
  def clean_sheet(sheet)
    read_cells(sheet)

    @cell.each_pair do |coord, value|
      @cell[coord] = sanitize_value(value) if value.is_a?(::String)
    end

    @cleaned[sheet] = true
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'roo/excelx/extractor'
|
|
2
|
+
|
|
3
|
+
module Roo
  # Reads the cell comments of one sheet from its comments XML part.
  class Excelx::Comments < Excelx::Extractor
    # Returns a Hash mapping the key produced by Roo::Utils.ref_to_key for the
    # comment's "ref" attribute to the comment text. The result is memoized.
    def comments
      @comments ||= extract_comments
    end

    private

    # Builds the comments Hash; returns an empty Hash when the part is missing.
    def extract_comments
      return {} unless doc_exists?

      Hash[doc.xpath("//comments/commentList/comment").map do |comment|
        # Prefer the text of the first rich-text run, fall back to plain text.
        text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
        # A comment without any text element yields an empty string instead
        # of raising NoMethodError on nil.
        value = text_node ? text_node.text : ''
        [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
      end]
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
module Roo
  # Base class for readers of a single XML part, identified by its file path.
  class Excelx::Extractor
    # path - location of the XML file; may be nil or point to a missing file.
    def initialize(path)
      @path = path
    end

    private

    # Parsed XML document with namespaces removed, or nil when the file does
    # not exist. A successfully loaded document is cached.
    def doc
      @doc ||= (::Roo::Utils.load_xml(@path).remove_namespaces! if doc_exists?)
    end

    # Truthy only when a path was given and a file exists at that path.
    def doc_exists?
      @path && File.exist?(@path)
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require 'roo/excelx/extractor'
|
|
2
|
+
|
|
3
|
+
module Roo
  # Reads a relationships XML part and exposes its entries by relationship Id.
  class Excelx::Relationships < Excelx::Extractor
    # Looks up the Relationship node stored under the given Id.
    def [](index)
      to_a[index]
    end

    # Returns the memoized lookup table. Despite the name this is a Hash of
    # Id => Relationship node.
    def to_a
      @relationships ||= extract_relationships
    end

    private

    # Builds the Id => node Hash. A missing file yields an empty Hash (not an
    # Array), so that looking up a String Id returns nil instead of raising
    # TypeError.
    def extract_relationships
      return {} unless doc_exists?

      Hash[doc.xpath("/Relationships/Relationship").map do |rel|
        [rel.attribute('Id').text, rel]
      end]
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require 'roo/excelx/extractor'
|
|
2
|
+
|
|
3
|
+
module Roo
  # Reads the shared strings XML part into an Array of Strings.
  class Excelx::SharedStrings < Excelx::Extractor
    # Returns the shared string stored at the given position.
    def [](index)
      to_a[index]
    end

    # Returns all shared strings in document order; the result is memoized.
    def to_a
      @array ||= extract_shared_strings
    end

    private

    # Produces one String per /sst/si element, or an empty Array when the
    # part does not exist.
    def extract_shared_strings
      return [] unless doc_exists?

      doc.xpath("/sst/si").map do |si|
        si.children.inject('') do |text, node|
          case node.name
          when 'r'
            # rich-text run: append the content of each of its <t> children
            run_text = node.children.select { |part| part.name == 't' }.map(&:content).join
            text + run_text
          when 't'
            # plain text element: replaces whatever was collected so far
            node.content
          else
            text
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
require 'roo/excelx/extractor'
|
|
2
|
+
|
|
3
|
+
module Roo
  # Reads one worksheet XML part: its cells, hyperlinks and dimensions.
  class Excelx::SheetDoc < Excelx::Extractor
    # path           - location of the worksheet XML file
    # relationships  - lookup of relationship Id => Relationship node
    # styles         - object answering #style_format(style_index)
    # shared_strings - lookup of shared string index => String
    # workbook       - object answering #base_date
    # options        - Hash; :expand_merged_ranges is read by #extract_cells
    def initialize(path, relationships, styles, shared_strings, workbook, options = {})
      super(path)
      @options = options
      @relationships = relationships
      @styles = styles
      @shared_strings = shared_strings
      @workbook = workbook
    end

    # Returns a Hash of cell key => Excelx::Cell. The result is memoized, so
    # the relationships argument only matters on the first call.
    def cells(relationships)
      @cells ||= extract_cells(relationships)
    end

    # Returns a Hash of cell key => hyperlink target. Memoized like #cells.
    def hyperlinks(relationships)
      @hyperlinks ||= extract_hyperlinks(relationships)
    end

    # Get the dimensions for the sheet.
    # This is the upper bound of cells that might
    # be parsed. (the document may be sparse so cell count is only upper bound)
    def dimensions
      @dimensions ||= extract_dimensions
    end

    # Yield each row xml element to caller
    def each_row_streaming(&block)
      Roo::Utils.each_element(@path, 'row', &block)
    end

    # Yield each cell as Excelx::Cell to caller for given
    # row xml
    def each_cell(row_xml)
      return [] unless row_xml

      row_xml.children.each do |cell_element|
        key = ::Roo::Utils.ref_to_key(cell_element['r'])
        yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
      end
    end

    private

    # Builds an Excelx::Cell from a <c> element, e.g.
    #   <c r="A5" s="2"><v>22606</v></c>
    # A cell without an inline string <t> or a <v> child yields a Cell that
    # carries only its coordinate.
    def cell_from_xml(cell_xml, hyperlink)
      # This is error prone, to_i will silently turn a nil into a 0
      # and it works by coincidence that Format[0] is general
      style = cell_xml['s'].to_i

      # Only looked up when the cell has no explicit type attribute.
      number_format = nil
      value_type =
        case cell_xml['t']
        when 's'
          :shared
        when 'b'
          :boolean
        when 'str'
          :string
        when 'inlineStr'
          :inlinestr
        else
          number_format = @styles.style_format(style)
          Excelx::Format.to_type(number_format)
        end

      formula = nil
      row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
      coordinate = Excelx::Cell::Coordinate.new(row, column)

      cell_xml.children.each do |child|
        case child.name
        when 'is'
          # inline string: the first <t> child decides the cell
          child.children.each do |inline_str|
            if inline_str.name == 't'
              return Excelx::Cell.new(inline_str.content, :string, formula, :string, inline_str.content, style, hyperlink, @workbook.base_date, coordinate)
            end
          end
        when 'f'
          # remembered for the <v> element that follows
          formula = child.content
        when 'v'
          # A time/datetime format with a serial >= 1 carries a date part;
          # it is a plain date when the fractional part is (almost) zero.
          if [:time, :datetime].include?(value_type) && child.content.to_f >= 1.0
            value_type =
              if (child.content.to_f - child.content.to_f.floor).abs > 0.000001
                :datetime
              else
                :date
              end
          end
          excelx_type = [:numeric_or_formula, number_format.to_s]
          value =
            case value_type
            when :shared
              value_type = :string
              excelx_type = :string
              @shared_strings[child.content.to_i]
            when :boolean
              (child.content.to_i == 1 ? 'TRUE' : 'FALSE')
            when :date, :time, :datetime
              child.content
            when :formula
              child.content.to_f
            when :string
              excelx_type = :string
              child.content
            else
              value_type = :float
              child.content
            end
          return Excelx::Cell.new(value, value_type, formula, excelx_type, child.content, style, hyperlink, @workbook.base_date, coordinate)
        end
      end

      Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, coordinate)
    end

    # Maps the cell key of every hyperlink that has an id resolvable through
    # relationships to that relationship's Target attribute.
    def extract_hyperlinks(relationships)
      Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink|
        id_attribute = hyperlink.attribute('id')
        relationship = id_attribute && relationships[id_attribute.text]
        next unless relationship

        [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
      end.compact]
    end

    # Copies the top-left cell of every merged range into all other cells of
    # that range. Mutates the given cells Hash.
    def expand_merged_ranges(cells)
      # Extract merged ranges from xml
      merges = {}
      doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
        tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
        (tl[0]..br[0]).each do |row|
          (tl[1]..br[1]).each do |col|
            next if row == tl[0] && col == tl[1]

            merges[[row, col]] = tl
          end
        end
      end
      # Duplicate value into all cells in merged range
      merges.each do |dst, src|
        cells[dst] = cells[src]
      end
    end

    # Parses every /worksheet/sheetData/row/c element into a Cell, keyed by
    # Roo::Utils.ref_to_key of its "r" attribute.
    def extract_cells(relationships)
      extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
        key = ::Roo::Utils.ref_to_key(cell_xml['r'])
        [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
      end]
      expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
      extracted_cells
    end

    # Returns the "ref" attribute of the first <dimension> element found.
    # The early return stops the element iteration at the first match.
    # NOTE(review): without a <dimension> element the result is whatever
    # Roo::Utils.each_element returns -- confirm what callers expect.
    def extract_dimensions
      Roo::Utils.each_element(@path, 'dimension') do |dimension|
        return dimension.attributes["ref"].value
      end
    end

    # Sample of the file xl/comments1.xml (kept from the original source for
    # reference; the text values are sample data):
    #
    #   <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
    #   <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
    #     <authors>
    #       <author />
    #     </authors>
    #     <commentList>
    #       <comment ref="B4" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B4</t>
    #           </r>
    #         </text>
    #       </comment>
    #       <comment ref="B5" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B5</t>
    #           </r>
    #         </text>
    #       </comment>
    #     </commentList>
    #   </comments>
    #
    # Disabled snippet, also kept from the original source:
    #
    #   if @comments_doc[self.sheets.index(sheet)]
    #     read_comments(sheet)
    #   end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
require 'roo/font'
|
|
2
|
+
require 'roo/excelx/extractor'
|
|
3
|
+
|
|
4
|
+
module Roo
  # Reads the styles XML part: number formats and fonts per cell style.
  class Excelx::Styles < Excelx::Extractor
    # convert internal excelx attribute to a format
    #
    # style - style index of a cell (anything answering #to_i)
    #
    # Returns the custom format code registered for the style's numFmtId, or
    # the entry of Excelx::Format::STANDARD_FORMATS for that id.
    def style_format(style)
      id = num_fmt_ids[style.to_i]
      num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
    end

    # Returns one Font per cellXfs/xf element, in document order. Memoized.
    def definitions
      @definitions ||= extract_definitions
    end

    private

    def num_fmt_ids
      @num_fmt_ids ||= extract_num_fmt_ids
    end

    def num_fmts
      @num_fmts ||= extract_num_fmts
    end

    def fonts
      @fonts ||= extract_fonts
    end

    # Only <xf> elements are selected. Iterating over every child node of
    # <cellXfs> would also visit non-element nodes (such as whitespace text)
    # and shift the positions that cell style indexes refer to.
    def extract_definitions
      doc.xpath("//cellXfs/xf").map do |xf|
        fonts[xf['fontId'].to_i]
      end
    end

    # Builds a Font per //fonts/font element; a flag is set when the matching
    # child element (b, i, u) is present.
    def extract_fonts
      doc.xpath("//fonts/font").map do |font_el|
        Font.new.tap do |font|
          font.bold = !font_el.xpath('./b').empty?
          font.italic = !font_el.xpath('./i').empty?
          font.underline = !font_el.xpath('./u').empty?
        end
      end
    end

    # numFmtId attribute (a String or nil) of every cellXfs/xf element, in
    # document order. See #extract_definitions for why only <xf> is selected.
    def extract_num_fmt_ids
      doc.xpath("//cellXfs/xf").map do |xf|
        xf['numFmtId']
      end
    end

    # Hash of numFmtId => formatCode for every <numFmt> element.
    def extract_num_fmts
      Hash[doc.xpath("//numFmt").map do |num_fmt|
        [num_fmt['numFmtId'], num_fmt['formatCode']]
      end]
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
require 'roo/excelx/extractor'
|
|
2
|
+
|
|
3
|
+
module Roo
  # Reads the workbook XML part: sheets, defined names and the date system.
  class Excelx::Workbook < Excelx::Extractor
    # A defined name that points at a single cell of a sheet.
    class Label
      attr_reader :sheet, :row, :col, :name

      # name  - the defined name
      # sheet - sheet name taken from the reference
      # row   - row number (converted with #to_i)
      # col   - column letters (converted with Roo::Utils.letter_to_number)
      def initialize(name, sheet, row, col)
        @name = name
        @sheet = sheet
        @row = row.to_i
        @col = ::Roo::Utils.letter_to_number(col)
      end

      # Returns the cell position as [row, col].
      def key
        [@row, @col]
      end
    end

    # Raises ArgumentError when no file exists at path.
    def initialize(path)
      super
      raise ArgumentError, 'missing required workbook file' unless doc_exists?
    end

    # Returns all <sheet> nodes of the workbook.
    def sheets
      doc.xpath("//sheet")
    end

    # aka labels
    #
    # Returns a Hash of name => Label for every <definedName> whose text has
    # the form "Sheet1!$C$5". Defined names whose text contains no "!$" are
    # skipped, because they carry no coordinates to split.
    def defined_names
      labels = {}
      doc.xpath("//definedName").each do |defined_name|
        # "Sheet1!$C$5"
        sheet, coordinates = defined_name.text.split('!$', 2)
        next unless coordinates

        col, row = coordinates.split('$')
        name = defined_name['name']
        labels[name] = Label.new(name, sheet, row, col)
      end
      labels
    end

    # Returns the Date that serial day numbers of this workbook count from.
    # The result is memoized.
    def base_date
      @base_date ||=
        begin
          # Default to 1900 (minus one day due to excel quirk) but use 1904 if
          # it's set in the Workbook's workbookPr
          # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
          uses_1904 = doc.css("workbookPr[date1904]").any? do |workbook_pr|
            workbook_pr["date1904"] =~ /true|1/i
          end
          uses_1904 ? Date.new(1904, 1, 1) : Date.new(1899, 12, 30)
        end
    end
  end
end
|