roo 1.13.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +500 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo/base.rb +303 -388
  15. data/lib/roo/csv.rb +120 -113
  16. data/lib/roo/excelx/comments.rb +24 -0
  17. data/lib/roo/excelx/extractor.rb +20 -0
  18. data/lib/roo/excelx/relationships.rb +26 -0
  19. data/lib/roo/excelx/shared_strings.rb +40 -0
  20. data/lib/roo/excelx/sheet_doc.rb +202 -0
  21. data/lib/roo/excelx/styles.rb +62 -0
  22. data/lib/roo/excelx/workbook.rb +59 -0
  23. data/lib/roo/excelx.rb +452 -484
  24. data/lib/roo/font.rb +17 -0
  25. data/lib/roo/libre_office.rb +5 -0
  26. data/lib/roo/link.rb +15 -0
  27. data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
  28. data/lib/roo/spreadsheet.rb +20 -23
  29. data/lib/roo/utils.rb +78 -0
  30. data/lib/roo/version.rb +3 -0
  31. data/lib/roo.rb +18 -24
  32. data/roo.gemspec +20 -204
  33. data/spec/lib/roo/base_spec.rb +1 -4
  34. data/spec/lib/roo/csv_spec.rb +21 -13
  35. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  36. data/spec/lib/roo/excelx_spec.rb +388 -11
  37. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  38. data/spec/lib/roo/openoffice_spec.rb +2 -8
  39. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  40. data/spec/lib/roo/utils_spec.rb +106 -0
  41. data/spec/spec_helper.rb +2 -1
  42. data/test/test_generic_spreadsheet.rb +19 -67
  43. data/test/test_helper.rb +9 -56
  44. data/test/test_roo.rb +252 -477
  45. metadata +63 -302
  46. data/CHANGELOG +0 -417
  47. data/Gemfile.lock +0 -78
  48. data/README.markdown +0 -126
  49. data/VERSION +0 -1
  50. data/lib/roo/excel.rb +0 -355
  51. data/lib/roo/excel2003xml.rb +0 -300
  52. data/lib/roo/google.rb +0 -292
  53. data/lib/roo/roo_rails_helper.rb +0 -83
  54. data/lib/roo/worksheet.rb +0 -18
  55. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  56. data/spec/lib/roo/excel_spec.rb +0 -17
  57. data/spec/lib/roo/google_spec.rb +0 -64
  58. data/test/files/1900_base.xls +0 -0
  59. data/test/files/1900_base.xlsx +0 -0
  60. data/test/files/1904_base.xls +0 -0
  61. data/test/files/1904_base.xlsx +0 -0
  62. data/test/files/Bibelbund.csv +0 -3741
  63. data/test/files/Bibelbund.ods +0 -0
  64. data/test/files/Bibelbund.xls +0 -0
  65. data/test/files/Bibelbund.xlsx +0 -0
  66. data/test/files/Bibelbund.xml +0 -62518
  67. data/test/files/Bibelbund1.ods +0 -0
  68. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  69. data/test/files/bad_excel_date.xls +0 -0
  70. data/test/files/bbu.ods +0 -0
  71. data/test/files/bbu.xls +0 -0
  72. data/test/files/bbu.xlsx +0 -0
  73. data/test/files/bbu.xml +0 -152
  74. data/test/files/bode-v1.ods.zip +0 -0
  75. data/test/files/bode-v1.xls.zip +0 -0
  76. data/test/files/boolean.csv +0 -2
  77. data/test/files/boolean.ods +0 -0
  78. data/test/files/boolean.xls +0 -0
  79. data/test/files/boolean.xlsx +0 -0
  80. data/test/files/boolean.xml +0 -112
  81. data/test/files/borders.ods +0 -0
  82. data/test/files/borders.xls +0 -0
  83. data/test/files/borders.xlsx +0 -0
  84. data/test/files/borders.xml +0 -144
  85. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  86. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  87. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  88. data/test/files/comments.ods +0 -0
  89. data/test/files/comments.xls +0 -0
  90. data/test/files/comments.xlsx +0 -0
  91. data/test/files/csvtypes.csv +0 -1
  92. data/test/files/datetime.ods +0 -0
  93. data/test/files/datetime.xls +0 -0
  94. data/test/files/datetime.xlsx +0 -0
  95. data/test/files/datetime.xml +0 -142
  96. data/test/files/datetime_floatconv.xls +0 -0
  97. data/test/files/datetime_floatconv.xml +0 -148
  98. data/test/files/dreimalvier.ods +0 -0
  99. data/test/files/emptysheets.ods +0 -0
  100. data/test/files/emptysheets.xls +0 -0
  101. data/test/files/emptysheets.xlsx +0 -0
  102. data/test/files/emptysheets.xml +0 -105
  103. data/test/files/excel2003.xml +0 -21140
  104. data/test/files/false_encoding.xls +0 -0
  105. data/test/files/false_encoding.xml +0 -132
  106. data/test/files/file_item_error.xlsx +0 -0
  107. data/test/files/formula.ods +0 -0
  108. data/test/files/formula.xls +0 -0
  109. data/test/files/formula.xlsx +0 -0
  110. data/test/files/formula.xml +0 -134
  111. data/test/files/formula_parse_error.xls +0 -0
  112. data/test/files/formula_parse_error.xml +0 -1833
  113. data/test/files/formula_string_error.xlsx +0 -0
  114. data/test/files/html-escape.ods +0 -0
  115. data/test/files/link.xls +0 -0
  116. data/test/files/link.xlsx +0 -0
  117. data/test/files/matrix.ods +0 -0
  118. data/test/files/matrix.xls +0 -0
  119. data/test/files/named_cells.ods +0 -0
  120. data/test/files/named_cells.xls +0 -0
  121. data/test/files/named_cells.xlsx +0 -0
  122. data/test/files/no_spreadsheet_file.txt +0 -1
  123. data/test/files/numbers1.csv +0 -18
  124. data/test/files/numbers1.ods +0 -0
  125. data/test/files/numbers1.xls +0 -0
  126. data/test/files/numbers1.xlsx +0 -0
  127. data/test/files/numbers1.xml +0 -312
  128. data/test/files/numeric-link.xlsx +0 -0
  129. data/test/files/only_one_sheet.ods +0 -0
  130. data/test/files/only_one_sheet.xls +0 -0
  131. data/test/files/only_one_sheet.xlsx +0 -0
  132. data/test/files/only_one_sheet.xml +0 -67
  133. data/test/files/paragraph.ods +0 -0
  134. data/test/files/paragraph.xls +0 -0
  135. data/test/files/paragraph.xlsx +0 -0
  136. data/test/files/paragraph.xml +0 -127
  137. data/test/files/prova.xls +0 -0
  138. data/test/files/ric.ods +0 -0
  139. data/test/files/simple_spreadsheet.ods +0 -0
  140. data/test/files/simple_spreadsheet.xls +0 -0
  141. data/test/files/simple_spreadsheet.xlsx +0 -0
  142. data/test/files/simple_spreadsheet.xml +0 -225
  143. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  144. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  145. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  146. data/test/files/so_datetime.csv +0 -7
  147. data/test/files/style.ods +0 -0
  148. data/test/files/style.xls +0 -0
  149. data/test/files/style.xlsx +0 -0
  150. data/test/files/style.xml +0 -154
  151. data/test/files/time-test.csv +0 -2
  152. data/test/files/time-test.ods +0 -0
  153. data/test/files/time-test.xls +0 -0
  154. data/test/files/time-test.xlsx +0 -0
  155. data/test/files/time-test.xml +0 -131
  156. data/test/files/type_excel.ods +0 -0
  157. data/test/files/type_excel.xlsx +0 -0
  158. data/test/files/type_excelx.ods +0 -0
  159. data/test/files/type_excelx.xls +0 -0
  160. data/test/files/type_openoffice.xls +0 -0
  161. data/test/files/type_openoffice.xlsx +0 -0
  162. data/test/files/whitespace.ods +0 -0
  163. data/test/files/whitespace.xls +0 -0
  164. data/test/files/whitespace.xlsx +0 -0
  165. data/test/files/whitespace.xml +0 -184
  166. data/test/rm_sub_test.rb +0 -12
  167. data/test/rm_test.rb +0 -7
  168. data/website/index.html +0 -385
  169. data/website/index.txt +0 -423
  170. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  171. data/website/stylesheets/screen.css +0 -130
  172. data/website/template.rhtml +0 -48
data/lib/roo/csv.rb CHANGED
@@ -1,113 +1,120 @@
1
- require 'csv'
2
- require 'time'
3
-
4
- # The CSV class can read csv files (must be separated with commas) which then
5
- # can be handled like spreadsheets. This means you can access cells like A5
6
- # within these files.
7
- # The CSV class provides only string objects. If you want conversions to other
8
- # types you have to do it yourself.
9
- #
10
- # You can pass options to the underlying CSV parse operation, via the
11
- # :csv_options option.
12
- #
13
-
14
- class Roo::CSV < Roo::Base
15
- def initialize(filename, options = {})
16
- super
17
- end
18
-
19
- attr_reader :filename
20
-
21
- # Returns an array with the names of the sheets. In CSV class there is only
22
- # one dummy sheet, because a csv file cannot have more than one sheet.
23
- def sheets
24
- ['default']
25
- end
26
-
27
- def cell(row, col, sheet=nil)
28
- sheet ||= @default_sheet
29
- read_cells(sheet)
30
- @cell[normalize(row,col)]
31
- end
32
-
33
- def celltype(row, col, sheet=nil)
34
- sheet ||= @default_sheet
35
- read_cells(sheet)
36
- @cell_type[normalize(row,col)]
37
- end
38
-
39
- def cell_postprocessing(row,col,value)
40
- value
41
- end
42
-
43
- def csv_options
44
- @options[:csv_options] || {}
45
- end
46
-
47
- private
48
-
49
- TYPE_MAP = {
50
- String => :string,
51
- Float => :float,
52
- Date => :date,
53
- DateTime => :datetime,
54
- }
55
-
56
- def celltype_class(value)
57
- TYPE_MAP[value.class]
58
- end
59
-
60
- def each_row(options, &block)
61
- if uri?(filename)
62
- make_tmpdir do |tmpdir|
63
- tmp_filename = download_uri(filename, tmpdir)
64
- CSV.foreach(tmp_filename, options, &block)
65
- end
66
- else
67
- CSV.foreach(filename, options, &block)
68
- end
69
- end
70
-
71
- def read_cells(sheet=nil)
72
- sheet ||= @default_sheet
73
- return if @cells_read[sheet]
74
- @first_row[sheet] = 1
75
- @last_row[sheet] = 0
76
- @first_column[sheet] = 1
77
- @last_column[sheet] = 1
78
- rownum = 1
79
- each_row csv_options do |row|
80
- row.each_with_index do |elem,i|
81
- @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
82
- @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
83
- if i+1 > @last_column[sheet]
84
- @last_column[sheet] += 1
85
- end
86
- end
87
- rownum += 1
88
- @last_row[sheet] += 1
89
- end
90
- @cells_read[sheet] = true
91
- #-- adjust @first_row if neccessary
92
- while !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
93
- @first_row[sheet] += 1
94
- end
95
- #-- adjust @last_row if neccessary
96
- while !row(@last_row[sheet]).any? and @last_row[sheet] and
97
- @last_row[sheet] > @first_row[sheet]
98
- @last_row[sheet] -= 1
99
- end
100
- #-- adjust @first_column if neccessary
101
- while !column(@first_column[sheet]).any? and
102
- @first_column[sheet] and
103
- @first_column[sheet] < @last_column[sheet]
104
- @first_column[sheet] += 1
105
- end
106
- #-- adjust @last_column if neccessary
107
- while !column(@last_column[sheet]).any? and
108
- @last_column[sheet] and
109
- @last_column[sheet] > @first_column[sheet]
110
- @last_column[sheet] -= 1
111
- end
112
- end
113
- end
1
+ require 'csv'
2
+ require 'time'
3
+
4
+ # The CSV class can read csv files (must be separated with commas) which then
5
+ # can be handled like spreadsheets. This means you can access cells like A5
6
+ # within these files.
7
+ # The CSV class provides only string objects. If you want conversions to other
8
+ # types you have to do it yourself.
9
+ #
10
+ # You can pass options to the underlying CSV parse operation, via the
11
+ # :csv_options option.
12
+ #
13
+
14
+ class Roo::CSV < Roo::Base
15
+
16
+ attr_reader :filename
17
+
18
+ # Returns an array with the names of the sheets. In CSV class there is only
19
+ # one dummy sheet, because a csv file cannot have more than one sheet.
20
+ def sheets
21
+ ['default']
22
+ end
23
+
24
+ def cell(row, col, sheet=nil)
25
+ sheet ||= default_sheet
26
+ read_cells(sheet)
27
+ @cell[normalize(row,col)]
28
+ end
29
+
30
+ def celltype(row, col, sheet=nil)
31
+ sheet ||= default_sheet
32
+ read_cells(sheet)
33
+ @cell_type[normalize(row,col)]
34
+ end
35
+
36
+ def cell_postprocessing(row,col,value)
37
+ value
38
+ end
39
+
40
+ def csv_options
41
+ @options[:csv_options] || {}
42
+ end
43
+
44
+ private
45
+
46
+ TYPE_MAP = {
47
+ String => :string,
48
+ Float => :float,
49
+ Date => :date,
50
+ DateTime => :datetime,
51
+ }
52
+
53
+ def celltype_class(value)
54
+ TYPE_MAP[value.class]
55
+ end
56
+
57
+ def each_row(options, &block)
58
+ if uri?(filename)
59
+ make_tmpdir do |tmpdir|
60
+ tmp_filename = download_uri(filename, tmpdir)
61
+ CSV.foreach(tmp_filename, options, &block)
62
+ end
63
+ else
64
+ CSV.foreach(filename, options, &block)
65
+ end
66
+ end
67
+
68
+ def read_cells(sheet = default_sheet)
69
+ sheet ||= default_sheet
70
+ return if @cells_read[sheet]
71
+ @first_row[sheet] = 1
72
+ @last_row[sheet] = 0
73
+ @first_column[sheet] = 1
74
+ @last_column[sheet] = 1
75
+ rownum = 1
76
+ each_row csv_options do |row|
77
+ row.each_with_index do |elem,i|
78
+ @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
79
+ @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
80
+ if i+1 > @last_column[sheet]
81
+ @last_column[sheet] += 1
82
+ end
83
+ end
84
+ rownum += 1
85
+ @last_row[sheet] += 1
86
+ end
87
+ @cells_read[sheet] = true
88
+ #-- adjust @first_row if neccessary
89
+ while !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
90
+ @first_row[sheet] += 1
91
+ end
92
+ #-- adjust @last_row if neccessary
93
+ while !row(@last_row[sheet]).any? and @last_row[sheet] and
94
+ @last_row[sheet] > @first_row[sheet]
95
+ @last_row[sheet] -= 1
96
+ end
97
+ #-- adjust @first_column if neccessary
98
+ while !column(@first_column[sheet]).any? and
99
+ @first_column[sheet] and
100
+ @first_column[sheet] < @last_column[sheet]
101
+ @first_column[sheet] += 1
102
+ end
103
+ #-- adjust @last_column if neccessary
104
+ while !column(@last_column[sheet]).any? and
105
+ @last_column[sheet] and
106
+ @last_column[sheet] > @first_column[sheet]
107
+ @last_column[sheet] -= 1
108
+ end
109
+ end
110
+
111
+ def clean_sheet(sheet)
112
+ read_cells(sheet)
113
+
114
+ @cell.each_pair do |coord, value|
115
+ @cell[coord] = sanitize_value(value) if value.is_a?(::String)
116
+ end
117
+
118
+ @cleaned[sheet] = true
119
+ end
120
+ end
data/lib/roo/excelx/comments.rb ADDED
@@ -0,0 +1,24 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx::Comments < Excelx::Extractor
5
+
6
+ def comments
7
+ @comments ||= extract_comments
8
+ end
9
+
10
+ private
11
+
12
+ def extract_comments
13
+ if doc_exists?
14
+ Hash[doc.xpath("//comments/commentList/comment").map do |comment|
15
+ value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
16
+ [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
17
+ end]
18
+ else
19
+ {}
20
+ end
21
+ end
22
+
23
+ end
24
+ end
data/lib/roo/excelx/extractor.rb ADDED
@@ -0,0 +1,20 @@
1
+ module Roo
2
+ class Excelx::Extractor
3
+ def initialize(path)
4
+ @path = path
5
+ end
6
+
7
+ private
8
+
9
+ def doc
10
+ @doc ||=
11
+ if doc_exists?
12
+ ::Roo::Utils.load_xml(@path).remove_namespaces!
13
+ end
14
+ end
15
+
16
+ def doc_exists?
17
+ @path && File.exist?(@path)
18
+ end
19
+ end
20
+ end
data/lib/roo/excelx/relationships.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx::Relationships < Excelx::Extractor
5
+ def [](index)
6
+ to_a[index]
7
+ end
8
+
9
+ def to_a
10
+ @relationships ||= extract_relationships
11
+ end
12
+
13
+ private
14
+
15
+ def extract_relationships
16
+ if doc_exists?
17
+ Hash[doc.xpath("/Relationships/Relationship").map do |rel|
18
+ [rel.attribute('Id').text, rel]
19
+ end]
20
+ else
21
+ []
22
+ end
23
+ end
24
+
25
+ end
26
+ end
data/lib/roo/excelx/shared_strings.rb ADDED
@@ -0,0 +1,40 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx::SharedStrings < Excelx::Extractor
5
+ def [](index)
6
+ to_a[index]
7
+ end
8
+
9
+ def to_a
10
+ @array ||= extract_shared_strings
11
+ end
12
+
13
+ private
14
+
15
+ def extract_shared_strings
16
+ if doc_exists?
17
+ # read the shared strings xml document
18
+ doc.xpath("/sst/si").map do |si|
19
+ shared_string = ''
20
+ si.children.each do |elem|
21
+ case elem.name
22
+ when 'r'
23
+ elem.children.each do |r_elem|
24
+ if r_elem.name == 't'
25
+ shared_string << r_elem.content
26
+ end
27
+ end
28
+ when 't'
29
+ shared_string = elem.content
30
+ end
31
+ end
32
+ shared_string
33
+ end
34
+ else
35
+ []
36
+ end
37
+ end
38
+
39
+ end
40
+ end
data/lib/roo/excelx/sheet_doc.rb ADDED
@@ -0,0 +1,202 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx::SheetDoc < Excelx::Extractor
5
+ def initialize(path, relationships, styles, shared_strings, workbook, options = {})
6
+ super(path)
7
+ @options = options
8
+ @relationships = relationships
9
+ @styles = styles
10
+ @shared_strings = shared_strings
11
+ @workbook = workbook
12
+ end
13
+
14
+ def cells(relationships)
15
+ @cells ||= extract_cells(relationships)
16
+ end
17
+
18
+ def hyperlinks(relationships)
19
+ @hyperlinks ||= extract_hyperlinks(relationships)
20
+ end
21
+
22
+ # Get the dimensions for the sheet.
23
+ # This is the upper bound of cells that might
24
+ # be parsed. (the document may be sparse so cell count is only upper bound)
25
+ def dimensions
26
+ @dimensions ||= extract_dimensions
27
+ end
28
+
29
+ # Yield each row xml element to caller
30
+ def each_row_streaming(&block)
31
+ Roo::Utils.each_element(@path, 'row', &block)
32
+ end
33
+
34
+ # Yield each cell as Excelx::Cell to caller for given
35
+ # row xml
36
+ def each_cell(row_xml)
37
+ return [] unless row_xml
38
+ row_xml.children.each do |cell_element|
39
+ key = ::Roo::Utils.ref_to_key(cell_element['r'])
40
+ yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
41
+ end
42
+ end
43
+
44
+ private
45
+
46
+ def cell_from_xml(cell_xml, hyperlink)
47
+ # This is error prone, to_i will silently turn a nil into a 0
48
+ # and it works by coincidence that Format[0] is general
49
+ style = cell_xml['s'].to_i # should be here
50
+ # c: <c r="A5" s="2">
51
+ # <v>22606</v>
52
+ # </c>, format: , tmp_type: float
53
+ value_type =
54
+ case cell_xml['t']
55
+ when 's'
56
+ :shared
57
+ when 'b'
58
+ :boolean
59
+ # 2011-02-25 BEGIN
60
+ when 'str'
61
+ :string
62
+ # 2011-02-25 END
63
+ # 2011-09-15 BEGIN
64
+ when 'inlineStr'
65
+ :inlinestr
66
+ # 2011-09-15 END
67
+ else
68
+ format = @styles.style_format(style)
69
+ Excelx::Format.to_type(format)
70
+ end
71
+ formula = nil
72
+ row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
73
+ cell_xml.children.each do |cell|
74
+ case cell.name
75
+ when 'is'
76
+ cell.children.each do |inline_str|
77
+ if inline_str.name == 't'
78
+ return Excelx::Cell.new(inline_str.content,:string,formula,:string,inline_str.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
79
+ end
80
+ end
81
+ when 'f'
82
+ formula = cell.content
83
+ when 'v'
84
+ if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
85
+ value_type =
86
+ if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
87
+ :datetime
88
+ else
89
+ :date
90
+ end
91
+ end
92
+ excelx_type = [:numeric_or_formula,format.to_s]
93
+ value =
94
+ case value_type
95
+ when :shared
96
+ value_type = :string
97
+ excelx_type = :string
98
+ @shared_strings[cell.content.to_i]
99
+ when :boolean
100
+ (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
101
+ when :date, :time, :datetime
102
+ cell.content
103
+ when :formula
104
+ cell.content.to_f
105
+ when :string
106
+ excelx_type = :string
107
+ cell.content
108
+ else
109
+ value_type = :float
110
+ cell.content
111
+ end
112
+ return Excelx::Cell.new(value,value_type,formula,excelx_type,cell.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
113
+ end
114
+ end
115
+ Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
116
+ end
117
+
118
+ def extract_hyperlinks(relationships)
119
+ Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink|
120
+ if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text]
121
+ [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
122
+ end
123
+ end.compact]
124
+ end
125
+
126
+ def expand_merged_ranges(cells)
127
+ # Extract merged ranges from xml
128
+ merges = {}
129
+ doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
130
+ tl, br = mergecell_xml['ref'].split(/:/).map {|ref| ::Roo::Utils.ref_to_key(ref)}
131
+ for row in tl[0]..br[0] do
132
+ for col in tl[1]..br[1] do
133
+ next if row == tl[0] && col == tl[1]
134
+ merges[[row,col]] = tl
135
+ end
136
+ end
137
+ end
138
+ # Duplicate value into all cells in merged range
139
+ merges.each do |dst, src|
140
+ cells[dst] = cells[src]
141
+ end
142
+ end
143
+
144
+ def extract_cells(relationships)
145
+ extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
146
+ key = ::Roo::Utils.ref_to_key(cell_xml['r'])
147
+ [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
148
+ end]
149
+ if @options[:expand_merged_ranges]
150
+ expand_merged_ranges(extracted_cells)
151
+ end
152
+ extracted_cells
153
+ end
154
+
155
+ def extract_dimensions
156
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
157
+ return dimension.attributes["ref"].value
158
+ end
159
+ end
160
+
161
+ =begin
162
+ Datei xl/comments1.xml
163
+ <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
164
+ <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
165
+ <authors>
166
+ <author />
167
+ </authors>
168
+ <commentList>
169
+ <comment ref="B4" authorId="0">
170
+ <text>
171
+ <r>
172
+ <rPr>
173
+ <sz val="10" />
174
+ <rFont val="Arial" />
175
+ <family val="2" />
176
+ </rPr>
177
+ <t>Kommentar fuer B4</t>
178
+ </r>
179
+ </text>
180
+ </comment>
181
+ <comment ref="B5" authorId="0">
182
+ <text>
183
+ <r>
184
+ <rPr>
185
+ <sz val="10" />
186
+ <rFont val="Arial" />
187
+ <family val="2" />
188
+ </rPr>
189
+ <t>Kommentar fuer B5</t>
190
+ </r>
191
+ </text>
192
+ </comment>
193
+ </commentList>
194
+ </comments>
195
+ =end
196
+ =begin
197
+ if @comments_doc[self.sheets.index(sheet)]
198
+ read_comments(sheet)
199
+ end
200
+ =end
201
+ end
202
+ end
data/lib/roo/excelx/styles.rb ADDED
@@ -0,0 +1,62 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
+ module Roo
5
+ class Excelx::Styles < Excelx::Extractor
6
+ # convert internal excelx attribute to a format
7
+ def style_format(style)
8
+ id = num_fmt_ids[style.to_i]
9
+ num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
10
+ end
11
+
12
+ def definitions
13
+ @definitions ||= extract_definitions
14
+ end
15
+
16
+ private
17
+
18
+ def num_fmt_ids
19
+ @num_fmt_ids ||= extract_num_fmt_ids
20
+ end
21
+
22
+ def num_fmts
23
+ @num_fmts ||= extract_num_fmts
24
+ end
25
+
26
+ def fonts
27
+ @fonts ||= extract_fonts
28
+ end
29
+
30
+ def extract_definitions
31
+ doc.xpath("//cellXfs").flat_map do |xfs|
32
+ xfs.children.map do |xf|
33
+ fonts[xf['fontId'].to_i]
34
+ end
35
+ end
36
+ end
37
+
38
+ def extract_fonts
39
+ doc.xpath("//fonts/font").map do |font_el|
40
+ Font.new.tap do |font|
41
+ font.bold = !font_el.xpath('./b').empty?
42
+ font.italic = !font_el.xpath('./i').empty?
43
+ font.underline = !font_el.xpath('./u').empty?
44
+ end
45
+ end
46
+ end
47
+
48
+ def extract_num_fmt_ids
49
+ doc.xpath("//cellXfs").flat_map do |xfs|
50
+ xfs.children.map do |xf|
51
+ xf['numFmtId']
52
+ end
53
+ end
54
+ end
55
+
56
+ def extract_num_fmts
57
+ Hash[doc.xpath("//numFmt").map do |num_fmt|
58
+ [num_fmt['numFmtId'], num_fmt['formatCode']]
59
+ end]
60
+ end
61
+ end
62
+ end
data/lib/roo/excelx/workbook.rb ADDED
@@ -0,0 +1,59 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx::Workbook < Excelx::Extractor
5
+ class Label
6
+ attr_reader :sheet, :row, :col, :name
7
+
8
+ def initialize(name, sheet, row, col)
9
+ @name = name
10
+ @sheet = sheet
11
+ @row = row.to_i
12
+ @col = ::Roo::Utils.letter_to_number(col)
13
+ end
14
+
15
+ def key
16
+ [@row, @col]
17
+ end
18
+ end
19
+
20
+ def initialize(path)
21
+ super
22
+ if !doc_exists?
23
+ raise ArgumentError, 'missing required workbook file'
24
+ end
25
+ end
26
+
27
+ def sheets
28
+ doc.xpath("//sheet")
29
+ end
30
+
31
+ # aka labels
32
+ def defined_names
33
+ Hash[doc.xpath("//definedName").map do |defined_name|
34
+ # "Sheet1!$C$5"
35
+ sheet, coordinates = defined_name.text.split('!$', 2)
36
+ col,row = coordinates.split('$')
37
+ name = defined_name['name']
38
+ [name, Label.new(name, sheet,row,col)]
39
+ end]
40
+ end
41
+
42
+ def base_date
43
+ @base_date ||=
44
+ begin
45
+ # Default to 1900 (minus one day due to excel quirk) but use 1904 if
46
+ # it's set in the Workbook's workbookPr
47
+ # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
48
+ result = Date.new(1899,12,30) # default
49
+ doc.css("workbookPr[date1904]").each do |workbookPr|
50
+ if workbookPr["date1904"] =~ /true|1/i
51
+ result = Date.new(1904,01,01)
52
+ break
53
+ end
54
+ end
55
+ result
56
+ end
57
+ end
58
+ end
59
+ end