roo-andyw8 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG +438 -0
  6. data/Gemfile +24 -0
  7. data/Guardfile +24 -0
  8. data/LICENSE +22 -0
  9. data/README.md +121 -0
  10. data/Rakefile +23 -0
  11. data/examples/roo_soap_client.rb +50 -0
  12. data/examples/roo_soap_server.rb +26 -0
  13. data/examples/write_me.rb +31 -0
  14. data/lib/roo.rb +28 -0
  15. data/lib/roo/base.rb +717 -0
  16. data/lib/roo/csv.rb +110 -0
  17. data/lib/roo/excelx.rb +540 -0
  18. data/lib/roo/excelx/comments.rb +23 -0
  19. data/lib/roo/excelx/extractor.rb +20 -0
  20. data/lib/roo/excelx/relationships.rb +26 -0
  21. data/lib/roo/excelx/shared_strings.rb +40 -0
  22. data/lib/roo/excelx/sheet_doc.rb +175 -0
  23. data/lib/roo/excelx/styles.rb +62 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/font.rb +17 -0
  26. data/lib/roo/libre_office.rb +5 -0
  27. data/lib/roo/link.rb +15 -0
  28. data/lib/roo/open_office.rb +652 -0
  29. data/lib/roo/spreadsheet.rb +31 -0
  30. data/lib/roo/utils.rb +81 -0
  31. data/lib/roo/version.rb +3 -0
  32. data/roo.gemspec +27 -0
  33. data/scripts/txt2html +67 -0
  34. data/spec/fixtures/vcr_cassettes/google_drive.yml +165 -0
  35. data/spec/fixtures/vcr_cassettes/google_drive_access_token.yml +73 -0
  36. data/spec/fixtures/vcr_cassettes/google_drive_set.yml +857 -0
  37. data/spec/lib/roo/base_spec.rb +4 -0
  38. data/spec/lib/roo/csv_spec.rb +48 -0
  39. data/spec/lib/roo/excelx/format_spec.rb +51 -0
  40. data/spec/lib/roo/excelx_spec.rb +363 -0
  41. data/spec/lib/roo/libreoffice_spec.rb +13 -0
  42. data/spec/lib/roo/openoffice_spec.rb +15 -0
  43. data/spec/lib/roo/spreadsheet_spec.rb +88 -0
  44. data/spec/lib/roo/utils_spec.rb +105 -0
  45. data/spec/spec_helper.rb +9 -0
  46. data/test/all_ss.rb +11 -0
  47. data/test/files/1900_base.xlsx +0 -0
  48. data/test/files/1904_base.xlsx +0 -0
  49. data/test/files/Bibelbund.csv +3741 -0
  50. data/test/files/Bibelbund.ods +0 -0
  51. data/test/files/Bibelbund.xlsx +0 -0
  52. data/test/files/Bibelbund1.ods +0 -0
  53. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  54. data/test/files/advanced_header.ods +0 -0
  55. data/test/files/bbu.ods +0 -0
  56. data/test/files/bbu.xlsx +0 -0
  57. data/test/files/bode-v1.ods.zip +0 -0
  58. data/test/files/bode-v1.xls.zip +0 -0
  59. data/test/files/boolean.csv +2 -0
  60. data/test/files/boolean.ods +0 -0
  61. data/test/files/boolean.xlsx +0 -0
  62. data/test/files/borders.ods +0 -0
  63. data/test/files/borders.xlsx +0 -0
  64. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  65. data/test/files/comments.ods +0 -0
  66. data/test/files/comments.xlsx +0 -0
  67. data/test/files/csvtypes.csv +1 -0
  68. data/test/files/datetime.ods +0 -0
  69. data/test/files/datetime.xlsx +0 -0
  70. data/test/files/dreimalvier.ods +0 -0
  71. data/test/files/emptysheets.ods +0 -0
  72. data/test/files/emptysheets.xlsx +0 -0
  73. data/test/files/encrypted-letmein.ods +0 -0
  74. data/test/files/file_item_error.xlsx +0 -0
  75. data/test/files/formula.ods +0 -0
  76. data/test/files/formula.xlsx +0 -0
  77. data/test/files/formula_string_error.xlsx +0 -0
  78. data/test/files/html-escape.ods +0 -0
  79. data/test/files/link.csv +1 -0
  80. data/test/files/link.xlsx +0 -0
  81. data/test/files/matrix.ods +0 -0
  82. data/test/files/named_cells.ods +0 -0
  83. data/test/files/named_cells.xlsx +0 -0
  84. data/test/files/no_spreadsheet_file.txt +1 -0
  85. data/test/files/numbers-export.xlsx +0 -0
  86. data/test/files/numbers1.csv +18 -0
  87. data/test/files/numbers1.ods +0 -0
  88. data/test/files/numbers1.xlsx +0 -0
  89. data/test/files/numbers1withnull.xlsx +0 -0
  90. data/test/files/numeric-link.xlsx +0 -0
  91. data/test/files/only_one_sheet.ods +0 -0
  92. data/test/files/only_one_sheet.xlsx +0 -0
  93. data/test/files/paragraph.ods +0 -0
  94. data/test/files/paragraph.xlsx +0 -0
  95. data/test/files/ric.ods +0 -0
  96. data/test/files/sheet1.xml +109 -0
  97. data/test/files/simple_spreadsheet.ods +0 -0
  98. data/test/files/simple_spreadsheet.xlsx +0 -0
  99. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  100. data/test/files/so_datetime.csv +8 -0
  101. data/test/files/style.ods +0 -0
  102. data/test/files/style.xlsx +0 -0
  103. data/test/files/time-test.csv +2 -0
  104. data/test/files/time-test.ods +0 -0
  105. data/test/files/time-test.xlsx +0 -0
  106. data/test/files/type_excel.ods +0 -0
  107. data/test/files/type_excel.xlsx +0 -0
  108. data/test/files/type_excelx.ods +0 -0
  109. data/test/files/type_openoffice.xlsx +0 -0
  110. data/test/files/whitespace.ods +0 -0
  111. data/test/files/whitespace.xlsx +0 -0
  112. data/test/test_generic_spreadsheet.rb +211 -0
  113. data/test/test_helper.rb +58 -0
  114. data/test/test_roo.rb +1977 -0
  115. metadata +318 -0
@@ -0,0 +1,23 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the cell comments stored in one comments XML part of an xlsx file.
  class Excelx::Comments < Excelx::Extractor
    # Returns a Hash mapping each comment's cell key (as produced by
    # Roo::Utils.ref_to_key from the comment's "ref" attribute) to the
    # comment text. The result is memoized.
    def comments
      @comments ||= extract_comments
    end

    private

    # Builds the comment Hash from the XML document.
    # Returns an empty Hash when the comments file does not exist.
    def extract_comments
      return {} unless doc_exists?

      doc.xpath("//comments/commentList/comment").each_with_object({}) do |comment, found|
        # The text usually sits in a run (<text><r><t>), but a comment may
        # also carry it directly as <text><t>. Looking only at the run form
        # used to fail with NoMethodError on nil for the second form.
        text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
        key = ::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s)
        # A comment element without any text node still counts as a comment;
        # it is recorded with an empty string.
        found[key] = text_node ? text_node.text : ''
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Roo
  # Common base for the classes that read one XML part of an xlsx file.
  # It remembers the path of the part and parses it on first use.
  class Excelx::Extractor
    # path - location of the XML file; may be nil or name a missing file,
    #        in which case #doc yields nil.
    def initialize(path)
      @path = path
    end

    private

    # The parsed XML document with namespaces removed, or nil when the file
    # is not there. A successfully parsed document is cached.
    def doc
      return @doc if @doc

      @doc = doc_exists? ? ::Roo::Utils.load_xml(@path).remove_namespaces! : nil
    end

    # Truthy only when a path was given and a file exists at that path.
    def doc_exists?
      @path && File.exist?(@path)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads a relationships (.rels) XML part and exposes its <Relationship>
  # elements by their "Id" attribute.
  class Excelx::Relationships < Excelx::Extractor
    # Returns the <Relationship> element whose Id equals +index+, or nil
    # when there is no such relationship.
    def [](index)
      to_a[index]
    end

    # Returns the memoized Hash of Id => <Relationship> element.
    # The method keeps its historical name although the value is a Hash.
    def to_a
      @relationships ||= extract_relationships
    end

    private

    # Builds the Id => element Hash. When the relationships file does not
    # exist an empty Hash is returned, so a lookup by string Id answers nil.
    # The previous empty Array fallback raised TypeError on such a lookup.
    def extract_relationships
      return {} unless doc_exists?

      doc.xpath("/Relationships/Relationship").each_with_object({}) do |rel, by_id|
        by_id[rel.attribute('Id').text] = rel
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the shared strings XML part of an xlsx file into an Array.
  class Excelx::SharedStrings < Excelx::Extractor
    # Returns the shared string stored at position +index+.
    def [](index)
      to_a[index]
    end

    # Returns the memoized Array of all shared strings, in document order.
    def to_a
      @array ||= extract_shared_strings
    end

    private

    # Produces one string per /sst/si element; an empty Array is returned
    # when the shared strings file does not exist.
    def extract_shared_strings
      return [] unless doc_exists?

      doc.xpath("/sst/si").map do |si|
        si.children.inject('') do |text, node|
          case node.name
          when 'r'
            # a run: append the content of each of its <t> children
            node.children.each do |run_part|
              text << run_part.content if run_part.name == 't'
            end
            text
          when 't'
            # a direct <t> child replaces whatever was collected so far
            node.content
          else
            text
          end
        end
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads one worksheet XML part: its cells, hyperlinks and declared
  # dimension. Styles, shared strings and the workbook are handed in because
  # they are needed to interpret the raw cell values.
  class Excelx::SheetDoc < Excelx::Extractor
    def initialize(path, relationships, styles, shared_strings, workbook)
      super(path)
      @relationships  = relationships
      @styles         = styles
      @shared_strings = shared_strings
      @workbook       = workbook
    end

    # Hash of cell key => Excelx::Cell for every <c> element of the sheet.
    # Memoized, so +relationships+ is only used by the first call.
    def cells(relationships)
      @cells ||= extract_cells(relationships)
    end

    # Hash of cell key => hyperlink target. Memoized, so +relationships+ is
    # only used by the first call.
    def hyperlinks(relationships)
      @hyperlinks ||= extract_hyperlinks(relationships)
    end

    # The dimension declared for the sheet. This is only an upper bound of
    # the cells that might be parsed, because the document may be sparse.
    def dimensions
      @dimensions ||= extract_dimensions
    end

    # Hands each row xml element of the sheet file to the caller's block.
    def each_row_streaming(&block)
      Roo::Utils.each_element(@path, 'row', &block)
    end

    # Yields an Excelx::Cell for every child of the given row xml.
    # Returns an empty Array when no row xml is given.
    def each_cell(row_xml)
      return [] unless row_xml

      row_xml.children.each do |cell_element|
        cell_key = ::Roo::Utils.ref_to_key(cell_element['r'])
        yield cell_from_xml(cell_element, hyperlinks(@relationships)[cell_key])
      end
    end

    private

    # Turns one cell element, e.g.
    #   <c r="A5" s="2">
    #     <v>22606</v>
    #   </c>
    # into an Excelx::Cell. The children are visited in document order and
    # the first inline string <t> or <v> child decides the result; a formula
    # is attached only if its <f> child comes before that.
    def cell_from_xml(cell_xml, hyperlink)
      style = cell_xml['s'].to_i
      format = nil
      value_type =
        case cell_xml['t']
        when 's'         then :shared
        when 'b'         then :boolean
        when 'str'       then :string
        when 'inlineStr' then :inlinestr
        else
          # no explicit type: derive it from the number format of the style
          format = @styles.style_format(style)
          Excelx::Format.to_type(format)
        end
      formula = nil
      row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])

      cell_xml.children.each do |node|
        case node.name
        when 'is'
          node.children.each do |inline|
            next unless inline.name == 't'

            return Excelx::Cell.new(inline.content, :string, formula, :string, inline.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
          end
        when 'f'
          formula = node.content
        when 'v'
          if [:time, :datetime].include?(value_type)
            serial = node.content.to_f
            if serial >= 1.0
              # at least one whole day: a fractional part makes it a
              # datetime, otherwise it is a plain date
              value_type = (serial - serial.floor).abs > 0.000001 ? :datetime : :date
            end
          end
          excelx_type = [:numeric_or_formula, format.to_s]
          value =
            case value_type
            when :shared
              value_type = :string
              excelx_type = :string
              @shared_strings[node.content.to_i]
            when :boolean
              node.content.to_i == 1 ? 'TRUE' : 'FALSE'
            when :date, :time, :datetime
              node.content
            when :formula
              node.content.to_f
            when :string
              excelx_type = :string
              node.content
            else
              value_type = :float
              node.content
            end
          return Excelx::Cell.new(value, value_type, formula, excelx_type, node.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
        end
      end

      # neither an inline string nor a value was found: empty cell
      Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
    end

    # Collects cell key => target for every <hyperlink> element that has an
    # id attribute which resolves through +relationships+.
    def extract_hyperlinks(relationships)
      doc.xpath("/worksheet/hyperlinks/hyperlink").each_with_object({}) do |link, targets|
        next unless link.attribute('id')

        relationship = relationships[link.attribute('id').text]
        next unless relationship

        link_key = ::Roo::Utils.ref_to_key(link.attributes['ref'].to_s)
        targets[link_key] = relationship.attribute('Target').text
      end
    end

    # Collects cell key => Excelx::Cell for every <c> element of the sheet.
    def extract_cells(relationships)
      doc.xpath("/worksheet/sheetData/row/c").each_with_object({}) do |cell_xml, found|
        cell_key = ::Roo::Utils.ref_to_key(cell_xml['r'])
        found[cell_key] = cell_from_xml(cell_xml, hyperlinks(relationships)[cell_key])
      end
    end

    # The "ref" attribute of the first /worksheet/dimension element, or nil
    # when the sheet declares none.
    def extract_dimensions
      refs = doc.xpath("/worksheet/dimension").map { |dim| dim.attributes["ref"].value }
      refs.first
    end

    # Sample of a comments part, kept from the original source for reference.
    #
    # File xl/comments1.xml
    #   <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
    #   <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
    #     <authors>
    #       <author />
    #     </authors>
    #     <commentList>
    #       <comment ref="B4" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B4</t>
    #           </r>
    #         </text>
    #       </comment>
    #       <comment ref="B5" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B5</t>
    #           </r>
    #         </text>
    #       </comment>
    #     </commentList>
    #   </comments>
    #
    # Disabled code, also kept from the original source:
    #   if @comments_doc[self.sheets.index(sheet)]
    #     read_comments(sheet)
    #   end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
module Roo
  # Reads the styles XML part of an xlsx file: number formats and fonts of
  # the cell styles listed under <cellXfs>.
  class Excelx::Styles < Excelx::Extractor
    # Converts an internal excelx style attribute (the index of an <xf>
    # entry) to a format. A custom <numFmt> wins; otherwise the entry of
    # Excelx::Format::STANDARD_FORMATS for the style's numFmtId is used.
    def style_format(style)
      id = num_fmt_ids[style.to_i]
      num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
    end

    # Array with the Font of every <xf> entry, indexed like the style
    # attribute of a cell. Memoized.
    def definitions
      @definitions ||= extract_definitions
    end

    private

    def num_fmt_ids
      @num_fmt_ids ||= extract_num_fmt_ids
    end

    def num_fmts
      @num_fmts ||= extract_num_fmts
    end

    def fonts
      @fonts ||= extract_fonts
    end

    # The <xf> elements below <cellXfs>. Only elements are selected: walking
    # all children would also visit whitespace text nodes of an indented
    # styles file, which adds bogus entries and shifts every style index.
    def cell_xfs
      doc.xpath("//cellXfs/xf")
    end

    # One Font per <xf> entry, looked up through its fontId attribute.
    def extract_definitions
      cell_xfs.map { |xf| fonts[xf['fontId'].to_i] }
    end

    # One Font per <font> element, flagged bold / italic / underline when
    # the element has a <b> / <i> / <u> child.
    def extract_fonts
      doc.xpath("//fonts/font").map do |font_el|
        Font.new.tap do |font|
          font.bold = !font_el.xpath('./b').empty?
          font.italic = !font_el.xpath('./i').empty?
          font.underline = !font_el.xpath('./u').empty?
        end
      end
    end

    # The numFmtId attribute (a String, or nil when absent) of every <xf>
    # entry, in document order.
    def extract_num_fmt_ids
      cell_xfs.map { |xf| xf['numFmtId'] }
    end

    # Hash of numFmtId => formatCode for every <numFmt> element.
    def extract_num_fmts
      Hash[doc.xpath("//numFmt").map do |num_fmt|
        [num_fmt['numFmtId'], num_fmt['formatCode']]
      end]
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the workbook XML part of an xlsx file: sheet list, defined names
  # and the date system in use.
  class Excelx::Workbook < Excelx::Extractor
    # A defined name that points at a single cell of a sheet.
    class Label
      attr_reader :sheet, :row, :col, :name

      # name  - the defined name
      # sheet - the sheet part of the reference
      # row   - row part of the reference; converted with to_i
      # col   - column letters; converted with Roo::Utils.letter_to_number
      def initialize(name, sheet, row, col)
        @name = name
        @sheet = sheet
        @row = row.to_i
        @col = ::Roo::Utils.letter_to_number(col)
      end

      # The [row, col] pair of the labelled cell.
      def key
        [@row, @col]
      end
    end

    # Raises ArgumentError when the workbook file is missing.
    def initialize(path)
      super
      raise ArgumentError, 'missing required workbook file' unless doc_exists?
    end

    # The <sheet> elements of the workbook.
    def sheets
      doc.xpath("//sheet")
    end

    # aka labels: Hash of name => Label for every <definedName> whose text
    # has the form "Sheet1!$C$5". Defined names without a "!$" part or
    # without a column (for example "#REF!" or a constant) are skipped;
    # they used to abort the whole method with NoMethodError on nil.
    def defined_names
      doc.xpath("//definedName").each_with_object({}) do |defined_name, labels|
        # "Sheet1!$C$5"
        sheet, coordinates = defined_name.text.split('!$', 2)
        next unless coordinates

        col, row = coordinates.split('$')
        next unless col

        name = defined_name['name']
        labels[name] = Label.new(name, sheet, row, col)
      end
    end

    # The date that serial number 0 stands for. Defaults to 1900 (minus one
    # day due to an excel quirk) but uses 1904 if that is set in the
    # Workbook's workbookPr.
    # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
    def base_date
      @base_date ||=
        if doc.css("workbookPr[date1904]").any? { |workbook_pr| workbook_pr["date1904"] =~ /true|1/i }
          Date.new(1904, 1, 1)
        else
          Date.new(1899, 12, 30)
        end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Roo
  # Value holder for the bold, italic and underline flags of a cell font.
  class Font
    attr_accessor :bold, :italic, :underline

    # The predicate forms answer exactly what was stored in the flag.
    alias_method :bold?, :bold
    alias_method :italic?, :italic
    alias_method :underline?, :underline
  end
end
@@ -0,0 +1,5 @@
1
+ require 'roo/open_office'
2
+
3
# Roo::LibreOffice adds nothing of its own; it is an empty subclass that
# makes Roo::OpenOffice available under the LibreOffice name.
class Roo::LibreOffice < Roo::OpenOffice; end
@@ -0,0 +1,15 @@
1
# URI is used by Roo::Link#to_uri; load it here so the method does not depend
# on some other file having required it first.
require 'uri'

module Roo
  # A String holding the text of a link cell that also remembers the
  # address the link points to.
  class Link < String
    attr_reader :href
    alias :url :href

    # href - the link address (default: '')
    # text - the string content of the link; defaults to the address itself
    def initialize(href='', text=href)
      super(text)
      @href = href
    end

    # Returns the address parsed by URI.parse; whatever URI.parse raises for
    # an address it cannot parse is passed on to the caller.
    def to_uri
      URI.parse(href)
    end
  end
end