culturecode-roo 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. data/.gitignore +7 -0
  2. data/.simplecov +4 -0
  3. data/.travis.yml +13 -0
  4. data/CHANGELOG +438 -0
  5. data/Gemfile +24 -0
  6. data/Guardfile +24 -0
  7. data/LICENSE +22 -0
  8. data/README.md +121 -0
  9. data/Rakefile +23 -0
  10. data/examples/roo_soap_client.rb +50 -0
  11. data/examples/roo_soap_server.rb +26 -0
  12. data/examples/write_me.rb +31 -0
  13. data/lib/roo.rb +28 -0
  14. data/lib/roo/base.rb +717 -0
  15. data/lib/roo/csv.rb +110 -0
  16. data/lib/roo/excelx.rb +542 -0
  17. data/lib/roo/excelx/comments.rb +23 -0
  18. data/lib/roo/excelx/extractor.rb +20 -0
  19. data/lib/roo/excelx/relationships.rb +26 -0
  20. data/lib/roo/excelx/shared_strings.rb +40 -0
  21. data/lib/roo/excelx/sheet_doc.rb +175 -0
  22. data/lib/roo/excelx/styles.rb +62 -0
  23. data/lib/roo/excelx/workbook.rb +59 -0
  24. data/lib/roo/font.rb +17 -0
  25. data/lib/roo/libre_office.rb +5 -0
  26. data/lib/roo/link.rb +15 -0
  27. data/lib/roo/open_office.rb +652 -0
  28. data/lib/roo/spreadsheet.rb +31 -0
  29. data/lib/roo/utils.rb +81 -0
  30. data/lib/roo/version.rb +3 -0
  31. data/roo.gemspec +27 -0
  32. data/scripts/txt2html +67 -0
  33. data/spec/fixtures/vcr_cassettes/google_drive.yml +165 -0
  34. data/spec/fixtures/vcr_cassettes/google_drive_access_token.yml +73 -0
  35. data/spec/fixtures/vcr_cassettes/google_drive_set.yml +857 -0
  36. data/spec/lib/roo/base_spec.rb +4 -0
  37. data/spec/lib/roo/csv_spec.rb +48 -0
  38. data/spec/lib/roo/excelx/format_spec.rb +51 -0
  39. data/spec/lib/roo/excelx_spec.rb +363 -0
  40. data/spec/lib/roo/libreoffice_spec.rb +13 -0
  41. data/spec/lib/roo/openoffice_spec.rb +15 -0
  42. data/spec/lib/roo/spreadsheet_spec.rb +88 -0
  43. data/spec/lib/roo/utils_spec.rb +105 -0
  44. data/spec/spec_helper.rb +9 -0
  45. data/test/all_ss.rb +11 -0
  46. data/test/files/1900_base.xlsx +0 -0
  47. data/test/files/1904_base.xlsx +0 -0
  48. data/test/files/Bibelbund.csv +3741 -0
  49. data/test/files/Bibelbund.ods +0 -0
  50. data/test/files/Bibelbund.xlsx +0 -0
  51. data/test/files/Bibelbund1.ods +0 -0
  52. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  53. data/test/files/advanced_header.ods +0 -0
  54. data/test/files/bbu.ods +0 -0
  55. data/test/files/bbu.xlsx +0 -0
  56. data/test/files/bode-v1.ods.zip +0 -0
  57. data/test/files/bode-v1.xls.zip +0 -0
  58. data/test/files/boolean.csv +2 -0
  59. data/test/files/boolean.ods +0 -0
  60. data/test/files/boolean.xlsx +0 -0
  61. data/test/files/borders.ods +0 -0
  62. data/test/files/borders.xlsx +0 -0
  63. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  64. data/test/files/comments.ods +0 -0
  65. data/test/files/comments.xlsx +0 -0
  66. data/test/files/csvtypes.csv +1 -0
  67. data/test/files/datetime.ods +0 -0
  68. data/test/files/datetime.xlsx +0 -0
  69. data/test/files/dreimalvier.ods +0 -0
  70. data/test/files/emptysheets.ods +0 -0
  71. data/test/files/emptysheets.xlsx +0 -0
  72. data/test/files/encrypted-letmein.ods +0 -0
  73. data/test/files/file_item_error.xlsx +0 -0
  74. data/test/files/formula.ods +0 -0
  75. data/test/files/formula.xlsx +0 -0
  76. data/test/files/formula_string_error.xlsx +0 -0
  77. data/test/files/html-escape.ods +0 -0
  78. data/test/files/link.csv +1 -0
  79. data/test/files/link.xlsx +0 -0
  80. data/test/files/matrix.ods +0 -0
  81. data/test/files/named_cells.ods +0 -0
  82. data/test/files/named_cells.xlsx +0 -0
  83. data/test/files/no_spreadsheet_file.txt +1 -0
  84. data/test/files/numbers-export.xlsx +0 -0
  85. data/test/files/numbers1.csv +18 -0
  86. data/test/files/numbers1.ods +0 -0
  87. data/test/files/numbers1.xlsx +0 -0
  88. data/test/files/numbers1withnull.xlsx +0 -0
  89. data/test/files/numeric-link.xlsx +0 -0
  90. data/test/files/only_one_sheet.ods +0 -0
  91. data/test/files/only_one_sheet.xlsx +0 -0
  92. data/test/files/paragraph.ods +0 -0
  93. data/test/files/paragraph.xlsx +0 -0
  94. data/test/files/ric.ods +0 -0
  95. data/test/files/sheet1.xml +109 -0
  96. data/test/files/simple_spreadsheet.ods +0 -0
  97. data/test/files/simple_spreadsheet.xlsx +0 -0
  98. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  99. data/test/files/so_datetime.csv +8 -0
  100. data/test/files/style.ods +0 -0
  101. data/test/files/style.xlsx +0 -0
  102. data/test/files/time-test.csv +2 -0
  103. data/test/files/time-test.ods +0 -0
  104. data/test/files/time-test.xlsx +0 -0
  105. data/test/files/type_excel.ods +0 -0
  106. data/test/files/type_excel.xlsx +0 -0
  107. data/test/files/type_excelx.ods +0 -0
  108. data/test/files/type_openoffice.xlsx +0 -0
  109. data/test/files/whitespace.ods +0 -0
  110. data/test/files/whitespace.xlsx +0 -0
  111. data/test/test_generic_spreadsheet.rb +211 -0
  112. data/test/test_helper.rb +58 -0
  113. data/test/test_roo.rb +1977 -0
  114. metadata +329 -0
@@ -0,0 +1,23 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads cell comments from the comments XML file whose path was given to the
  # constructor (see Excelx::Extractor).
  class Excelx::Comments < Excelx::Extractor
    # Returns a Hash that maps the key computed by Roo::Utils.ref_to_key from
    # each comment's "ref" attribute to the comment text. The result is
    # memoized; an empty Hash is returned when the file does not exist.
    def comments
      @comments ||= extract_comments
    end

    private

    def extract_comments
      return {} unless doc_exists?

      Hash[doc.xpath("//comments/commentList/comment").map do |comment|
        [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), comment_text(comment)]
      end]
    end

    # Text of the comment's first rich-text run (<text><r><t>). Falls back to
    # a plain <text><t> node, and to an empty String when neither is present,
    # so one run-less comment cannot abort the extraction with a
    # NoMethodError on nil.
    def comment_text(comment)
      node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
      node ? node.text : ''
    end
  end
end
@@ -0,0 +1,20 @@
1
module Roo
  # Common base of the Excelx readers in this library: remembers the path of
  # one XML file and parses it lazily on first use.
  class Excelx::Extractor
    # path - location of the XML file to read; may be nil.
    def initialize(path)
      @path = path
    end

    private

    # Truthy only when a path was supplied and a file exists at that path.
    def doc_exists?
      @path && File.exist?(@path)
    end

    # The document loaded through Roo::Utils.load_xml with its namespaces
    # removed, or nil when the file is missing. A successfully parsed
    # document is cached.
    def doc
      @doc ||= (::Roo::Utils.load_xml(@path).remove_namespaces! if doc_exists?)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the Relationship elements of a relationships XML file and exposes
  # them by their "Id" attribute.
  class Excelx::Relationships < Excelx::Extractor
    # Returns the Relationship element whose "Id" attribute equals +index+,
    # or nil when there is none.
    def [](index)
      to_a[index]
    end

    # Despite its name this returns a Hash of Id => Relationship element.
    # The result is memoized.
    def to_a
      @relationships ||= extract_relationships
    end

    private

    def extract_relationships
      # Always answer with a Hash: an empty Array here would raise a TypeError
      # as soon as #[] is called with a String relationship id.
      return {} unless doc_exists?

      Hash[doc.xpath("/Relationships/Relationship").map do |rel|
        [rel.attribute('Id').text, rel]
      end]
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the shared strings XML document into an Array of Strings, one entry
  # per /sst/si element, in document order.
  class Excelx::SharedStrings < Excelx::Extractor
    # Returns the shared string stored at position +index+.
    def [](index)
      to_a[index]
    end

    # All shared strings (memoized); an empty Array when the file is missing.
    def to_a
      @array ||= extract_shared_strings
    end

    private

    def extract_shared_strings
      return [] unless doc_exists?

      doc.xpath("/sst/si").map { |item| item_text(item) }
    end

    # Builds the text of one <si> element by walking its children in order:
    # an <r> child appends the content of each of its <t> children, a direct
    # <t> child replaces whatever was collected so far, and any other child
    # is ignored.
    def item_text(item)
      item.children.inject('') do |text, node|
        case node.name
        when 'r'
          node.children.each { |part| text << part.content if part.name == 't' }
          text
        when 't'
          node.content
        else
          text
        end
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads one worksheet XML file and turns its <c> elements into
  # Excelx::Cell objects, using the styles, shared strings and workbook
  # objects handed to the constructor.
  class Excelx::SheetDoc < Excelx::Extractor
    def initialize(path, relationships, styles, shared_strings, workbook)
      super(path)
      @relationships  = relationships
      @styles         = styles
      @shared_strings = shared_strings
      @workbook       = workbook
    end

    # Hash of cell key => Excelx::Cell for every /worksheet/sheetData/row/c
    # element. Memoized: the +relationships+ argument only matters on the
    # first call.
    def cells(relationships)
      @cells ||= extract_cells(relationships)
    end

    # Hash of cell key => hyperlink target. Memoized: the +relationships+
    # argument only matters on the first call.
    def hyperlinks(relationships)
      @hyperlinks ||= extract_hyperlinks(relationships)
    end

    # Get the dimensions for the sheet.
    # This is the upper bound of cells that might be parsed (the document may
    # be sparse, so the cell count is only an upper bound).
    def dimensions
      @dimensions ||= extract_dimensions
    end

    # Yield each row xml element to the caller.
    def each_row_streaming(&block)
      Roo::Utils.each_element(@path, 'row', &block)
    end

    # Yield each cell of the given row xml as an Excelx::Cell.
    # Returns an empty Array when no row is given.
    def each_cell(row_xml)
      if row_xml
        row_xml.children.each do |node|
          key = ::Roo::Utils.ref_to_key(node['r'])
          yield cell_from_xml(node, hyperlinks(@relationships)[key])
        end
      else
        []
      end
    end

    private

    # Builds an Excelx::Cell from a <c> element such as
    #   <c r="A5" s="2"><v>22606</v></c>
    # The cell type comes from the "t" attribute when it is one of the known
    # markers; otherwise it is derived from the number format of the style.
    def cell_from_xml(cell_xml, hyperlink)
      style  = cell_xml['s'].to_i
      format = nil # only looked up when the "t" attribute does not decide the type
      value_type =
        case cell_xml['t']
        when 's'         then :shared
        when 'b'         then :boolean
        when 'str'       then :string
        when 'inlineStr' then :inlinestr
        else
          format = @styles.style_format(style)
          Excelx::Format.to_type(format)
        end
      formula = nil
      row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])

      cell_xml.children.each do |node|
        case node.name
        when 'is'
          # inline string: the first <t> child carries the text
          text_node = node.children.find { |candidate| candidate.name == 't' }
          if text_node
            return Excelx::Cell.new(text_node.content, :string, formula, :string, text_node.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
          end
        when 'f'
          formula = node.content
        when 'v'
          # A time/datetime formatted value of 1.0 or more is re-classified:
          # :datetime when it has a fractional part, :date otherwise.
          if [:time, :datetime].include?(value_type) && node.content.to_f >= 1.0
            serial = node.content.to_f
            value_type = (serial - serial.floor).abs > 0.000001 ? :datetime : :date
          end
          excelx_type = [:numeric_or_formula, format.to_s]
          value =
            case value_type
            when :shared
              value_type = :string
              excelx_type = :string
              @shared_strings[node.content.to_i]
            when :boolean
              node.content.to_i == 1 ? 'TRUE' : 'FALSE'
            when :formula
              node.content.to_f
            when :date, :time, :datetime
              node.content
            when :string
              excelx_type = :string
              node.content
            else
              value_type = :float
              node.content
            end
          return Excelx::Cell.new(value, value_type, formula, excelx_type, node.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
        end
      end

      # no usable <is> or <v> child: an empty cell that only knows its coordinate
      Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
    end

    # Maps the key of every hyperlinked cell to the "Target" of the
    # relationship its "id" attribute refers to. Hyperlinks without an id or
    # without a matching relationship are left out.
    def extract_hyperlinks(relationships)
      doc.xpath("/worksheet/hyperlinks/hyperlink").each_with_object({}) do |hyperlink, links|
        next unless hyperlink.attribute('id')

        relationship = relationships[hyperlink.attribute('id').text]
        next unless relationship

        links[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s)] = relationship.attribute('Target').text
      end
    end

    def extract_cells(relationships)
      doc.xpath("/worksheet/sheetData/row/c").each_with_object({}) do |cell_xml, result|
        key = ::Roo::Utils.ref_to_key(cell_xml['r'])
        result[key] = cell_from_xml(cell_xml, hyperlinks(relationships)[key])
      end
    end

    # The "ref" attribute of the first /worksheet/dimension element, or nil
    # when the sheet has none.
    def extract_dimensions
      refs = doc.xpath("/worksheet/dimension").map { |dim| dim.attributes["ref"].value }
      refs.first
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
module Roo
  # Reads the styles XML document: number formats and fonts of the cell
  # styles listed under cellXfs.
  class Excelx::Styles < Excelx::Extractor
    # Convert an internal excelx style index to a format: a numFmt defined in
    # the document wins, otherwise the entry of
    # Excelx::Format::STANDARD_FORMATS for that numFmtId is used.
    def style_format(style)
      fmt_id = num_fmt_ids[style.to_i]
      custom = num_fmts[fmt_id]
      custom || Excelx::Format::STANDARD_FORMATS[fmt_id.to_i]
    end

    # Array with one Font per child of cellXfs, picked by its fontId
    # attribute (memoized).
    def definitions
      @definitions ||= extract_definitions
    end

    private

    def num_fmt_ids
      @num_fmt_ids ||= extract_num_fmt_ids
    end

    def num_fmts
      @num_fmts ||= extract_num_fmts
    end

    def fonts
      @fonts ||= extract_fonts
    end

    # Every child node of every cellXfs element, in document order.
    def cell_xf_nodes
      doc.xpath("//cellXfs").flat_map { |xfs| xfs.children.to_a }
    end

    def extract_definitions
      cell_xf_nodes.map { |xf| fonts[xf['fontId'].to_i] }
    end

    # One Font per //fonts/font element; a flag is true when the matching
    # child element (b, i or u) is present.
    def extract_fonts
      doc.xpath("//fonts/font").map do |font_el|
        font = Font.new
        font.bold      = font_el.xpath('./b').any?
        font.italic    = font_el.xpath('./i').any?
        font.underline = font_el.xpath('./u').any?
        font
      end
    end

    def extract_num_fmt_ids
      cell_xf_nodes.map { |xf| xf['numFmtId'] }
    end

    # Hash of numFmtId => formatCode for every numFmt element.
    def extract_num_fmts
      doc.xpath("//numFmt").each_with_object({}) do |num_fmt, formats|
        formats[num_fmt['numFmtId']] = num_fmt['formatCode']
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the workbook XML document: sheet list, defined names and the date
  # system used by the workbook.
  class Excelx::Workbook < Excelx::Extractor
    # A defined name that points at a single cell of a sheet.
    class Label
      attr_reader :sheet, :row, :col, :name

      # row is converted with to_i, col (a column letter) with
      # Roo::Utils.letter_to_number.
      def initialize(name, sheet, row, col)
        @name = name
        @sheet = sheet
        @row = row.to_i
        @col = ::Roo::Utils.letter_to_number(col)
      end

      # [row, col] pair of the labelled cell.
      def key
        [@row, @col]
      end
    end

    # Raises ArgumentError when the workbook file does not exist.
    def initialize(path)
      super
      unless doc_exists?
        raise ArgumentError, 'missing required workbook file'
      end
    end

    # All <sheet> elements of the workbook.
    def sheets
      doc.xpath("//sheet")
    end

    # aka labels
    #
    # Returns a Hash of name => Label for every definedName whose text has the
    # form "Sheet1!$C$5". Names that carry no "!$" reference (for example a
    # named constant or formula) cannot be turned into a Label and are
    # skipped instead of raising NoMethodError on nil.
    def defined_names
      Hash[doc.xpath("//definedName").map do |defined_name|
        # "Sheet1!$C$5"
        sheet, coordinates = defined_name.text.split('!$', 2)
        next unless coordinates

        col, row = coordinates.split('$')
        name = defined_name['name']
        [name, Label.new(name, sheet, row, col)]
      end.compact]
    end

    # The Date that serial number 0 stands for (memoized).
    def base_date
      @base_date ||=
        begin
          # Default to 1900 (minus one day due to excel quirk) but use 1904 if
          # it's set in the Workbook's workbookPr
          # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
          result = Date.new(1899, 12, 30) # default
          doc.css("workbookPr[date1904]").each do |workbookPr|
            if workbookPr["date1904"] =~ /true|1/i
              result = Date.new(1904, 01, 01)
              break
            end
          end
          result
        end
    end
  end
end
data/lib/roo/font.rb ADDED
@@ -0,0 +1,17 @@
1
module Roo
  # Holds the bold / italic / underline flags of a font.
  class Font
    attr_accessor :bold, :italic, :underline

    # Predicate readers: each returns the stored flag unchanged (nil while it
    # has never been assigned).
    alias_method :bold?, :bold
    alias_method :italic?, :italic
    alias_method :underline?, :underline
  end
end
@@ -0,0 +1,5 @@
1
+ require 'roo/open_office'
2
+
3
# Roo::LibreOffice defines nothing of its own: it is an empty subclass of
# Roo::OpenOffice and inherits all of its behaviour.
class Roo::LibreOffice < Roo::OpenOffice; end
data/lib/roo/link.rb ADDED
@@ -0,0 +1,15 @@
1
# URI.parse is used by Roo::Link#to_uri; load it here so the class does not
# depend on some other file having required 'uri' first.
require 'uri'

module Roo
  # A String holding the visible text of a link that also remembers the
  # target (href) the link points to.
  class Link < String
    # The link target as it was passed to the constructor.
    attr_reader :href
    alias_method :url, :href

    # href - the link target.
    # text - the String content of the link; defaults to the target itself.
    def initialize(href = '', text = href)
      super(text)
      @href = href
    end

    # Parses the target with URI.parse and returns the resulting URI object.
    # Raises URI::InvalidURIError when the target is not a valid URI.
    def to_uri
      URI.parse(href)
    end
  end
end
+ end