excelxml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +75 -0
- data/Rakefile +8 -0
- data/excelxml.gemspec +25 -0
- data/lib/excelxml.rb +3 -0
- data/lib/excelxml/cell.rb +22 -0
- data/lib/excelxml/row.rb +12 -0
- data/lib/excelxml/table.rb +13 -0
- data/lib/excelxml/version.rb +3 -0
- data/lib/excelxml/workbook.rb +56 -0
- data/lib/excelxml/worksheet.rb +98 -0
- data/test/fixtures/workbook.xml +143 -0
- data/test/minitest_helper.rb +10 -0
- data/test/test_excelxml.rb +64 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 17db4ac7fc2a4c0422b43f0b71973218f34f037c
|
4
|
+
data.tar.gz: 505a1585dbc4fdfba90b3118aaa737da7c8e11d0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 123a484dc0b37f13b258de4890fd822b0d49d1c7c726a7c2cedafdbdc7462d3d3978417fef5d71b22bef7f64756a940d3d1da463b753ad767ae8f057044f811c
|
7
|
+
data.tar.gz: 4a2a51b41ba21be68d8b910654a5474fbc35fb52c2775460067db8c9f4bd678764f5bf929f247abd1857e85e7a70bf1246c3364b9819d7576b2766c45c2ab4a5
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Benjamin Delsol
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# ExcelXml
|
2
|
+
|
3
|
+
ExcelXml can parse the data out of an Excel XML Spreadsheet 2003.
|
4
|
+
|
5
|
+
See the Usage section below to get an idea of how it works.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'excelxml'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install excelxml
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
# First step is to inherit from a worksheet parser.
|
25
|
+
class PersonParser < ExcelXml::Worksheet::Parser
|
26
|
+
NAME = /name/i
|
27
|
+
CITY = /city/i
|
28
|
+
STATE = /state/i
|
29
|
+
AGE = /age/i
|
30
|
+
|
31
|
+
class Person < Struct.new(:name, :city, :state, :age); end
|
32
|
+
|
33
|
+
# Override the #mandatory_columns method to auto-discover your worksheet's header row.
|
34
|
+
# #is_header? could alternatively be overridden.
|
35
|
+
def mandatory_columns
|
36
|
+
[NAME, CITY]
|
37
|
+
end
|
38
|
+
|
39
|
+
# A rows accessor will be available to your parser so that you can iterate through worksheet data.
|
40
|
+
def persons
|
41
|
+
@persons ||= rows.collect do |fields|
|
42
|
+
Person.new(fields[NAME], fields[CITY], fields[STATE], fields[AGE])
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
my_parsers = ExcelXml::Workbook::Parser.new(File.read(workbook), worksheet_parsers: PersonParser)
|
48
|
+
|
49
|
+
# You'll get a PersonParser instance for each worksheet that matched PersonParser#mandatory_columns
|
50
|
+
# or PersonParser#is_header?.
|
51
|
+
my_parsers[PersonParser].length # => 1
|
52
|
+
person_parser = my_parsers[PersonParser].first # => #<PersonParser:0x00555555f42ab8>
|
53
|
+
|
54
|
+
person_parser.worksheet.name # => "PersonSheet"
|
55
|
+
person_parser.persons.each do |person|
|
56
|
+
puts "#{person.name} lives in #{person.city}."
|
57
|
+
end
|
58
|
+
|
59
|
+
# Worksheets that did not match a parser's #mandatory_columns or #is_header? end up here.
|
60
|
+
raw_worksheet = my_parsers.unidentified_worksheets.first # => #<ExcelXml::Worksheet:0x00555555f43841>
|
61
|
+
raw_worksheet.name # => "NoHeaderSheet"
|
62
|
+
|
63
|
+
# ExcelXml::Worksheet#rows is a 2-dimensional array of strings (nil for empty cells) representing your worksheet.
|
64
|
+
raw_worksheet.rows.each do |raw_row|
|
65
|
+
raw_row.each {|raw_data| puts raw_data }
|
66
|
+
end
|
67
|
+
```
|
68
|
+
|
69
|
+
## Contributing
|
70
|
+
|
71
|
+
1. Fork it ( http://github.com/bdiz/excelxml/fork )
|
72
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
73
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
74
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
75
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/excelxml.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'excelxml/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "excelxml"
|
8
|
+
spec.version = ExcelXml::VERSION
|
9
|
+
spec.authors = ["Ben Delsol"]
|
10
|
+
spec.email = [] # contact me via github (username: bdiz)
|
11
|
+
spec.summary = %q{Parses the data out of Excel XML 2003 workbooks/sheets.}
|
12
|
+
spec.description = %q{}
|
13
|
+
spec.homepage = ""
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_dependency "happymapper"
|
22
|
+
|
23
|
+
spec.add_development_dependency "bundler", "~> 1.5"
|
24
|
+
spec.add_development_dependency "rake"
|
25
|
+
end
|
data/lib/excelxml.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
require 'happymapper'
|
3
|
+
|
4
|
+
module ExcelXml
|
5
|
+
# Maps a single <Cell> element of an Excel 2003 XML worksheet.
#
# Besides its <Data> payload a cell may carry an explicit column Index and
# MergeDown / MergeAcross span counts. The span readers are wrapped so that
# a missing attribute reads as 0; the unwrapped readers stay reachable as
# #orig_merge_down and #orig_merge_across.
class Cell
  include HappyMapper

  tag "Cell"
  element :data, String, tag: "Data", single: true
  attribute :index, Integer, tag: "Index"

  # Declare each span attribute, keep the generated reader under an orig_*
  # alias, then shadow it with a reader that substitutes 0 for nil.
  { merge_down: "MergeDown", merge_across: "MergeAcross" }.each do |span, xml_attribute|
    attribute span, Integer, tag: xml_attribute

    raw_reader = :"orig_#{span}"
    alias_method raw_reader, span
    define_method(span) { send(raw_reader) || 0 }
  end
end
|
21
|
+
end
|
22
|
+
|
data/lib/excelxml/row.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
|
2
|
+
require 'excelxml/worksheet'
|
3
|
+
|
4
|
+
module ExcelXml
|
5
|
+
|
6
|
+
class Workbook
|
7
|
+
|
8
|
+
include HappyMapper
|
9
|
+
tag "Workbook"
|
10
|
+
has_many :worksheets, Worksheet
|
11
|
+
|
12
|
+
# Parses a workbook and sorts its worksheets by the worksheet parser class
# that recognises them.
#
# Each worksheet is scanned row by row. The first row that any registered
# worksheet parser accepts through #is_header? claims the worksheet for that
# parser class. Worksheets claimed by nobody are collected in
# #unidentified_worksheets.
class Parser
  # Worksheets for which no registered parser found a header row.
  attr_reader :unidentified_worksheets

  # @param workbook_xml [String] XML handed to ExcelXml::Workbook.parse
  # @param opts [Hash] supported keys:
  #   :worksheet_parsers     - a worksheet parser class or an array of them
  #   :only_these_worksheets - a worksheet name or an array of names; other
  #                            worksheets are ignored entirely. nil/false
  #                            (or omitting the key) means "no filter".
  # @raise [ArgumentError] if opts contains any other key
  def initialize(workbook_xml, opts = {})
    # Work on a copy so the caller's hash is not emptied by the deletes below.
    opts = opts.dup

    # Always delete the key, so an explicit nil/false is not reported as an
    # unknown option. An empty array stays a (match-nothing) filter.
    worksheet_filter = opts.delete(:only_these_worksheets)
    only_these_worksheets = worksheet_filter ? [worksheet_filter].flatten.compact : nil

    @worksheet_parser_classes = [opts.delete(:worksheet_parsers)].flatten.compact
    @worksheet_parser_hash = @worksheet_parser_classes.each_with_object({}) { |wspc, hsh| hsh[wspc] = [] }
    raise ArgumentError, "unknown options #{opts.keys.inspect}" unless opts.empty?

    @unidentified_worksheets = []
    ExcelXml::Workbook.parse(workbook_xml, single: true).worksheets.each do |worksheet|
      next if only_these_worksheets && !only_these_worksheets.include?(worksheet.name)

      @unidentified_worksheets << worksheet unless identify_worksheet(worksheet)
    end
  end

  # @param worksheet_parser_class [Class] one of the classes passed as :worksheet_parsers
  # @return [Array, nil] parser instances created for that class, or nil if
  #   the class was never registered
  def [](worksheet_parser_class)
    @worksheet_parser_hash[worksheet_parser_class]
  end

  private

  # Looks for the first row accepted as a header by any registered parser
  # (parsers are tried in registration order) and records a parser instance
  # for it. Rows are not read at all when no parser class is registered.
  #
  # @return [Boolean] true if the worksheet was claimed by a parser
  def identify_worksheet(worksheet)
    return false if @worksheet_parser_classes.empty?

    worksheet.rows.each_with_index do |row, row_idx|
      # is_header? receives a 1-based row number.
      identifier = worksheet_identifiers.find { |wsp| wsp.is_header?(row_idx + 1, row) }
      next unless identifier

      add_worksheet_parser(identifier.class, worksheet, row_idx)
      return true
    end
    false
  end

  # Instantiates the parser class for the worksheet and tells it where
  # (0-based) its header row is.
  def add_worksheet_parser(worksheet_parser_class, worksheet, header_row_idx)
    worksheet_parser = worksheet_parser_class.new(worksheet)
    worksheet_parser.header_row_index = header_row_idx
    @worksheet_parser_hash[worksheet_parser_class] << worksheet_parser
  end

  # One worksheet-less instance per parser class, used only to call #is_header?.
  def worksheet_identifiers
    @worksheet_identifiers ||= @worksheet_parser_classes.collect { |wspc| wspc.new(nil) }
  end
end
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
require 'excelxml/table'
|
3
|
+
|
4
|
+
module ExcelXml
|
5
|
+
|
6
|
+
class Worksheet
|
7
|
+
|
8
|
+
include HappyMapper
|
9
|
+
tag "Worksheet"
|
10
|
+
attribute :name, String, tag: "Name"
|
11
|
+
element :table, Table, :single => true
|
12
|
+
|
13
|
+
# Expands the worksheet's table into a dense 2-dimensional array (memoized).
#
# The grid starts as table.row_count rows of table.column_count nils.
# Each cell's data is then written to every position the cell covers:
# * a row or cell with an explicit 1-based index is placed at that index,
#   otherwise it follows the previous row / the previous cell's span;
# * merge_down / merge_across repeat the data over the merged area.
#
# If a span or an explicit row index reaches below the declared row count,
# the grid is extended with blank rows instead of failing on a nil row.
#
# @return [Array<Array>] rows of cell data; untouched positions are nil
def rows
  @grid ||= begin
    column_count = table.column_count
    grid = Array.new(table.row_count) { Array.new(column_count) }

    next_row = 0
    table.rows.each do |row|
      top = row.index ? row.index - 1 : next_row
      next_row = top + 1

      next_col = 0
      row.cells.each do |cell|
        left = cell.index ? cell.index - 1 : next_col
        # The following cell starts after this cell's horizontal span.
        next_col = left + cell.merge_across + 1

        (top..top + cell.merge_down).each do |grid_row|
          # Grow the grid when the table under-reports its row count.
          grid << Array.new(column_count) while grid.length <= grid_row
          (left..left + cell.merge_across).each do |grid_col|
            grid[grid_row][grid_col] = cell.data
          end
        end
      end
    end

    grid
  end
end
|
33
|
+
|
34
|
+
# Base class for worksheet parsers. Subclasses describe how to recognise
# the header row; the data rows below it are then exposed through #rows.
class Parser
  # The worksheet this parser reads from.
  attr_reader :worksheet
  # 0-based position of the header row inside worksheet.rows.
  attr_accessor :header_row_index

  def initialize(worksheet)
    @worksheet = worksheet
  end

  # Every row below the header row, each wrapped in a Fields object that
  # knows the header names (memoized).
  def rows
    return @rows if @rows

    data_rows = @worksheet.rows[(header_row_index + 1)..-1]
    @rows = data_rows.collect { |cells| Fields.new(cells, index_to_header_map) }
  end

  # The header row with surrounding whitespace stripped from each title;
  # empty header cells stay nil (memoized).
  def index_to_header_map
    @index_to_header_map ||= @worksheet.rows[header_row_index].collect do |title|
      title.strip if title
    end
  end

  ########################################
  # Override methods
  ########################################

  # Patterns that must each match some cell of a row for that row to count
  # as the header. The default is no patterns at all.
  def mandatory_columns
    []
  end

  # True when every mandatory column pattern matches at least one non-nil
  # cell of the row. row_number is not used by this default implementation.
  def is_header?(row_number, fields)
    mandatory_columns.all? do |pattern|
      fields.any? { |field| field && field.match(pattern) }
    end
  end
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# One worksheet data row. Behaves like an Array of cell values and can
# additionally be indexed by a Regexp that is matched against the header
# titles of the worksheet.
class Fields < Array
  # The last argument is the header map (array of header titles, nil for
  # empty header cells); everything before it is passed on to Array.new.
  def initialize(*args)
    super(*args[0..-2])
    @index_to_header_map = args.last
  end

  # With a Regexp: returns the value in the first column whose header
  # matches, extended with the Field helpers. Empty (nil) header cells are
  # skipped rather than matched against.
  # With anything else: plain Array#[] (including the start, length form).
  #
  # @raise [RuntimeError] if no header matches the Regexp
  # @raise [ArgumentError] if extra arguments accompany a Regexp
  def [](regexp, *rest)
    return super unless regexp.is_a?(Regexp)
    raise ArgumentError, "a Regexp lookup takes exactly one argument" unless rest.empty?

    idx = @index_to_header_map.find_index { |header| header && header.match(regexp) }
    raise "#{regexp.inspect} not found in #{@index_to_header_map.inspect}." if idx.nil?

    # NOTE: extending a nil value adds the Field helpers to every nil.
    super(idx).extend(Field)
  end
end
|
83
|
+
|
84
|
+
# Helper predicates mixed into individual cell values returned by a
# Regexp lookup on Fields.
module Field
  # The value converted with Kernel#Integer, or false when that conversion
  # fails. Note that a successful result such as 0 is still truthy in Ruby.
  def fixnum?
    Integer(self)
  rescue StandardError
    false
  end

  # True for a non-empty String.
  def content?
    is_a?(String) && !empty?
  end

  # True for a non-empty String that does not convert to an Integer.
  def string?
    content? && !fixnum?
  end
end
|
97
|
+
|
98
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<?mso-application progid="Excel.Sheet"?>
|
3
|
+
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
|
4
|
+
xmlns:o="urn:schemas-microsoft-com:office:office"
|
5
|
+
xmlns:x="urn:schemas-microsoft-com:office:excel"
|
6
|
+
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
|
7
|
+
xmlns:html="http://www.w3.org/TR/REC-html40">
|
8
|
+
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
|
9
|
+
<Author>Qualcomm User</Author>
|
10
|
+
<LastAuthor>Qualcomm User</LastAuthor>
|
11
|
+
<Created>2014-04-09T20:01:31Z</Created>
|
12
|
+
<LastSaved>2014-04-09T20:47:39Z</LastSaved>
|
13
|
+
<Company>Qualcomm Incorporated</Company>
|
14
|
+
<Version>14.00</Version>
|
15
|
+
</DocumentProperties>
|
16
|
+
<OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
|
17
|
+
<AllowPNG/>
|
18
|
+
</OfficeDocumentSettings>
|
19
|
+
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
|
20
|
+
<WindowHeight>10800</WindowHeight>
|
21
|
+
<WindowWidth>27795</WindowWidth>
|
22
|
+
<WindowTopX>480</WindowTopX>
|
23
|
+
<WindowTopY>105</WindowTopY>
|
24
|
+
<ActiveSheet>1</ActiveSheet>
|
25
|
+
<ProtectStructure>False</ProtectStructure>
|
26
|
+
<ProtectWindows>False</ProtectWindows>
|
27
|
+
</ExcelWorkbook>
|
28
|
+
<Styles>
|
29
|
+
<Style ss:ID="Default" ss:Name="Normal">
|
30
|
+
<Alignment ss:Vertical="Bottom"/>
|
31
|
+
<Borders/>
|
32
|
+
<Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"/>
|
33
|
+
<Interior/>
|
34
|
+
<NumberFormat/>
|
35
|
+
<Protection/>
|
36
|
+
</Style>
|
37
|
+
<Style ss:ID="s63">
|
38
|
+
<Alignment ss:Horizontal="Center" ss:Vertical="Bottom"/>
|
39
|
+
</Style>
|
40
|
+
</Styles>
|
41
|
+
<Worksheet ss:Name="NoHeaderSheet">
|
42
|
+
<Table ss:ExpandedColumnCount="7" ss:ExpandedRowCount="6" x:FullColumns="1"
|
43
|
+
x:FullRows="1" ss:DefaultRowHeight="15">
|
44
|
+
<Row>
|
45
|
+
<Cell><Data ss:Type="Number">1</Data></Cell>
|
46
|
+
<Cell><Data ss:Type="Number">2</Data></Cell>
|
47
|
+
<Cell><Data ss:Type="Number">3</Data></Cell>
|
48
|
+
<Cell><Data ss:Type="Number">4</Data></Cell>
|
49
|
+
<Cell><Data ss:Type="Number">5</Data></Cell>
|
50
|
+
</Row>
|
51
|
+
<Row>
|
52
|
+
<Cell><Data ss:Type="Number">6</Data></Cell>
|
53
|
+
<Cell ss:MergeAcross="1" ss:StyleID="s63"><Data ss:Type="Number">7</Data></Cell>
|
54
|
+
<Cell><Data ss:Type="Number">8</Data></Cell>
|
55
|
+
<Cell><Data ss:Type="Number">9</Data></Cell>
|
56
|
+
<Cell><Data ss:Type="Number">10</Data></Cell>
|
57
|
+
</Row>
|
58
|
+
<Row>
|
59
|
+
<Cell ss:MergeDown="2" ss:StyleID="s63"><Data ss:Type="Number">11</Data></Cell>
|
60
|
+
<Cell><Data ss:Type="Number">12</Data></Cell>
|
61
|
+
<Cell><Data ss:Type="Number">13</Data></Cell>
|
62
|
+
<Cell ss:MergeAcross="3" ss:MergeDown="2" ss:StyleID="s63"><Data
|
63
|
+
ss:Type="Number">14</Data></Cell>
|
64
|
+
</Row>
|
65
|
+
<Row>
|
66
|
+
<Cell ss:Index="2"><Data ss:Type="Number">15</Data></Cell>
|
67
|
+
<Cell><Data ss:Type="Number">16</Data></Cell>
|
68
|
+
</Row>
|
69
|
+
<Row>
|
70
|
+
<Cell ss:Index="2"><Data ss:Type="Number">17</Data></Cell>
|
71
|
+
<Cell><Data ss:Type="Number">18</Data></Cell>
|
72
|
+
</Row>
|
73
|
+
<Row>
|
74
|
+
<Cell ss:MergeAcross="6" ss:StyleID="s63"><Data ss:Type="Number">19</Data></Cell>
|
75
|
+
</Row>
|
76
|
+
</Table>
|
77
|
+
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
|
78
|
+
<PageSetup>
|
79
|
+
<Header x:Margin="0.3"/>
|
80
|
+
<Footer x:Margin="0.3"/>
|
81
|
+
<PageMargins x:Bottom="0.75" x:Left="0.7" x:Right="0.7" x:Top="0.75"/>
|
82
|
+
</PageSetup>
|
83
|
+
<ProtectObjects>False</ProtectObjects>
|
84
|
+
<ProtectScenarios>False</ProtectScenarios>
|
85
|
+
</WorksheetOptions>
|
86
|
+
</Worksheet>
|
87
|
+
<Worksheet ss:Name="PersonSheet">
|
88
|
+
<Table ss:ExpandedColumnCount="6" ss:ExpandedRowCount="8" x:FullColumns="1"
|
89
|
+
x:FullRows="1" ss:DefaultRowHeight="15">
|
90
|
+
<Row>
|
91
|
+
<Cell><Data ss:Type="String">my excel worksheet</Data></Cell>
|
92
|
+
</Row>
|
93
|
+
<Row>
|
94
|
+
<Cell><Data ss:Type="String">name</Data></Cell>
|
95
|
+
<Cell><Data ss:Type="String">date of birth</Data></Cell>
|
96
|
+
<Cell><Data ss:Type="String">profession</Data></Cell>
|
97
|
+
<Cell><Data ss:Type="String">city</Data></Cell>
|
98
|
+
<Cell><Data ss:Type="String">state</Data></Cell>
|
99
|
+
<Cell><Data ss:Type="String">age</Data></Cell>
|
100
|
+
</Row>
|
101
|
+
<Row>
|
102
|
+
<Cell ss:MergeAcross="2" ss:MergeDown="2" ss:StyleID="s63"><Data
|
103
|
+
ss:Type="String">jon</Data></Cell>
|
104
|
+
<Cell ss:MergeAcross="1" ss:MergeDown="4" ss:StyleID="s63"><Data
|
105
|
+
ss:Type="String">CA</Data></Cell>
|
106
|
+
<Cell><Data ss:Type="Number">1</Data></Cell>
|
107
|
+
</Row>
|
108
|
+
<Row>
|
109
|
+
<Cell ss:Index="6" ss:MergeDown="1" ss:StyleID="s63"><Data ss:Type="Number">2</Data></Cell>
|
110
|
+
</Row>
|
111
|
+
<Row ss:Index="6">
|
112
|
+
<Cell ss:MergeAcross="2" ss:MergeDown="1" ss:StyleID="s63"><Data
|
113
|
+
ss:Type="Number">13</Data></Cell>
|
114
|
+
<Cell ss:Index="6"><Data ss:Type="Number">4</Data></Cell>
|
115
|
+
</Row>
|
116
|
+
<Row ss:Index="8">
|
117
|
+
<Cell><Data ss:Type="String">ben</Data></Cell>
|
118
|
+
<Cell><Data ss:Type="String">may</Data></Cell>
|
119
|
+
<Cell><Data ss:Type="String">programmer</Data></Cell>
|
120
|
+
<Cell><Data ss:Type="String">San Diego</Data></Cell>
|
121
|
+
<Cell><Data ss:Type="String">CA</Data></Cell>
|
122
|
+
<Cell><Data ss:Type="Number">6</Data></Cell>
|
123
|
+
</Row>
|
124
|
+
</Table>
|
125
|
+
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
|
126
|
+
<PageSetup>
|
127
|
+
<Header x:Margin="0.3"/>
|
128
|
+
<Footer x:Margin="0.3"/>
|
129
|
+
<PageMargins x:Bottom="0.75" x:Left="0.7" x:Right="0.7" x:Top="0.75"/>
|
130
|
+
</PageSetup>
|
131
|
+
<Selected/>
|
132
|
+
<Panes>
|
133
|
+
<Pane>
|
134
|
+
<Number>3</Number>
|
135
|
+
<ActiveRow>24</ActiveRow>
|
136
|
+
<ActiveCol>6</ActiveCol>
|
137
|
+
</Pane>
|
138
|
+
</Panes>
|
139
|
+
<ProtectObjects>False</ProtectObjects>
|
140
|
+
<ProtectScenarios>False</ProtectScenarios>
|
141
|
+
</WorksheetOptions>
|
142
|
+
</Worksheet>
|
143
|
+
</Workbook>
|
@@ -0,0 +1,64 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../..', __FILE__)
|
2
|
+
require 'minitest_helper'
|
3
|
+
|
4
|
+
describe ExcelXml do
|
5
|
+
|
6
|
+
workbook = fixture_file_path('workbook.xml')
|
7
|
+
|
8
|
+
class PersonParser < ExcelXml::Worksheet::Parser
|
9
|
+
NAME = /name/i
|
10
|
+
BIRTH = /date\s+of\s+birth/i
|
11
|
+
PROFESSION = /profession/i
|
12
|
+
CITY = /city/i
|
13
|
+
STATE = /state/i
|
14
|
+
AGE = /age/i
|
15
|
+
MANDATORY_COLUMNS = [NAME, CITY]
|
16
|
+
|
17
|
+
class Person < Struct.new(:name, :birth, :profession, :city, :state, :age); end
|
18
|
+
|
19
|
+
def mandatory_columns
|
20
|
+
MANDATORY_COLUMNS
|
21
|
+
end
|
22
|
+
# def is_header? row_number, fields
|
23
|
+
# row_number == 1
|
24
|
+
# end
|
25
|
+
def persons
|
26
|
+
@persons ||= rows.collect do |fields|
|
27
|
+
Person.new(fields[NAME], fields[BIRTH], fields[PROFESSION], fields[CITY], fields[STATE], fields[AGE])
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
EXPECTED_NO_HEADER_SHEET_INFO = [
|
33
|
+
["1", "2", "3", "4", "5", nil, nil],
|
34
|
+
["6", "7", "7", "8", "9", "10", nil],
|
35
|
+
%w(11 12 13 14 14 14 14),
|
36
|
+
%w(11 15 16 14 14 14 14),
|
37
|
+
%w(11 17 18 14 14 14 14),
|
38
|
+
%w(19 19 19 19 19 19 19),
|
39
|
+
]
|
40
|
+
EXPECTED_PERSON_INFO = [
|
41
|
+
%w(jon jon jon CA CA 1),
|
42
|
+
%w(jon jon jon CA CA 2),
|
43
|
+
%w(jon jon jon CA CA 2),
|
44
|
+
%w(13 13 13 CA CA 4),
|
45
|
+
["13", "13", "13", "CA", "CA", nil],
|
46
|
+
["ben", "may", "programmer", "San Diego", "CA", "6"]
|
47
|
+
]
|
48
|
+
|
49
|
+
it "it can parse a workbook" do
|
50
|
+
workbook_parser = ExcelXml::Workbook::Parser.new(File.read(workbook), worksheet_parsers: PersonParser)
|
51
|
+
|
52
|
+
workbook_parser.unidentified_worksheets.length.must_equal 1
|
53
|
+
workbook_parser.unidentified_worksheets.first.name.must_equal "NoHeaderSheet"
|
54
|
+
workbook_parser.unidentified_worksheets.first.rows.must_equal EXPECTED_NO_HEADER_SHEET_INFO
|
55
|
+
|
56
|
+
workbook_parser[PersonParser].length.must_equal 1
|
57
|
+
person_parser = workbook_parser[PersonParser].first
|
58
|
+
|
59
|
+
person_parser.worksheet.name.must_equal "PersonSheet"
|
60
|
+
person_parser.persons.collect {|person| person.to_a }.must_equal EXPECTED_PERSON_INFO
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
end
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: excelxml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Delsol
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-04-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: happymapper
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: ''
|
56
|
+
email: []
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- .gitignore
|
62
|
+
- Gemfile
|
63
|
+
- LICENSE.txt
|
64
|
+
- README.md
|
65
|
+
- Rakefile
|
66
|
+
- excelxml.gemspec
|
67
|
+
- lib/excelxml.rb
|
68
|
+
- lib/excelxml/cell.rb
|
69
|
+
- lib/excelxml/row.rb
|
70
|
+
- lib/excelxml/table.rb
|
71
|
+
- lib/excelxml/version.rb
|
72
|
+
- lib/excelxml/workbook.rb
|
73
|
+
- lib/excelxml/worksheet.rb
|
74
|
+
- test/fixtures/workbook.xml
|
75
|
+
- test/minitest_helper.rb
|
76
|
+
- test/test_excelxml.rb
|
77
|
+
homepage: ''
|
78
|
+
licenses:
|
79
|
+
- MIT
|
80
|
+
metadata: {}
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options: []
|
83
|
+
require_paths:
|
84
|
+
- lib
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
requirements: []
|
96
|
+
rubyforge_project:
|
97
|
+
rubygems_version: 2.0.3
|
98
|
+
signing_key:
|
99
|
+
specification_version: 4
|
100
|
+
summary: Parses the data out of Excel XML 2003 workbooks/sheets.
|
101
|
+
test_files:
|
102
|
+
- test/fixtures/workbook.xml
|
103
|
+
- test/minitest_helper.rb
|
104
|
+
- test/test_excelxml.rb
|