simple-spreadsheet-extractor 0.3.5 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +39 -14
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/doc/schema-v1.xsd +62 -0
- data/jars/simple-spreadsheet-extractor-0.4.1.jar +0 -0
- data/lib/simple-spreadsheet-extractor.rb +11 -5
- data/simple-spreadsheet-extractor.gemspec +19 -21
- data/test/test_extraction.rb +8 -0
- metadata +8 -8
- data/.gitignore +0 -3
- data/jars/simple-spreadsheet-extractor-0.3.2.jar +0 -0
data/README.rdoc
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
= Simple Spreadsheet Extractor
|
2
2
|
|
3
|
-
Authors:: Stuart Owen
|
4
|
-
Version:: 0.
|
3
|
+
Authors:: Finn Bacall, Stuart Owen
|
4
|
+
Version:: 0.4.0
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
Copyright:: (c) 2010 The University of Manchester, UK
|
@@ -53,31 +53,56 @@ Formulas are evaluated placing the result in the XML produced for that cell, how
|
|
53
53
|
|
54
54
|
Row and column indexes start at 1, rather than 0, to keep consistent with namings of the cells in Excel.
|
55
55
|
|
56
|
-
An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/
|
56
|
+
An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/fbacall/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd"]
|
57
|
+
|
58
|
+
The desired spreadsheet extractor jar can be specified by defining SPREADSHEET_EXTRACTOR_JAR_PATH in a config file (e.g. environment.rb)
|
57
59
|
|
58
60
|
== Example XML
|
59
61
|
|
60
62
|
<?xml version="1.0" encoding="UTF-8"?>
|
61
63
|
<workbook xmlns="http://www.sysmo-db.org/2010/xml/spreadsheet">
|
62
|
-
<sheet name="Sheet1" index="1" hidden="false" very_hidden="false" first_row="
|
63
|
-
<row index="1">
|
64
|
-
<cell column="1" column_alpha="A" row="1" type="numeric">12.0</cell>
|
65
|
-
<cell column="2" column_alpha="B" row="1" type="numeric">654153.0</cell>
|
66
|
-
<cell column="27" column_alpha="AA" row="1" type="string">AA</cell>
|
67
|
-
</row>
|
64
|
+
<sheet name="Sheet1" index="1" hidden="false" very_hidden="false" first_row="2" last_row="8">
|
68
65
|
<row index="2">
|
69
|
-
<cell column="
|
66
|
+
<cell column="2" column_alpha="B" row="2" type="string">test2</cell>
|
67
|
+
<cell column="3" column_alpha="C" row="2" type="string">test</cell>
|
68
|
+
<cell column="4" column_alpha="D" row="2" type="string">test3</cell>
|
70
69
|
</row>
|
70
|
+
|
71
71
|
<row index="3">
|
72
|
-
<cell column="
|
72
|
+
<cell column="2" column_alpha="B" row="3" type="string">a</cell>
|
73
|
+
<cell column="3" column_alpha="C" row="3" type="numeric">1.0</cell>
|
74
|
+
<cell column="4" column_alpha="D" row="3" type="numeric">22.0</cell>
|
73
75
|
</row>
|
74
76
|
<row index="4">
|
75
|
-
<cell column="
|
77
|
+
<cell column="2" column_alpha="B" row="4" type="string">b</cell>
|
78
|
+
|
79
|
+
<cell column="3" column_alpha="C" row="4" type="numeric">2.0</cell>
|
80
|
+
<cell column="4" column_alpha="D" row="4" type="numeric">5.0</cell>
|
76
81
|
</row>
|
77
82
|
<row index="5">
|
78
|
-
<cell column="
|
83
|
+
<cell column="2" column_alpha="B" row="5" type="string">c</cell>
|
84
|
+
<cell column="3" column_alpha="C" row="5" type="numeric">3.0</cell>
|
85
|
+
<cell column="4" column_alpha="D" row="5" type="numeric">1.0</cell>
|
86
|
+
|
87
|
+
</row>
|
88
|
+
<row index="6">
|
89
|
+
<cell column="2" column_alpha="B" row="6" type="string">d</cell>
|
90
|
+
<cell column="3" column_alpha="C" row="6" type="numeric">4.0</cell>
|
91
|
+
<cell column="4" column_alpha="D" row="6" type="numeric">5.0</cell>
|
92
|
+
</row>
|
93
|
+
<row index="7">
|
94
|
+
|
95
|
+
<cell column="2" column_alpha="B" row="7" type="string">e</cell>
|
96
|
+
<cell column="3" column_alpha="C" row="7" type="numeric">5.0</cell>
|
97
|
+
<cell column="4" column_alpha="D" row="7" type="numeric">6.0</cell>
|
98
|
+
</row>
|
99
|
+
<row index="8">
|
100
|
+
<cell column="2" column_alpha="B" row="8" type="string">total</cell>
|
101
|
+
<cell column="3" column_alpha="C" row="8" type="numeric" formula="SUM(C3:C7)">15.0</cell>
|
102
|
+
|
103
|
+
<cell column="4" column_alpha="D" row="8" type="numeric" formula="SUM(D3:D7)">39.0</cell>
|
79
104
|
</row>
|
80
105
|
</sheet>
|
81
106
|
<sheet name="Sheet2" index="2" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
|
82
107
|
<sheet name="Sheet3" index="3" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
|
83
|
-
</workbook>
|
108
|
+
</workbook>
|
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ begin
|
|
15
15
|
gemspec.description = "Takes a stream to a spreadsheet file and produces and XML representation of its contents"
|
16
16
|
gemspec.email = "stuart.owen@manchester.ac.uk"
|
17
17
|
gemspec.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
|
18
|
-
gemspec.authors = ["Stuart Owen"]
|
18
|
+
gemspec.authors = ["Stuart Owen","Finn Bacall"]
|
19
19
|
|
20
20
|
gemspec.files.include %w(jars)
|
21
21
|
gemspec.files.exclude "test/*"
|
@@ -35,4 +35,4 @@ task:test do
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
#end
|
38
|
+
#end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.2
|
data/doc/schema-v1.xsd
CHANGED
@@ -7,13 +7,52 @@
|
|
7
7
|
|
8
8
|
<xsd:complexType name="Workbook">
|
9
9
|
<xsd:sequence>
|
10
|
+
<xsd:element name="styles" type="Styles" minOccurs="1"
|
11
|
+
maxOccurs="1" />
|
10
12
|
<xsd:element name="sheet" type="Sheet" minOccurs="0"
|
11
13
|
maxOccurs="unbounded" />
|
12
14
|
</xsd:sequence>
|
13
15
|
</xsd:complexType>
|
14
16
|
|
17
|
+
<xsd:complexType name="Styles">
|
18
|
+
<xsd:sequence>
|
19
|
+
<xsd:element name="style" type="Style" minOccurs="0"
|
20
|
+
maxOccurs="unbounded" />
|
21
|
+
</xsd:sequence>
|
22
|
+
</xsd:complexType>
|
23
|
+
|
24
|
+
<xsd:complexType name="Style">
|
25
|
+
<xsd:sequence>
|
26
|
+
<xsd:element name="border-top" type="xsd:string" minOccurs="0"
|
27
|
+
maxOccurs="1" />
|
28
|
+
<xsd:element name="border-bottom" type="xsd:string" minOccurs="0"
|
29
|
+
maxOccurs="1" />
|
30
|
+
<xsd:element name="border-left" type="xsd:string" minOccurs="0"
|
31
|
+
maxOccurs="1" />
|
32
|
+
<xsd:element name="border-right" type="xsd:string" minOccurs="0"
|
33
|
+
maxOccurs="1" />
|
34
|
+
<xsd:element name="background-color" type="xsd:string" minOccurs="0"
|
35
|
+
maxOccurs="1" />
|
36
|
+
<xsd:element name="font-weight" type="xsd:string" minOccurs="0"
|
37
|
+
maxOccurs="1" />
|
38
|
+
<xsd:element name="font-style" type="xsd:string" minOccurs="0"
|
39
|
+
maxOccurs="1" />
|
40
|
+
<xsd:element name="text-decoration" type="xsd:string" minOccurs="0"
|
41
|
+
maxOccurs="1" />
|
42
|
+
<xsd:element name="font-size" type="xsd:string" minOccurs="0"
|
43
|
+
maxOccurs="1" />
|
44
|
+
<xsd:element name="font-family" type="xsd:string" minOccurs="0"
|
45
|
+
maxOccurs="1" />
|
46
|
+
<xsd:element name="color" type="xsd:string" minOccurs="0"
|
47
|
+
maxOccurs="1" />
|
48
|
+
</xsd:sequence>
|
49
|
+
<xsd:attribute name="id" type="xsd:string" use="required"></xsd:attribute>
|
50
|
+
</xsd:complexType>
|
51
|
+
|
15
52
|
<xsd:complexType name="Sheet">
|
16
53
|
<xsd:sequence>
|
54
|
+
<xsd:element name="columns" type="Columns" minOccurs="1"
|
55
|
+
maxOccurs="1" />
|
17
56
|
<xsd:element name="row" type="Row" minOccurs="0"
|
18
57
|
maxOccurs="unbounded" />
|
19
58
|
</xsd:sequence>
|
@@ -29,6 +68,26 @@
|
|
29
68
|
use="required"></xsd:attribute>
|
30
69
|
</xsd:complexType>
|
31
70
|
|
71
|
+
<xsd:complexType name="Columns">
|
72
|
+
<xsd:sequence>
|
73
|
+
<xsd:element name="column" type="Column" minOccurs="0"
|
74
|
+
maxOccurs="unbounded" />
|
75
|
+
</xsd:sequence>
|
76
|
+
<xsd:attribute name="first_column" type="xsd:positiveInteger"
|
77
|
+
use="required"></xsd:attribute>
|
78
|
+
<xsd:attribute name="last_column" type="xsd:positiveInteger"
|
79
|
+
use="required"></xsd:attribute>
|
80
|
+
</xsd:complexType>
|
81
|
+
|
82
|
+
<xsd:complexType name="Column">
|
83
|
+
<xsd:attribute name="index" type="xsd:positiveInteger"
|
84
|
+
use="required"></xsd:attribute>
|
85
|
+
<xsd:attribute name="column_alpha" type="xsd:string"
|
86
|
+
use="required"></xsd:attribute>
|
87
|
+
<xsd:attribute name="width" type="xsd:positiveInteger"
|
88
|
+
use="optional"></xsd:attribute>
|
89
|
+
</xsd:complexType>
|
90
|
+
|
32
91
|
<xsd:complexType name="Row">
|
33
92
|
<xsd:sequence>
|
34
93
|
<xsd:element name="cell" type="Cell" minOccurs="0"
|
@@ -36,6 +95,8 @@
|
|
36
95
|
</xsd:sequence>
|
37
96
|
<xsd:attribute name="index" type="xsd:positiveInteger"
|
38
97
|
use="required"></xsd:attribute>
|
98
|
+
<xsd:attribute name="height" type="xsd:string"
|
99
|
+
use="optional"></xsd:attribute>
|
39
100
|
</xsd:complexType>
|
40
101
|
|
41
102
|
<xsd:complexType name="Cell">
|
@@ -49,6 +110,7 @@
|
|
49
110
|
use="required"></xsd:attribute>
|
50
111
|
<xsd:attribute name="type" type="xsd:string" use="required"></xsd:attribute>
|
51
112
|
<xsd:attribute name="formula" type="xsd:string" use="optional"></xsd:attribute>
|
113
|
+
<xsd:attribute name="style" type="xsd:string" use="optional"></xsd:attribute>
|
52
114
|
</xsd:extension>
|
53
115
|
</xsd:simpleContent>
|
54
116
|
</xsd:complexType>
|
Binary file
|
@@ -10,10 +10,10 @@ module SysMODB
|
|
10
10
|
|
11
11
|
module SpreadsheetExtractor
|
12
12
|
|
13
|
-
|
14
|
-
COMMAND = "java -jar #{JAR_PATH}/simple-spreadsheet-extractor-0.3.2.jar"
|
13
|
+
DEFAULT_PATH = File.dirname(__FILE__) + "/../jars/simple-spreadsheet-extractor-0.4.1.jar"
|
15
14
|
|
16
15
|
def spreadsheet_to_xml spreadsheet_data
|
16
|
+
|
17
17
|
if RUBY_PLATFORM =~ /mswin32/
|
18
18
|
output = read_with_popen4 spreadsheet_data
|
19
19
|
else
|
@@ -23,13 +23,19 @@ module SysMODB
|
|
23
23
|
return output
|
24
24
|
end
|
25
25
|
|
26
|
+
|
27
|
+
|
28
|
+
def spreadsheet_extractor_command
|
29
|
+
"java -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
30
|
+
end
|
31
|
+
|
26
32
|
private
|
27
33
|
|
28
34
|
#opens using POpen4 - this is for the benefit of Windows. It has been found to be unstable on Linux and to give occasional segmentation faults
|
29
35
|
def read_with_popen4 spreadsheet_data
|
30
36
|
output=""
|
31
37
|
err_message = ""
|
32
|
-
status = POpen4::popen4(
|
38
|
+
status = POpen4::popen4(spreadsheet_extractor_command) do |stdout, stderr, stdin, pid|
|
33
39
|
stdin=stdin.binmode
|
34
40
|
spreadsheet_data.each_byte{|b| stdin.putc(b)}
|
35
41
|
stdin.close
|
@@ -49,7 +55,7 @@ module SysMODB
|
|
49
55
|
def read_with_open4 spreadsheet_data
|
50
56
|
output = ""
|
51
57
|
err_message = ""
|
52
|
-
status = Open4::popen4(
|
58
|
+
status = Open4::popen4(spreadsheet_extractor_command) do |pid, stdin, stdout, stderr|
|
53
59
|
while ((line = spreadsheet_data.gets) != nil) do
|
54
60
|
stdin << line
|
55
61
|
end
|
@@ -74,4 +80,4 @@ module SysMODB
|
|
74
80
|
end
|
75
81
|
|
76
82
|
end
|
77
|
-
end
|
83
|
+
end
|
@@ -1,40 +1,38 @@
|
|
1
1
|
# Generated by jeweler
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{simple-spreadsheet-extractor}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Stuart Owen"]
|
12
|
-
s.date = %q{
|
11
|
+
s.authors = ["Stuart Owen", "Finn Bacall"]
|
12
|
+
s.date = %q{2011-01-18}
|
13
13
|
s.description = %q{Takes a stream to a spreadsheet file and produces and XML representation of its contents}
|
14
14
|
s.email = %q{stuart.owen@manchester.ac.uk}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENCE",
|
17
|
-
|
17
|
+
"README.rdoc"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
|
-
"
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
"simple-spreadsheet-extractor.gemspec"
|
20
|
+
"LICENCE",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION",
|
24
|
+
"doc/schema-v1.xsd",
|
25
|
+
"example.rb",
|
26
|
+
"jars/lib/dom4j-1.6.1.jar",
|
27
|
+
"jars/lib/poi-3.6.jar",
|
28
|
+
"jars/lib/poi-ooxml-3.6.jar",
|
29
|
+
"jars/lib/poi-ooxml-schemas-3.6.jar",
|
30
|
+
"jars/lib/xmlbeans-2.3.0.jar",
|
31
|
+
"jars/simple-spreadsheet-extractor-0.4.1.jar",
|
32
|
+
"lib/simple-spreadsheet-extractor.rb",
|
33
|
+
"simple-spreadsheet-extractor.gemspec"
|
35
34
|
]
|
36
35
|
s.homepage = %q{http://github.com/myGrid/simple-spreadsheet-extractor-gem}
|
37
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
38
36
|
s.require_paths = ["lib"]
|
39
37
|
s.rubygems_version = %q{1.3.6}
|
40
38
|
s.summary = %q{Basic spreadsheet content extraction using Apache POI}
|
data/test/test_extraction.rb
CHANGED
@@ -41,5 +41,13 @@ class TestExtraction < Test::Unit::TestCase
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
def test_for_segfault
|
45
|
+
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
46
|
+
10.times do |x|
|
47
|
+
f=open(test_sheet,"rb")
|
48
|
+
xml = spreadsheet_to_xml(f)
|
49
|
+
end
|
50
|
+
true
|
51
|
+
end
|
44
52
|
|
45
53
|
end
|
metadata
CHANGED
@@ -4,17 +4,18 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 4
|
8
|
+
- 2
|
9
|
+
version: 0.4.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Stuart Owen
|
13
|
+
- Finn Bacall
|
13
14
|
autorequire:
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
+
date: 2011-01-18 00:00:00 +00:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
@@ -55,7 +56,6 @@ extra_rdoc_files:
|
|
55
56
|
- LICENCE
|
56
57
|
- README.rdoc
|
57
58
|
files:
|
58
|
-
- .gitignore
|
59
59
|
- LICENCE
|
60
60
|
- README.rdoc
|
61
61
|
- Rakefile
|
@@ -67,7 +67,7 @@ files:
|
|
67
67
|
- jars/lib/poi-ooxml-3.6.jar
|
68
68
|
- jars/lib/poi-ooxml-schemas-3.6.jar
|
69
69
|
- jars/lib/xmlbeans-2.3.0.jar
|
70
|
-
- jars/simple-spreadsheet-extractor-0.
|
70
|
+
- jars/simple-spreadsheet-extractor-0.4.1.jar
|
71
71
|
- lib/simple-spreadsheet-extractor.rb
|
72
72
|
- simple-spreadsheet-extractor.gemspec
|
73
73
|
has_rdoc: true
|
@@ -75,8 +75,8 @@ homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
|
|
75
75
|
licenses: []
|
76
76
|
|
77
77
|
post_install_message:
|
78
|
-
rdoc_options:
|
79
|
-
|
78
|
+
rdoc_options: []
|
79
|
+
|
80
80
|
require_paths:
|
81
81
|
- lib
|
82
82
|
required_ruby_version: !ruby/object:Gem::Requirement
|
data/.gitignore
DELETED
Binary file
|