simple-spreadsheet-extractor 0.3.5 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +39 -14
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/doc/schema-v1.xsd +62 -0
- data/jars/simple-spreadsheet-extractor-0.4.1.jar +0 -0
- data/lib/simple-spreadsheet-extractor.rb +11 -5
- data/simple-spreadsheet-extractor.gemspec +19 -21
- data/test/test_extraction.rb +8 -0
- metadata +8 -8
- data/.gitignore +0 -3
- data/jars/simple-spreadsheet-extractor-0.3.2.jar +0 -0
data/README.rdoc
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
= Simple Spreadsheet Extractor
|
2
2
|
|
3
|
-
Authors:: Stuart Owen
|
4
|
-
Version:: 0.
|
3
|
+
Authors:: Finn Bacall, Stuart Owen
|
4
|
+
Version:: 0.4.0
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
Copyright:: (c) 2010 The University of Manchester, UK
|
@@ -53,31 +53,56 @@ Formulas are evaluated placing the result in the XML produced for that cell, how
|
|
53
53
|
|
54
54
|
Row and column indexes start at 1, rather than 0, to stay consistent with the way cells are named in Excel.
|
55
55
|
|
56
|
-
An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/
|
56
|
+
An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/fbacall/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd"]
|
57
|
+
|
58
|
+
The spreadsheet extractor jar to use can be specified by defining the constant SPREADSHEET_EXTRACTOR_JAR_PATH in a config file (e.g. environment.rb); if it is not defined, the jar bundled with the gem is used.
|
57
59
|
|
58
60
|
== Example XML
|
59
61
|
|
60
62
|
<?xml version="1.0" encoding="UTF-8"?>
|
61
63
|
<workbook xmlns="http://www.sysmo-db.org/2010/xml/spreadsheet">
|
62
|
-
<sheet name="Sheet1" index="1" hidden="false" very_hidden="false" first_row="
|
63
|
-
<row index="1">
|
64
|
-
<cell column="1" column_alpha="A" row="1" type="numeric">12.0</cell>
|
65
|
-
<cell column="2" column_alpha="B" row="1" type="numeric">654153.0</cell>
|
66
|
-
<cell column="27" column_alpha="AA" row="1" type="string">AA</cell>
|
67
|
-
</row>
|
64
|
+
<sheet name="Sheet1" index="1" hidden="false" very_hidden="false" first_row="2" last_row="8">
|
68
65
|
<row index="2">
|
69
|
-
<cell column="
|
66
|
+
<cell column="2" column_alpha="B" row="2" type="string">test2</cell>
|
67
|
+
<cell column="3" column_alpha="C" row="2" type="string">test</cell>
|
68
|
+
<cell column="4" column_alpha="D" row="2" type="string">test3</cell>
|
70
69
|
</row>
|
70
|
+
|
71
71
|
<row index="3">
|
72
|
-
<cell column="
|
72
|
+
<cell column="2" column_alpha="B" row="3" type="string">a</cell>
|
73
|
+
<cell column="3" column_alpha="C" row="3" type="numeric">1.0</cell>
|
74
|
+
<cell column="4" column_alpha="D" row="3" type="numeric">22.0</cell>
|
73
75
|
</row>
|
74
76
|
<row index="4">
|
75
|
-
<cell column="
|
77
|
+
<cell column="2" column_alpha="B" row="4" type="string">b</cell>
|
78
|
+
|
79
|
+
<cell column="3" column_alpha="C" row="4" type="numeric">2.0</cell>
|
80
|
+
<cell column="4" column_alpha="D" row="4" type="numeric">5.0</cell>
|
76
81
|
</row>
|
77
82
|
<row index="5">
|
78
|
-
<cell column="
|
83
|
+
<cell column="2" column_alpha="B" row="5" type="string">c</cell>
|
84
|
+
<cell column="3" column_alpha="C" row="5" type="numeric">3.0</cell>
|
85
|
+
<cell column="4" column_alpha="D" row="5" type="numeric">1.0</cell>
|
86
|
+
|
87
|
+
</row>
|
88
|
+
<row index="6">
|
89
|
+
<cell column="2" column_alpha="B" row="6" type="string">d</cell>
|
90
|
+
<cell column="3" column_alpha="C" row="6" type="numeric">4.0</cell>
|
91
|
+
<cell column="4" column_alpha="D" row="6" type="numeric">5.0</cell>
|
92
|
+
</row>
|
93
|
+
<row index="7">
|
94
|
+
|
95
|
+
<cell column="2" column_alpha="B" row="7" type="string">e</cell>
|
96
|
+
<cell column="3" column_alpha="C" row="7" type="numeric">5.0</cell>
|
97
|
+
<cell column="4" column_alpha="D" row="7" type="numeric">6.0</cell>
|
98
|
+
</row>
|
99
|
+
<row index="8">
|
100
|
+
<cell column="2" column_alpha="B" row="8" type="string">total</cell>
|
101
|
+
<cell column="3" column_alpha="C" row="8" type="numeric" formula="SUM(C3:C7)">15.0</cell>
|
102
|
+
|
103
|
+
<cell column="4" column_alpha="D" row="8" type="numeric" formula="SUM(D3:D7)">39.0</cell>
|
79
104
|
</row>
|
80
105
|
</sheet>
|
81
106
|
<sheet name="Sheet2" index="2" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
|
82
107
|
<sheet name="Sheet3" index="3" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
|
83
|
-
</workbook>
|
108
|
+
</workbook>
|
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ begin
|
|
15
15
|
gemspec.description = "Takes a stream to a spreadsheet file and produces and XML representation of its contents"
|
16
16
|
gemspec.email = "stuart.owen@manchester.ac.uk"
|
17
17
|
gemspec.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
|
18
|
-
gemspec.authors = ["Stuart Owen"]
|
18
|
+
gemspec.authors = ["Stuart Owen","Finn Bacall"]
|
19
19
|
|
20
20
|
gemspec.files.include %w(jars)
|
21
21
|
gemspec.files.exclude "test/*"
|
@@ -35,4 +35,4 @@ task:test do
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
#end
|
38
|
+
#end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.2
|
data/doc/schema-v1.xsd
CHANGED
@@ -7,13 +7,52 @@
|
|
7
7
|
|
8
8
|
<xsd:complexType name="Workbook">
|
9
9
|
<xsd:sequence>
|
10
|
+
<xsd:element name="styles" type="Styles" minOccurs="1"
|
11
|
+
maxOccurs="1" />
|
10
12
|
<xsd:element name="sheet" type="Sheet" minOccurs="0"
|
11
13
|
maxOccurs="unbounded" />
|
12
14
|
</xsd:sequence>
|
13
15
|
</xsd:complexType>
|
14
16
|
|
17
|
+
<xsd:complexType name="Styles">
|
18
|
+
<xsd:sequence>
|
19
|
+
<xsd:element name="style" type="Style" minOccurs="0"
|
20
|
+
maxOccurs="unbounded" />
|
21
|
+
</xsd:sequence>
|
22
|
+
</xsd:complexType>
|
23
|
+
|
24
|
+
<xsd:complexType name="Style">
|
25
|
+
<xsd:sequence>
|
26
|
+
<xsd:element name="border-top" type="xsd:string" minOccurs="0"
|
27
|
+
maxOccurs="1" />
|
28
|
+
<xsd:element name="border-bottom" type="xsd:string" minOccurs="0"
|
29
|
+
maxOccurs="1" />
|
30
|
+
<xsd:element name="border-left" type="xsd:string" minOccurs="0"
|
31
|
+
maxOccurs="1" />
|
32
|
+
<xsd:element name="border-right" type="xsd:string" minOccurs="0"
|
33
|
+
maxOccurs="1" />
|
34
|
+
<xsd:element name="background-color" type="xsd:string" minOccurs="0"
|
35
|
+
maxOccurs="1" />
|
36
|
+
<xsd:element name="font-weight" type="xsd:string" minOccurs="0"
|
37
|
+
maxOccurs="1" />
|
38
|
+
<xsd:element name="font-style" type="xsd:string" minOccurs="0"
|
39
|
+
maxOccurs="1" />
|
40
|
+
<xsd:element name="text-decoration" type="xsd:string" minOccurs="0"
|
41
|
+
maxOccurs="1" />
|
42
|
+
<xsd:element name="font-size" type="xsd:string" minOccurs="0"
|
43
|
+
maxOccurs="1" />
|
44
|
+
<xsd:element name="font-family" type="xsd:string" minOccurs="0"
|
45
|
+
maxOccurs="1" />
|
46
|
+
<xsd:element name="color" type="xsd:string" minOccurs="0"
|
47
|
+
maxOccurs="1" />
|
48
|
+
</xsd:sequence>
|
49
|
+
<xsd:attribute name="id" type="xsd:string" use="required"></xsd:attribute>
|
50
|
+
</xsd:complexType>
|
51
|
+
|
15
52
|
<xsd:complexType name="Sheet">
|
16
53
|
<xsd:sequence>
|
54
|
+
<xsd:element name="columns" type="Columns" minOccurs="1"
|
55
|
+
maxOccurs="1" />
|
17
56
|
<xsd:element name="row" type="Row" minOccurs="0"
|
18
57
|
maxOccurs="unbounded" />
|
19
58
|
</xsd:sequence>
|
@@ -29,6 +68,26 @@
|
|
29
68
|
use="required"></xsd:attribute>
|
30
69
|
</xsd:complexType>
|
31
70
|
|
71
|
+
<xsd:complexType name="Columns">
|
72
|
+
<xsd:sequence>
|
73
|
+
<xsd:element name="column" type="Column" minOccurs="0"
|
74
|
+
maxOccurs="unbounded" />
|
75
|
+
</xsd:sequence>
|
76
|
+
<xsd:attribute name="first_column" type="xsd:positiveInteger"
|
77
|
+
use="required"></xsd:attribute>
|
78
|
+
<xsd:attribute name="last_column" type="xsd:positiveInteger"
|
79
|
+
use="required"></xsd:attribute>
|
80
|
+
</xsd:complexType>
|
81
|
+
|
82
|
+
<xsd:complexType name="Column">
|
83
|
+
<xsd:attribute name="index" type="xsd:positiveInteger"
|
84
|
+
use="required"></xsd:attribute>
|
85
|
+
<xsd:attribute name="column_alpha" type="xsd:string"
|
86
|
+
use="required"></xsd:attribute>
|
87
|
+
<xsd:attribute name="width" type="xsd:positiveInteger"
|
88
|
+
use="optional"></xsd:attribute>
|
89
|
+
</xsd:complexType>
|
90
|
+
|
32
91
|
<xsd:complexType name="Row">
|
33
92
|
<xsd:sequence>
|
34
93
|
<xsd:element name="cell" type="Cell" minOccurs="0"
|
@@ -36,6 +95,8 @@
|
|
36
95
|
</xsd:sequence>
|
37
96
|
<xsd:attribute name="index" type="xsd:positiveInteger"
|
38
97
|
use="required"></xsd:attribute>
|
98
|
+
<xsd:attribute name="height" type="xsd:string"
|
99
|
+
use="optional"></xsd:attribute>
|
39
100
|
</xsd:complexType>
|
40
101
|
|
41
102
|
<xsd:complexType name="Cell">
|
@@ -49,6 +110,7 @@
|
|
49
110
|
use="required"></xsd:attribute>
|
50
111
|
<xsd:attribute name="type" type="xsd:string" use="required"></xsd:attribute>
|
51
112
|
<xsd:attribute name="formula" type="xsd:string" use="optional"></xsd:attribute>
|
113
|
+
<xsd:attribute name="style" type="xsd:string" use="optional"></xsd:attribute>
|
52
114
|
</xsd:extension>
|
53
115
|
</xsd:simpleContent>
|
54
116
|
</xsd:complexType>
|
Binary file
|
@@ -10,10 +10,10 @@ module SysMODB
|
|
10
10
|
|
11
11
|
module SpreadsheetExtractor
|
12
12
|
|
13
|
-
|
14
|
-
COMMAND = "java -jar #{JAR_PATH}/simple-spreadsheet-extractor-0.3.2.jar"
|
13
|
+
DEFAULT_PATH = File.dirname(__FILE__) + "/../jars/simple-spreadsheet-extractor-0.4.1.jar"
|
15
14
|
|
16
15
|
def spreadsheet_to_xml spreadsheet_data
|
16
|
+
|
17
17
|
if RUBY_PLATFORM =~ /mswin32/
|
18
18
|
output = read_with_popen4 spreadsheet_data
|
19
19
|
else
|
@@ -23,13 +23,19 @@ module SysMODB
|
|
23
23
|
return output
|
24
24
|
end
|
25
25
|
|
26
|
+
|
27
|
+
|
28
|
+
def spreadsheet_extractor_command
|
29
|
+
"java -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
30
|
+
end
|
31
|
+
|
26
32
|
private
|
27
33
|
|
28
34
|
#opens using POpen4 - this is for the benefit of Windows. It has been found to be unstable on Linux and to give occasional segmentation faults
|
29
35
|
def read_with_popen4 spreadsheet_data
|
30
36
|
output=""
|
31
37
|
err_message = ""
|
32
|
-
status = POpen4::popen4(
|
38
|
+
status = POpen4::popen4(spreadsheet_extractor_command) do |stdout, stderr, stdin, pid|
|
33
39
|
stdin=stdin.binmode
|
34
40
|
spreadsheet_data.each_byte{|b| stdin.putc(b)}
|
35
41
|
stdin.close
|
@@ -49,7 +55,7 @@ module SysMODB
|
|
49
55
|
def read_with_open4 spreadsheet_data
|
50
56
|
output = ""
|
51
57
|
err_message = ""
|
52
|
-
status = Open4::popen4(
|
58
|
+
status = Open4::popen4(spreadsheet_extractor_command) do |pid, stdin, stdout, stderr|
|
53
59
|
while ((line = spreadsheet_data.gets) != nil) do
|
54
60
|
stdin << line
|
55
61
|
end
|
@@ -74,4 +80,4 @@ module SysMODB
|
|
74
80
|
end
|
75
81
|
|
76
82
|
end
|
77
|
-
end
|
83
|
+
end
|
@@ -1,40 +1,38 @@
|
|
1
1
|
# Generated by jeweler
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{simple-spreadsheet-extractor}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Stuart Owen"]
|
12
|
-
s.date = %q{
|
11
|
+
s.authors = ["Stuart Owen", "Finn Bacall"]
|
12
|
+
s.date = %q{2011-01-18}
|
13
13
|
s.description = %q{Takes a stream to a spreadsheet file and produces and XML representation of its contents}
|
14
14
|
s.email = %q{stuart.owen@manchester.ac.uk}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENCE",
|
17
|
-
|
17
|
+
"README.rdoc"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
|
-
"
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
"simple-spreadsheet-extractor.gemspec"
|
20
|
+
"LICENCE",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION",
|
24
|
+
"doc/schema-v1.xsd",
|
25
|
+
"example.rb",
|
26
|
+
"jars/lib/dom4j-1.6.1.jar",
|
27
|
+
"jars/lib/poi-3.6.jar",
|
28
|
+
"jars/lib/poi-ooxml-3.6.jar",
|
29
|
+
"jars/lib/poi-ooxml-schemas-3.6.jar",
|
30
|
+
"jars/lib/xmlbeans-2.3.0.jar",
|
31
|
+
"jars/simple-spreadsheet-extractor-0.4.1.jar",
|
32
|
+
"lib/simple-spreadsheet-extractor.rb",
|
33
|
+
"simple-spreadsheet-extractor.gemspec"
|
35
34
|
]
|
36
35
|
s.homepage = %q{http://github.com/myGrid/simple-spreadsheet-extractor-gem}
|
37
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
38
36
|
s.require_paths = ["lib"]
|
39
37
|
s.rubygems_version = %q{1.3.6}
|
40
38
|
s.summary = %q{Basic spreadsheet content extraction using Apache POI}
|
data/test/test_extraction.rb
CHANGED
@@ -41,5 +41,13 @@ class TestExtraction < Test::Unit::TestCase
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
def test_for_segfault
|
45
|
+
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
46
|
+
10.times do |x|
|
47
|
+
f=open(test_sheet,"rb")
|
48
|
+
xml = spreadsheet_to_xml(f)
|
49
|
+
end
|
50
|
+
true
|
51
|
+
end
|
44
52
|
|
45
53
|
end
|
metadata
CHANGED
@@ -4,17 +4,18 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 4
|
8
|
+
- 2
|
9
|
+
version: 0.4.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Stuart Owen
|
13
|
+
- Finn Bacall
|
13
14
|
autorequire:
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
+
date: 2011-01-18 00:00:00 +00:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
@@ -55,7 +56,6 @@ extra_rdoc_files:
|
|
55
56
|
- LICENCE
|
56
57
|
- README.rdoc
|
57
58
|
files:
|
58
|
-
- .gitignore
|
59
59
|
- LICENCE
|
60
60
|
- README.rdoc
|
61
61
|
- Rakefile
|
@@ -67,7 +67,7 @@ files:
|
|
67
67
|
- jars/lib/poi-ooxml-3.6.jar
|
68
68
|
- jars/lib/poi-ooxml-schemas-3.6.jar
|
69
69
|
- jars/lib/xmlbeans-2.3.0.jar
|
70
|
-
- jars/simple-spreadsheet-extractor-0.
|
70
|
+
- jars/simple-spreadsheet-extractor-0.4.1.jar
|
71
71
|
- lib/simple-spreadsheet-extractor.rb
|
72
72
|
- simple-spreadsheet-extractor.gemspec
|
73
73
|
has_rdoc: true
|
@@ -75,8 +75,8 @@ homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
|
|
75
75
|
licenses: []
|
76
76
|
|
77
77
|
post_install_message:
|
78
|
-
rdoc_options:
|
79
|
-
|
78
|
+
rdoc_options: []
|
79
|
+
|
80
80
|
require_paths:
|
81
81
|
- lib
|
82
82
|
required_ruby_version: !ruby/object:Gem::Requirement
|
data/.gitignore
DELETED
Binary file
|