simple-spreadsheet-extractor 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/README.rdoc +10 -3
- data/Rakefile +37 -0
- data/VERSION +1 -0
- data/doc/schema-v1.xsd +56 -0
- data/lib/simple-spreadsheet-extractor.rb +9 -16
- data/simple-spreadsheet-extractor.gemspec +57 -0
- data/test/test_extraction.rb +45 -0
- metadata +25 -19
data/.gitignore
ADDED
data/README.rdoc
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
= Simple Spreadsheet Extractor
|
2
2
|
|
3
3
|
Authors:: Stuart Owen
|
4
|
-
Version:: 0.3.
|
4
|
+
Version:: 0.3.3
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
Copyright:: (c) 2010 The University of Manchester, UK
|
@@ -21,6 +21,10 @@ Java 1.6 (JRE) is required.
|
|
21
21
|
|
22
22
|
gem install simple-spreadsheet-extractor
|
23
23
|
|
24
|
+
Note that on Windows you must also do:
|
25
|
+
|
26
|
+
gem install win32-open3
|
27
|
+
|
24
28
|
== Usage
|
25
29
|
|
26
30
|
* require 'simple-spreadsheet-extractor'
|
@@ -30,12 +34,15 @@ Java 1.6 (JRE) is required.
|
|
30
34
|
|
31
35
|
e.g.
|
32
36
|
|
37
|
+
#example.rb - takes the path to a spreadsheet as its argument, e.g. ruby example.rb /tmp/spreadsheet.xls
|
33
38
|
require 'rubygems'
|
34
39
|
require 'simple-spreadsheet-extractor'
|
35
40
|
|
36
41
|
include SysMODB::SpreadsheetExtractor
|
37
42
|
|
38
|
-
|
43
|
+
path=ARGV.first
|
44
|
+
|
45
|
+
f=open(path)
|
39
46
|
begin
|
40
47
|
puts spreadsheet_to_xml(f)
|
41
48
|
rescue SysMODB::SpreadsheetExtractionException=>e
|
@@ -46,7 +53,7 @@ Formulas are evaluated placing the result in the XML produced for that cell, how
|
|
46
53
|
|
47
54
|
Row and column indexes start at 1, rather than 0, to stay consistent with how cells are named in Excel.
|
48
55
|
|
49
|
-
An XSD schema for the XML is available in doc/schema-v1.xsd
|
56
|
+
An XSD schema for the XML is available in {doc/schema-v1.xsd}[http://github.com/stuzart/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd]
|
50
57
|
|
51
58
|
== Example XML
|
52
59
|
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
|
8
|
+
task :default => [:test]
|
9
|
+
|
10
|
+
begin
|
11
|
+
require 'jeweler'
|
12
|
+
Jeweler::Tasks.new do |gemspec|
|
13
|
+
gemspec.name = "simple-spreadsheet-extractor"
|
14
|
+
gemspec.summary = "Basic spreadsheet content extraction using Apache POI"
|
15
|
+
gemspec.description = "Takes a stream to a spreadsheet file and produces and XML representation of its contents"
|
16
|
+
gemspec.email = "stuart.owen@manchester.ac.uk"
|
17
|
+
gemspec.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
|
18
|
+
gemspec.authors = ["Stuart Owen"]
|
19
|
+
|
20
|
+
gemspec.files.include %w(jars)
|
21
|
+
gemspec.files.exclude "test/*"
|
22
|
+
gemspec.extra_rdoc_files = ["README.rdoc", "LICENCE"]
|
23
|
+
gemspec.add_dependency("POpen4","0.1.4")
|
24
|
+
end
|
25
|
+
rescue LoadError
|
26
|
+
puts "Jeweler not available. Install it with: gem install jeweler"
|
27
|
+
end
|
28
|
+
|
29
|
+
task:test do
|
30
|
+
Rake::TestTask.new do |t|
|
31
|
+
t.libs << "test"
|
32
|
+
t.test_files = FileList['test/test*.rb']
|
33
|
+
t.verbose = true
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
#end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.3.3
|
data/doc/schema-v1.xsd
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema targetNamespace="http://www.sysmo-db.org/2010/xml/spreadsheet"
|
3
|
+
xml:lang="en" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns="http://www.sysmo-db.org/2010/xml/spreadsheet"
|
4
|
+
elementFormDefault="qualified">
|
5
|
+
|
6
|
+
<xsd:element name="workbook" type="Workbook"></xsd:element>
|
7
|
+
|
8
|
+
<xsd:complexType name="Workbook">
|
9
|
+
<xsd:sequence>
|
10
|
+
<xsd:element name="sheet" type="Sheet" minOccurs="0"
|
11
|
+
maxOccurs="unbounded" />
|
12
|
+
</xsd:sequence>
|
13
|
+
</xsd:complexType>
|
14
|
+
|
15
|
+
<xsd:complexType name="Sheet">
|
16
|
+
<xsd:sequence>
|
17
|
+
<xsd:element name="row" type="Row" minOccurs="0"
|
18
|
+
maxOccurs="unbounded" />
|
19
|
+
</xsd:sequence>
|
20
|
+
<xsd:attribute name="name" type="xsd:string" use="required"></xsd:attribute>
|
21
|
+
<xsd:attribute name="index" type="xsd:positiveInteger"
|
22
|
+
use="required"></xsd:attribute>
|
23
|
+
<xsd:attribute name="hidden" type="xsd:boolean" use="required"></xsd:attribute>
|
24
|
+
<xsd:attribute name="very_hidden" type="xsd:boolean"
|
25
|
+
use="required"></xsd:attribute>
|
26
|
+
<xsd:attribute name="first_row" type="xsd:positiveInteger"
|
27
|
+
use="required"></xsd:attribute>
|
28
|
+
<xsd:attribute name="last_row" type="xsd:positiveInteger"
|
29
|
+
use="required"></xsd:attribute>
|
30
|
+
</xsd:complexType>
|
31
|
+
|
32
|
+
<xsd:complexType name="Row">
|
33
|
+
<xsd:sequence>
|
34
|
+
<xsd:element name="cell" type="Cell" minOccurs="0"
|
35
|
+
maxOccurs="unbounded" />
|
36
|
+
</xsd:sequence>
|
37
|
+
<xsd:attribute name="index" type="xsd:positiveInteger"
|
38
|
+
use="required"></xsd:attribute>
|
39
|
+
</xsd:complexType>
|
40
|
+
|
41
|
+
<xsd:complexType name="Cell">
|
42
|
+
<xsd:simpleContent>
|
43
|
+
<xsd:extension base="xsd:string">
|
44
|
+
<xsd:attribute name="column" type="xsd:positiveInteger"
|
45
|
+
use="required"></xsd:attribute>
|
46
|
+
<xsd:attribute name="column_alpha" type="xsd:string"
|
47
|
+
use="required"></xsd:attribute>
|
48
|
+
<xsd:attribute name="row" type="xsd:positiveInteger"
|
49
|
+
use="required"></xsd:attribute>
|
50
|
+
<xsd:attribute name="type" type="xsd:string" use="required"></xsd:attribute>
|
51
|
+
<xsd:attribute name="formula" type="xsd:string" use="optional"></xsd:attribute>
|
52
|
+
</xsd:extension>
|
53
|
+
</xsd:simpleContent>
|
54
|
+
</xsd:complexType>
|
55
|
+
|
56
|
+
</xsd:schema>
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require '
|
2
|
+
require 'popen4'
|
3
3
|
|
4
4
|
module SysMODB
|
5
5
|
|
@@ -14,24 +14,17 @@ module SysMODB
|
|
14
14
|
command = "java -jar #{JAR_PATH}/simple-spreadsheet-extractor-0.3.2.jar"
|
15
15
|
output = ""
|
16
16
|
err_message = ""
|
17
|
-
status =
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
stdin.close
|
17
|
+
status = POpen4::popen4(command) do |stdout, stderr, stdin, pid|
|
18
|
+
stdin=stdin.binmode
|
19
|
+
spreadsheet_data.each_byte{|b| stdin.putc(b)}
|
20
|
+
stdin.close
|
22
21
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
stdout.close
|
27
|
-
|
28
|
-
while ((line=stderr.gets)!= nil) do
|
29
|
-
err_message << line
|
30
|
-
end
|
31
|
-
stderr.close
|
22
|
+
output=stdout.read.strip
|
23
|
+
err_message=stderr.read.strip
|
24
|
+
|
32
25
|
end
|
33
26
|
|
34
|
-
if status.to_i != 0
|
27
|
+
if status.to_i != 0
|
35
28
|
raise SpreadsheetExtractionException.new(err_message)
|
36
29
|
end
|
37
30
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{simple-spreadsheet-extractor}
|
8
|
+
s.version = "0.3.3"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Stuart Owen"]
|
12
|
+
s.date = %q{2010-07-28}
|
13
|
+
s.description = %q{Takes a stream to a spreadsheet file and produces and XML representation of its contents}
|
14
|
+
s.email = %q{stuart.owen@manchester.ac.uk}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENCE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".gitignore",
|
21
|
+
"LICENCE",
|
22
|
+
"README.rdoc",
|
23
|
+
"Rakefile",
|
24
|
+
"VERSION",
|
25
|
+
"doc/schema-v1.xsd",
|
26
|
+
"jars/lib/dom4j-1.6.1.jar",
|
27
|
+
"jars/lib/poi-3.6.jar",
|
28
|
+
"jars/lib/poi-ooxml-3.6.jar",
|
29
|
+
"jars/lib/poi-ooxml-schemas-3.6.jar",
|
30
|
+
"jars/lib/xmlbeans-2.3.0.jar",
|
31
|
+
"jars/simple-spreadsheet-extractor-0.3.2.jar",
|
32
|
+
"lib/simple-spreadsheet-extractor.rb",
|
33
|
+
"simple-spreadsheet-extractor.gemspec"
|
34
|
+
]
|
35
|
+
s.homepage = %q{http://github.com/myGrid/simple-spreadsheet-extractor-gem}
|
36
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
37
|
+
s.require_paths = ["lib"]
|
38
|
+
s.rubygems_version = %q{1.3.7}
|
39
|
+
s.summary = %q{Basic spreadsheet content extraction using Apache POI}
|
40
|
+
s.test_files = [
|
41
|
+
"test/test_extraction.rb"
|
42
|
+
]
|
43
|
+
|
44
|
+
if s.respond_to? :specification_version then
|
45
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<POpen4>, ["= 0.1.4"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<POpen4>, ["= 0.1.4"])
|
52
|
+
end
|
53
|
+
else
|
54
|
+
s.add_dependency(%q<POpen4>, ["= 0.1.4"])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'simple-spreadsheet-extractor'
|
3
|
+
require 'libxml'
|
4
|
+
|
5
|
+
class TestExtraction < Test::Unit::TestCase
|
6
|
+
|
7
|
+
SCHEMA_FILE_PATH = File.dirname(__FILE__) + "/../doc/schema-v1.xsd"
|
8
|
+
|
9
|
+
include SysMODB::SpreadsheetExtractor
|
10
|
+
|
11
|
+
def test_from_file_object
|
12
|
+
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
13
|
+
f=open(test_sheet,"rb")
|
14
|
+
xml = spreadsheet_to_xml(f)
|
15
|
+
assert_not_nil xml
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_validate_xml
|
19
|
+
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
20
|
+
f=open(test_sheet,"rb")
|
21
|
+
xml = spreadsheet_to_xml(f)
|
22
|
+
validate_against_schema(xml)
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_failure
|
26
|
+
test_sheet = File.dirname(__FILE__) + "/files/not-a-spreadsheet.xls"
|
27
|
+
f=open(test_sheet,"rb")
|
28
|
+
assert_raise SysMODB::SpreadsheetExtractionException do
|
29
|
+
spreadsheet_to_xml(f)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def validate_against_schema xml
|
34
|
+
document = LibXML::XML::Document.string(xml)
|
35
|
+
schema = LibXML::XML::Schema.new(SCHEMA_FILE_PATH)
|
36
|
+
begin
|
37
|
+
document.validate_schema(schema)
|
38
|
+
rescue LibXML::XML::Error => e
|
39
|
+
puts xml
|
40
|
+
assert false,"Error validating against schema: #{e.message}"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-spreadsheet-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 3
|
10
|
+
version: 0.3.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Stuart Owen
|
@@ -15,23 +15,23 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-07-
|
18
|
+
date: 2010-07-28 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name:
|
22
|
+
name: POpen4
|
23
23
|
prerelease: false
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
27
|
- - "="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 19
|
30
30
|
segments:
|
31
|
-
- 1
|
32
31
|
- 0
|
33
32
|
- 1
|
34
|
-
|
33
|
+
- 4
|
34
|
+
version: 0.1.4
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
description: Takes a stream to a spreadsheet file and produces and XML representation of its contents
|
@@ -41,25 +41,31 @@ executables: []
|
|
41
41
|
extensions: []
|
42
42
|
|
43
43
|
extra_rdoc_files:
|
44
|
-
- README.rdoc
|
45
44
|
- LICENCE
|
45
|
+
- README.rdoc
|
46
46
|
files:
|
47
|
-
-
|
48
|
-
-
|
49
|
-
-
|
50
|
-
-
|
47
|
+
- .gitignore
|
48
|
+
- LICENCE
|
49
|
+
- README.rdoc
|
50
|
+
- Rakefile
|
51
|
+
- VERSION
|
52
|
+
- doc/schema-v1.xsd
|
51
53
|
- jars/lib/dom4j-1.6.1.jar
|
54
|
+
- jars/lib/poi-3.6.jar
|
52
55
|
- jars/lib/poi-ooxml-3.6.jar
|
56
|
+
- jars/lib/poi-ooxml-schemas-3.6.jar
|
57
|
+
- jars/lib/xmlbeans-2.3.0.jar
|
53
58
|
- jars/simple-spreadsheet-extractor-0.3.2.jar
|
54
|
-
-
|
55
|
-
-
|
59
|
+
- lib/simple-spreadsheet-extractor.rb
|
60
|
+
- simple-spreadsheet-extractor.gemspec
|
61
|
+
- test/test_extraction.rb
|
56
62
|
has_rdoc: true
|
57
63
|
homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
|
58
64
|
licenses: []
|
59
65
|
|
60
66
|
post_install_message:
|
61
|
-
rdoc_options:
|
62
|
-
|
67
|
+
rdoc_options:
|
68
|
+
- --charset=UTF-8
|
63
69
|
require_paths:
|
64
70
|
- lib
|
65
71
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -87,5 +93,5 @@ rubygems_version: 1.3.7
|
|
87
93
|
signing_key:
|
88
94
|
specification_version: 3
|
89
95
|
summary: Basic spreadsheet content extraction using Apache POI
|
90
|
-
test_files:
|
91
|
-
|
96
|
+
test_files:
|
97
|
+
- test/test_extraction.rb
|