simple-spreadsheet-extractor 0.3.5 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  = Simple Spreadsheet Extractor
2
2
 
3
- Authors:: Stuart Owen
4
- Version:: 0.3.4
3
+ Authors:: Finn Bacall, Stuart Owen
4
+ Version:: 0.4.0
5
5
  Contact:: mailto:stuart.owen@manchester.ac.uk
6
6
  Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
7
7
  Copyright:: (c) 2010 The University of Manchester, UK
@@ -53,31 +53,56 @@ Formulas are evaluated placing the result in the XML produced for that cell, how
53
53
 
54
54
  Row and column indexes start at 1, rather than 0, to stay consistent with how cells are named in Excel.
55
55
 
56
- An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/stuzart/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd]
56
+ An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/fbacall/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd"]
57
+
58
+ The desired spreadsheet extractor jar can be specified by defining SPREADSHEET_EXTRACTOR_JAR_PATH in a config file (e.g. environment.rb)
57
59
 
58
60
  == Example XML
59
61
 
60
62
  <?xml version="1.0" encoding="UTF-8"?>
61
63
  <workbook xmlns="http://www.sysmo-db.org/2010/xml/spreadsheet">
62
- <sheet name="Sheet1" index="1" hidden="false" very_hidden="false" first_row="1" last_row="5">
63
- <row index="1">
64
- <cell column="1" column_alpha="A" row="1" type="numeric">12.0</cell>
65
- <cell column="2" column_alpha="B" row="1" type="numeric">654153.0</cell>
66
- <cell column="27" column_alpha="AA" row="1" type="string">AA</cell>
67
- </row>
64
+ <sheet name="Sheet1" index="1" hidden="false" very_hidden="false" first_row="2" last_row="8">
68
65
  <row index="2">
69
- <cell column="1" column_alpha="A" row="2" type="numeric">547654.0</cell>
66
+ <cell column="2" column_alpha="B" row="2" type="string">test2</cell>
67
+ <cell column="3" column_alpha="C" row="2" type="string">test</cell>
68
+ <cell column="4" column_alpha="D" row="2" type="string">test3</cell>
70
69
  </row>
70
+
71
71
  <row index="3">
72
- <cell column="1" column_alpha="A" row="3" type="numeric">45465.0</cell>
72
+ <cell column="2" column_alpha="B" row="3" type="string">a</cell>
73
+ <cell column="3" column_alpha="C" row="3" type="numeric">1.0</cell>
74
+ <cell column="4" column_alpha="D" row="3" type="numeric">22.0</cell>
73
75
  </row>
74
76
  <row index="4">
75
- <cell column="1" column_alpha="A" row="4" type="numeric" formula="A1+1">13.0</cell>
77
+ <cell column="2" column_alpha="B" row="4" type="string">b</cell>
78
+
79
+ <cell column="3" column_alpha="C" row="4" type="numeric">2.0</cell>
80
+ <cell column="4" column_alpha="D" row="4" type="numeric">5.0</cell>
76
81
  </row>
77
82
  <row index="5">
78
- <cell column="1" column_alpha="A" row="5" type="numeric" formula="SUM(A1:A4)">593144.0</cell>
83
+ <cell column="2" column_alpha="B" row="5" type="string">c</cell>
84
+ <cell column="3" column_alpha="C" row="5" type="numeric">3.0</cell>
85
+ <cell column="4" column_alpha="D" row="5" type="numeric">1.0</cell>
86
+
87
+ </row>
88
+ <row index="6">
89
+ <cell column="2" column_alpha="B" row="6" type="string">d</cell>
90
+ <cell column="3" column_alpha="C" row="6" type="numeric">4.0</cell>
91
+ <cell column="4" column_alpha="D" row="6" type="numeric">5.0</cell>
92
+ </row>
93
+ <row index="7">
94
+
95
+ <cell column="2" column_alpha="B" row="7" type="string">e</cell>
96
+ <cell column="3" column_alpha="C" row="7" type="numeric">5.0</cell>
97
+ <cell column="4" column_alpha="D" row="7" type="numeric">6.0</cell>
98
+ </row>
99
+ <row index="8">
100
+ <cell column="2" column_alpha="B" row="8" type="string">total</cell>
101
+ <cell column="3" column_alpha="C" row="8" type="numeric" formula="SUM(C3:C7)">15.0</cell>
102
+
103
+ <cell column="4" column_alpha="D" row="8" type="numeric" formula="SUM(D3:D7)">39.0</cell>
79
104
  </row>
80
105
  </sheet>
81
106
  <sheet name="Sheet2" index="2" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
82
107
  <sheet name="Sheet3" index="3" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
83
- </workbook>
108
+ </workbook>
data/Rakefile CHANGED
@@ -15,7 +15,7 @@ begin
15
15
  gemspec.description = "Takes a stream to a spreadsheet file and produces and XML representation of its contents"
16
16
  gemspec.email = "stuart.owen@manchester.ac.uk"
17
17
  gemspec.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
18
- gemspec.authors = ["Stuart Owen"]
18
+ gemspec.authors = ["Stuart Owen","Finn Bacall"]
19
19
 
20
20
  gemspec.files.include %w(jars)
21
21
  gemspec.files.exclude "test/*"
@@ -35,4 +35,4 @@ task:test do
35
35
  end
36
36
  end
37
37
 
38
- #end
38
+ #end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.5
1
+ 0.4.2
@@ -7,13 +7,52 @@
7
7
 
8
8
  <xsd:complexType name="Workbook">
9
9
  <xsd:sequence>
10
+ <xsd:element name="styles" type="Styles" minOccurs="1"
11
+ maxOccurs="1" />
10
12
  <xsd:element name="sheet" type="Sheet" minOccurs="0"
11
13
  maxOccurs="unbounded" />
12
14
  </xsd:sequence>
13
15
  </xsd:complexType>
14
16
 
17
+ <xsd:complexType name="Styles">
18
+ <xsd:sequence>
19
+ <xsd:element name="style" type="Style" minOccurs="0"
20
+ maxOccurs="unbounded" />
21
+ </xsd:sequence>
22
+ </xsd:complexType>
23
+
24
+ <xsd:complexType name="Style">
25
+ <xsd:sequence>
26
+ <xsd:element name="border-top" type="xsd:string" minOccurs="0"
27
+ maxOccurs="1" />
28
+ <xsd:element name="border-bottom" type="xsd:string" minOccurs="0"
29
+ maxOccurs="1" />
30
+ <xsd:element name="border-left" type="xsd:string" minOccurs="0"
31
+ maxOccurs="1" />
32
+ <xsd:element name="border-right" type="xsd:string" minOccurs="0"
33
+ maxOccurs="1" />
34
+ <xsd:element name="background-color" type="xsd:string" minOccurs="0"
35
+ maxOccurs="1" />
36
+ <xsd:element name="font-weight" type="xsd:string" minOccurs="0"
37
+ maxOccurs="1" />
38
+ <xsd:element name="font-style" type="xsd:string" minOccurs="0"
39
+ maxOccurs="1" />
40
+ <xsd:element name="text-decoration" type="xsd:string" minOccurs="0"
41
+ maxOccurs="1" />
42
+ <xsd:element name="font-size" type="xsd:string" minOccurs="0"
43
+ maxOccurs="1" />
44
+ <xsd:element name="font-family" type="xsd:string" minOccurs="0"
45
+ maxOccurs="1" />
46
+ <xsd:element name="color" type="xsd:string" minOccurs="0"
47
+ maxOccurs="1" />
48
+ </xsd:sequence>
49
+ <xsd:attribute name="id" type="xsd:string" use="required"></xsd:attribute>
50
+ </xsd:complexType>
51
+
15
52
  <xsd:complexType name="Sheet">
16
53
  <xsd:sequence>
54
+ <xsd:element name="columns" type="Columns" minOccurs="1"
55
+ maxOccurs="1" />
17
56
  <xsd:element name="row" type="Row" minOccurs="0"
18
57
  maxOccurs="unbounded" />
19
58
  </xsd:sequence>
@@ -29,6 +68,26 @@
29
68
  use="required"></xsd:attribute>
30
69
  </xsd:complexType>
31
70
 
71
+ <xsd:complexType name="Columns">
72
+ <xsd:sequence>
73
+ <xsd:element name="column" type="Column" minOccurs="0"
74
+ maxOccurs="unbounded" />
75
+ </xsd:sequence>
76
+ <xsd:attribute name="first_column" type="xsd:positiveInteger"
77
+ use="required"></xsd:attribute>
78
+ <xsd:attribute name="last_column" type="xsd:positiveInteger"
79
+ use="required"></xsd:attribute>
80
+ </xsd:complexType>
81
+
82
+ <xsd:complexType name="Column">
83
+ <xsd:attribute name="index" type="xsd:positiveInteger"
84
+ use="required"></xsd:attribute>
85
+ <xsd:attribute name="column_alpha" type="xsd:string"
86
+ use="required"></xsd:attribute>
87
+ <xsd:attribute name="width" type="xsd:positiveInteger"
88
+ use="optional"></xsd:attribute>
89
+ </xsd:complexType>
90
+
32
91
  <xsd:complexType name="Row">
33
92
  <xsd:sequence>
34
93
  <xsd:element name="cell" type="Cell" minOccurs="0"
@@ -36,6 +95,8 @@
36
95
  </xsd:sequence>
37
96
  <xsd:attribute name="index" type="xsd:positiveInteger"
38
97
  use="required"></xsd:attribute>
98
+ <xsd:attribute name="height" type="xsd:string"
99
+ use="optional"></xsd:attribute>
39
100
  </xsd:complexType>
40
101
 
41
102
  <xsd:complexType name="Cell">
@@ -49,6 +110,7 @@
49
110
  use="required"></xsd:attribute>
50
111
  <xsd:attribute name="type" type="xsd:string" use="required"></xsd:attribute>
51
112
  <xsd:attribute name="formula" type="xsd:string" use="optional"></xsd:attribute>
113
+ <xsd:attribute name="style" type="xsd:string" use="optional"></xsd:attribute>
52
114
  </xsd:extension>
53
115
  </xsd:simpleContent>
54
116
  </xsd:complexType>
@@ -10,10 +10,10 @@ module SysMODB
10
10
 
11
11
  module SpreadsheetExtractor
12
12
 
13
- JAR_PATH = File.dirname(__FILE__) + "/../jars"
14
- COMMAND = "java -jar #{JAR_PATH}/simple-spreadsheet-extractor-0.3.2.jar"
13
+ DEFAULT_PATH = File.dirname(__FILE__) + "/../jars/simple-spreadsheet-extractor-0.4.1.jar"
15
14
 
16
15
  def spreadsheet_to_xml spreadsheet_data
16
+
17
17
  if RUBY_PLATFORM =~ /mswin32/
18
18
  output = read_with_popen4 spreadsheet_data
19
19
  else
@@ -23,13 +23,19 @@ module SysMODB
23
23
  return output
24
24
  end
25
25
 
26
+
27
+
28
+ def spreadsheet_extractor_command
29
+ "java -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
30
+ end
31
+
26
32
  private
27
33
 
28
34
  #opens using POpen4 - this is for the benefit of Windows. It has been found to be unstable on Linux and to give occasional segmentation faults
29
35
  def read_with_popen4 spreadsheet_data
30
36
  output=""
31
37
  err_message = ""
32
- status = POpen4::popen4(COMMAND) do |stdout, stderr, stdin, pid|
38
+ status = POpen4::popen4(spreadsheet_extractor_command) do |stdout, stderr, stdin, pid|
33
39
  stdin=stdin.binmode
34
40
  spreadsheet_data.each_byte{|b| stdin.putc(b)}
35
41
  stdin.close
@@ -49,7 +55,7 @@ module SysMODB
49
55
  def read_with_open4 spreadsheet_data
50
56
  output = ""
51
57
  err_message = ""
52
- status = Open4::popen4(COMMAND) do |pid, stdin, stdout, stderr|
58
+ status = Open4::popen4(spreadsheet_extractor_command) do |pid, stdin, stdout, stderr|
53
59
  while ((line = spreadsheet_data.gets) != nil) do
54
60
  stdin << line
55
61
  end
@@ -74,4 +80,4 @@ module SysMODB
74
80
  end
75
81
 
76
82
  end
77
- end
83
+ end
@@ -1,40 +1,38 @@
1
1
  # Generated by jeweler
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{simple-spreadsheet-extractor}
8
- s.version = "0.3.5"
8
+ s.version = "0.4.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Stuart Owen"]
12
- s.date = %q{2010-11-02}
11
+ s.authors = ["Stuart Owen", "Finn Bacall"]
12
+ s.date = %q{2011-01-18}
13
13
  s.description = %q{Takes a stream to a spreadsheet file and produces and XML representation of its contents}
14
14
  s.email = %q{stuart.owen@manchester.ac.uk}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENCE",
17
- "README.rdoc"
17
+ "README.rdoc"
18
18
  ]
19
19
  s.files = [
20
- ".gitignore",
21
- "LICENCE",
22
- "README.rdoc",
23
- "Rakefile",
24
- "VERSION",
25
- "doc/schema-v1.xsd",
26
- "example.rb",
27
- "jars/lib/dom4j-1.6.1.jar",
28
- "jars/lib/poi-3.6.jar",
29
- "jars/lib/poi-ooxml-3.6.jar",
30
- "jars/lib/poi-ooxml-schemas-3.6.jar",
31
- "jars/lib/xmlbeans-2.3.0.jar",
32
- "jars/simple-spreadsheet-extractor-0.3.2.jar",
33
- "lib/simple-spreadsheet-extractor.rb",
34
- "simple-spreadsheet-extractor.gemspec"
20
+ "LICENCE",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "doc/schema-v1.xsd",
25
+ "example.rb",
26
+ "jars/lib/dom4j-1.6.1.jar",
27
+ "jars/lib/poi-3.6.jar",
28
+ "jars/lib/poi-ooxml-3.6.jar",
29
+ "jars/lib/poi-ooxml-schemas-3.6.jar",
30
+ "jars/lib/xmlbeans-2.3.0.jar",
31
+ "jars/simple-spreadsheet-extractor-0.4.1.jar",
32
+ "lib/simple-spreadsheet-extractor.rb",
33
+ "simple-spreadsheet-extractor.gemspec"
35
34
  ]
36
35
  s.homepage = %q{http://github.com/myGrid/simple-spreadsheet-extractor-gem}
37
- s.rdoc_options = ["--charset=UTF-8"]
38
36
  s.require_paths = ["lib"]
39
37
  s.rubygems_version = %q{1.3.6}
40
38
  s.summary = %q{Basic spreadsheet content extraction using Apache POI}
@@ -41,5 +41,13 @@ class TestExtraction < Test::Unit::TestCase
41
41
  end
42
42
  end
43
43
 
44
+ def test_for_segfault
45
+ test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
46
+ 10.times do |x|
47
+ f=open(test_sheet,"rb")
48
+ xml = spreadsheet_to_xml(f)
49
+ end
50
+ true
51
+ end
44
52
 
45
53
  end
metadata CHANGED
@@ -4,17 +4,18 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 3
8
- - 5
9
- version: 0.3.5
7
+ - 4
8
+ - 2
9
+ version: 0.4.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Stuart Owen
13
+ - Finn Bacall
13
14
  autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-11-02 00:00:00 +00:00
18
+ date: 2011-01-18 00:00:00 +00:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -55,7 +56,6 @@ extra_rdoc_files:
55
56
  - LICENCE
56
57
  - README.rdoc
57
58
  files:
58
- - .gitignore
59
59
  - LICENCE
60
60
  - README.rdoc
61
61
  - Rakefile
@@ -67,7 +67,7 @@ files:
67
67
  - jars/lib/poi-ooxml-3.6.jar
68
68
  - jars/lib/poi-ooxml-schemas-3.6.jar
69
69
  - jars/lib/xmlbeans-2.3.0.jar
70
- - jars/simple-spreadsheet-extractor-0.3.2.jar
70
+ - jars/simple-spreadsheet-extractor-0.4.1.jar
71
71
  - lib/simple-spreadsheet-extractor.rb
72
72
  - simple-spreadsheet-extractor.gemspec
73
73
  has_rdoc: true
@@ -75,8 +75,8 @@ homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
75
75
  licenses: []
76
76
 
77
77
  post_install_message:
78
- rdoc_options:
79
- - --charset=UTF-8
78
+ rdoc_options: []
79
+
80
80
  require_paths:
81
81
  - lib
82
82
  required_ruby_version: !ruby/object:Gem::Requirement
data/.gitignore DELETED
@@ -1,3 +0,0 @@
1
- *.gem
2
- .project
3
- .loadpath