simple-spreadsheet-extractor 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ *.gem
2
+ .project
3
+ .loadpath
data/README.rdoc CHANGED
@@ -1,7 +1,7 @@
1
1
  = Simple Spreadsheet Extractor
2
2
 
3
3
  Authors:: Stuart Owen
4
- Version:: 0.3.2
4
+ Version:: 0.3.3
5
5
  Contact:: mailto:stuart.owen@manchester.ac.uk
6
6
  Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
7
7
  Copyright:: (c) 2010 The University of Manchester, UK
@@ -21,6 +21,10 @@ Java 1.6 (JRE) is required.
21
21
 
22
22
  gem install simple-spreadsheet-extractor
23
23
 
24
+ Note that on Windows you must also do:
25
+
26
+ gem install win32-open3
27
+
24
28
  == Usage
25
29
 
26
30
  * require 'simple-spreadsheet-extractor'
@@ -30,12 +34,15 @@ Java 1.6 (JRE) is required.
30
34
 
31
35
  e.g.
32
36
 
37
+ # example.rb - takes the spreadsheet path as its argument, e.g. ruby example.rb /tmp/spreadsheet.xls
33
38
  require 'rubygems'
34
39
  require 'simple-spreadsheet-extractor'
35
40
 
36
41
  include SysMODB::SpreadsheetExtractor
37
42
 
38
- f=open("/tmp/test-spreadsheet.xls")
43
+ path=ARGV.first
44
+
45
+ f=open(path)
39
46
  begin
40
47
  puts spreadsheet_to_xml(f)
41
48
  rescue SysMODB::SpreadsheetExtractionException=>e
@@ -46,7 +53,7 @@ Formulas are evaluated placing the result in the XML produced for that cell, how
46
53
 
47
54
  Row and column indexes start at 1, rather than 0, to keep consistent with namings of the cells in Excel.
48
55
 
49
- An XSD schema for the XML is available in doc/schema-v1.xsd
56
+ An XSD schema for the XML is available in {doc/schema-v1.xsd}[http://github.com/stuzart/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd]
50
57
 
51
58
  == Example XML
52
59
 
data/Rakefile ADDED
@@ -0,0 +1,37 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/rdoctask'
4
+ require 'rubygems'
5
+
6
+ require 'rake/gempackagetask'
7
+
8
+ task :default => [:test]
9
+
10
+ begin
11
+ require 'jeweler'
12
+ Jeweler::Tasks.new do |gemspec|
13
+ gemspec.name = "simple-spreadsheet-extractor"
14
+ gemspec.summary = "Basic spreadsheet content extraction using Apache POI"
15
+ gemspec.description = "Takes a stream to a spreadsheet file and produces and XML representation of its contents"
16
+ gemspec.email = "stuart.owen@manchester.ac.uk"
17
+ gemspec.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
18
+ gemspec.authors = ["Stuart Owen"]
19
+
20
+ gemspec.files.include %w(jars)
21
+ gemspec.files.exclude "test/*"
22
+ gemspec.extra_rdoc_files = ["README.rdoc", "LICENCE"]
23
+ gemspec.add_dependency("POpen4","0.1.4")
24
+ end
25
+ rescue LoadError
26
+ puts "Jeweler not available. Install it with: gem install jeweler"
27
+ end
28
+
29
+ task:test do
30
+ Rake::TestTask.new do |t|
31
+ t.libs << "test"
32
+ t.test_files = FileList['test/test*.rb']
33
+ t.verbose = true
34
+ end
35
+ end
36
+
37
+ #end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.3.3
data/doc/schema-v1.xsd ADDED
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsd:schema targetNamespace="http://www.sysmo-db.org/2010/xml/spreadsheet"
3
+ xml:lang="en" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns="http://www.sysmo-db.org/2010/xml/spreadsheet"
4
+ elementFormDefault="qualified">
5
+
6
+ <xsd:element name="workbook" type="Workbook"></xsd:element>
7
+
8
+ <xsd:complexType name="Workbook">
9
+ <xsd:sequence>
10
+ <xsd:element name="sheet" type="Sheet" minOccurs="0"
11
+ maxOccurs="unbounded" />
12
+ </xsd:sequence>
13
+ </xsd:complexType>
14
+
15
+ <xsd:complexType name="Sheet">
16
+ <xsd:sequence>
17
+ <xsd:element name="row" type="Row" minOccurs="0"
18
+ maxOccurs="unbounded" />
19
+ </xsd:sequence>
20
+ <xsd:attribute name="name" type="xsd:string" use="required"></xsd:attribute>
21
+ <xsd:attribute name="index" type="xsd:positiveInteger"
22
+ use="required"></xsd:attribute>
23
+ <xsd:attribute name="hidden" type="xsd:boolean" use="required"></xsd:attribute>
24
+ <xsd:attribute name="very_hidden" type="xsd:boolean"
25
+ use="required"></xsd:attribute>
26
+ <xsd:attribute name="first_row" type="xsd:positiveInteger"
27
+ use="required"></xsd:attribute>
28
+ <xsd:attribute name="last_row" type="xsd:positiveInteger"
29
+ use="required"></xsd:attribute>
30
+ </xsd:complexType>
31
+
32
+ <xsd:complexType name="Row">
33
+ <xsd:sequence>
34
+ <xsd:element name="cell" type="Cell" minOccurs="0"
35
+ maxOccurs="unbounded" />
36
+ </xsd:sequence>
37
+ <xsd:attribute name="index" type="xsd:positiveInteger"
38
+ use="required"></xsd:attribute>
39
+ </xsd:complexType>
40
+
41
+ <xsd:complexType name="Cell">
42
+ <xsd:simpleContent>
43
+ <xsd:extension base="xsd:string">
44
+ <xsd:attribute name="column" type="xsd:positiveInteger"
45
+ use="required"></xsd:attribute>
46
+ <xsd:attribute name="column_alpha" type="xsd:string"
47
+ use="required"></xsd:attribute>
48
+ <xsd:attribute name="row" type="xsd:positiveInteger"
49
+ use="required"></xsd:attribute>
50
+ <xsd:attribute name="type" type="xsd:string" use="required"></xsd:attribute>
51
+ <xsd:attribute name="formula" type="xsd:string" use="optional"></xsd:attribute>
52
+ </xsd:extension>
53
+ </xsd:simpleContent>
54
+ </xsd:complexType>
55
+
56
+ </xsd:schema>
@@ -1,5 +1,5 @@
1
1
  require 'rubygems'
2
- require 'open4'
2
+ require 'popen4'
3
3
 
4
4
  module SysMODB
5
5
 
@@ -14,24 +14,17 @@ module SysMODB
14
14
  command = "java -jar #{JAR_PATH}/simple-spreadsheet-extractor-0.3.2.jar"
15
15
  output = ""
16
16
  err_message = ""
17
- status = Open4::popen4(command) do |pid, stdin, stdout, stderr|
18
- while ((line = spreadsheet_data.gets) != nil) do
19
- stdin << line
20
- end
21
- stdin.close
17
+ status = POpen4::popen4(command) do |stdout, stderr, stdin, pid|
18
+ stdin=stdin.binmode
19
+ spreadsheet_data.each_byte{|b| stdin.putc(b)}
20
+ stdin.close
22
21
 
23
- while ((line = stdout.gets) != nil) do
24
- output << line
25
- end
26
- stdout.close
27
-
28
- while ((line=stderr.gets)!= nil) do
29
- err_message << line
30
- end
31
- stderr.close
22
+ output=stdout.read.strip
23
+ err_message=stderr.read.strip
24
+
32
25
  end
33
26
 
34
- if status.to_i != 0
27
+ if status.to_i != 0
35
28
  raise SpreadsheetExtractionException.new(err_message)
36
29
  end
37
30
 
@@ -0,0 +1,57 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{simple-spreadsheet-extractor}
8
+ s.version = "0.3.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Stuart Owen"]
12
+ s.date = %q{2010-07-28}
13
+ s.description = %q{Takes a stream to a spreadsheet file and produces and XML representation of its contents}
14
+ s.email = %q{stuart.owen@manchester.ac.uk}
15
+ s.extra_rdoc_files = [
16
+ "LICENCE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "LICENCE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "VERSION",
25
+ "doc/schema-v1.xsd",
26
+ "jars/lib/dom4j-1.6.1.jar",
27
+ "jars/lib/poi-3.6.jar",
28
+ "jars/lib/poi-ooxml-3.6.jar",
29
+ "jars/lib/poi-ooxml-schemas-3.6.jar",
30
+ "jars/lib/xmlbeans-2.3.0.jar",
31
+ "jars/simple-spreadsheet-extractor-0.3.2.jar",
32
+ "lib/simple-spreadsheet-extractor.rb",
33
+ "simple-spreadsheet-extractor.gemspec"
34
+ ]
35
+ s.homepage = %q{http://github.com/myGrid/simple-spreadsheet-extractor-gem}
36
+ s.rdoc_options = ["--charset=UTF-8"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = %q{1.3.7}
39
+ s.summary = %q{Basic spreadsheet content extraction using Apache POI}
40
+ s.test_files = [
41
+ "test/test_extraction.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_runtime_dependency(%q<POpen4>, ["= 0.1.4"])
50
+ else
51
+ s.add_dependency(%q<POpen4>, ["= 0.1.4"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<POpen4>, ["= 0.1.4"])
55
+ end
56
+ end
57
+
@@ -0,0 +1,45 @@
1
+ require 'test/unit'
2
+ require 'simple-spreadsheet-extractor'
3
+ require 'libxml'
4
+
5
+ class TestExtraction < Test::Unit::TestCase
6
+
7
+ SCHEMA_FILE_PATH = File.dirname(__FILE__) + "/../doc/schema-v1.xsd"
8
+
9
+ include SysMODB::SpreadsheetExtractor
10
+
11
+ def test_from_file_object
12
+ test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
13
+ f=open(test_sheet,"rb")
14
+ xml = spreadsheet_to_xml(f)
15
+ assert_not_nil xml
16
+ end
17
+
18
+ def test_validate_xml
19
+ test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
20
+ f=open(test_sheet,"rb")
21
+ xml = spreadsheet_to_xml(f)
22
+ validate_against_schema(xml)
23
+ end
24
+
25
+ def test_failure
26
+ test_sheet = File.dirname(__FILE__) + "/files/not-a-spreadsheet.xls"
27
+ f=open(test_sheet,"rb")
28
+ assert_raise SysMODB::SpreadsheetExtractionException do
29
+ spreadsheet_to_xml(f)
30
+ end
31
+ end
32
+
33
+ def validate_against_schema xml
34
+ document = LibXML::XML::Document.string(xml)
35
+ schema = LibXML::XML::Schema.new(SCHEMA_FILE_PATH)
36
+ begin
37
+ document.validate_schema(schema)
38
+ rescue LibXML::XML::Error => e
39
+ puts xml
40
+ assert false,"Error validating against schema: #{e.message}"
41
+ end
42
+ end
43
+
44
+
45
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-spreadsheet-extractor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 2
10
- version: 0.3.2
9
+ - 3
10
+ version: 0.3.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Stuart Owen
@@ -15,23 +15,23 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-27 00:00:00 +01:00
18
+ date: 2010-07-28 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: open4
22
+ name: POpen4
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
25
  none: false
26
26
  requirements:
27
27
  - - "="
28
28
  - !ruby/object:Gem::Version
29
- hash: 21
29
+ hash: 19
30
30
  segments:
31
- - 1
32
31
  - 0
33
32
  - 1
34
- version: 1.0.1
33
+ - 4
34
+ version: 0.1.4
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  description: Takes a stream to a spreadsheet file and produces and XML representation of its contents
@@ -41,25 +41,31 @@ executables: []
41
41
  extensions: []
42
42
 
43
43
  extra_rdoc_files:
44
- - README.rdoc
45
44
  - LICENCE
45
+ - README.rdoc
46
46
  files:
47
- - lib/simple-spreadsheet-extractor.rb
48
- - jars/lib/poi-ooxml-schemas-3.6.jar
49
- - jars/lib/poi-3.6.jar
50
- - jars/lib/xmlbeans-2.3.0.jar
47
+ - .gitignore
48
+ - LICENCE
49
+ - README.rdoc
50
+ - Rakefile
51
+ - VERSION
52
+ - doc/schema-v1.xsd
51
53
  - jars/lib/dom4j-1.6.1.jar
54
+ - jars/lib/poi-3.6.jar
52
55
  - jars/lib/poi-ooxml-3.6.jar
56
+ - jars/lib/poi-ooxml-schemas-3.6.jar
57
+ - jars/lib/xmlbeans-2.3.0.jar
53
58
  - jars/simple-spreadsheet-extractor-0.3.2.jar
54
- - README.rdoc
55
- - LICENCE
59
+ - lib/simple-spreadsheet-extractor.rb
60
+ - simple-spreadsheet-extractor.gemspec
61
+ - test/test_extraction.rb
56
62
  has_rdoc: true
57
63
  homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
58
64
  licenses: []
59
65
 
60
66
  post_install_message:
61
- rdoc_options: []
62
-
67
+ rdoc_options:
68
+ - --charset=UTF-8
63
69
  require_paths:
64
70
  - lib
65
71
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -87,5 +93,5 @@ rubygems_version: 1.3.7
87
93
  signing_key:
88
94
  specification_version: 3
89
95
  summary: Basic spreadsheet content extraction using Apache POI
90
- test_files: []
91
-
96
+ test_files:
97
+ - test/test_extraction.rb