simple-spreadsheet-extractor 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  = Simple Spreadsheet Extractor
2
2
 
3
- Authors:: Finn Bacall, Stuart Owen
4
- Version:: 0.4.0
3
+ Authors:: Stuart Owen, Finn Bacall
4
+ Version:: 0.5.0
5
5
  Contact:: mailto:stuart.owen@manchester.ac.uk
6
6
  Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
7
7
  Copyright:: (c) 2010 The University of Manchester, UK
@@ -11,6 +11,8 @@ Copyright:: (c) 2010 The University of Manchester, UK
11
11
 
12
12
  This is a simple gem that provides a facility to read an XLS or XLSX Excel spreadsheet document and produce an XML representation of its content.
13
13
 
14
+ CSV output can also be generated for a single sheet.
15
+
14
16
  Internally it uses Apache POI, using the sister http://github.com/myGrid/simple-spreadsheet-extractor tool.
15
17
 
16
18
  This is a simple tool developed for use within SysMO-DB[http://www.sysmo-db.org].
@@ -29,7 +31,7 @@ Note that on Windows you must also do:
29
31
 
30
32
  * require 'simple-spreadsheet-extractor'
31
33
  * include the module SysMODB::SpreadsheetExtractor
32
- * pass an IO object to the method spreedsheet_to_xml which responds with the XML for the contents of the sheet.
34
+ * pass an IO object to the method spreadsheet_to_xml which responds with the XML for the contents of the spreadsheet. Alternatively use spreadsheet_to_csv for CSV.
33
35
  * if something goes wrong with the extraction then a SysMODB::SpreadsheetExtractionException will be thrown
34
36
 
35
37
  e.g.
@@ -42,7 +44,7 @@ e.g.
42
44
 
43
45
  path=ARGV.first
44
46
 
45
- f=open(path)
47
+ f=open path
46
48
  begin
47
49
  puts spreadsheet_to_xml(f)
48
50
  rescue SysMODB::SpreadsheetExtractionException=>e
@@ -53,10 +55,20 @@ Formulas are evaluated placing the result in the XML produced for that cell, how
53
55
 
54
56
  Row and column indexes start at 1, rather than 0, to stay consistent with the naming of cells in Excel.
55
57
 
56
- An XSD schema for the XML is available in doc/schema-v1.xsd["http://github.com/fbacall/simple-spreadsheet-extractor-gem/blob/master/doc/schema-v1.xsd"]
58
+ An XSD schema for the XML is available in doc/schema-v1.xsd["tree/master/doc/schema-v1.xsd"]
57
59
 
58
60
  The desired spreadsheet extractor jar can be specified by defining SPREADSHEET_EXTRACTOR_JAR_PATH in a config file (e.g. environment.rb)
59
61
 
62
+ CSV can be generated in a similar way using spreadsheet_to_csv, which also takes an optional sheet number. If the sheet number is omitted then the first sheet is used.
63
+
64
+ Note that sheet numbers start at 1, and the sheet number can be given as either a string or an integer.
65
+
66
+ e.g.
67
+
68
+ puts spreadsheet_to_csv(f,"1")
69
+
70
+
71
+
60
72
  == Example XML
61
73
 
62
74
  <?xml version="1.0" encoding="UTF-8"?>
@@ -105,4 +117,4 @@ The desired spreadsheet extractor jar can be specified by defining SPREADSHEET_E
105
117
  </sheet>
106
118
  <sheet name="Sheet2" index="2" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
107
119
  <sheet name="Sheet3" index="3" hidden="false" very_hidden="false" first_row="1" last_row="1"/>
108
- </workbook>
120
+ </workbook>
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.5.0
@@ -10,32 +10,46 @@ module SysMODB
10
10
 
11
11
  module SpreadsheetExtractor
12
12
 
13
- DEFAULT_PATH = File.dirname(__FILE__) + "/../jars/simple-spreadsheet-extractor-0.4.1.jar"
13
+ DEFAULT_PATH = File.dirname(__FILE__) + "/../jars/simple-spreadsheet-extractor-0.5.0.jar"
14
14
 
15
- def spreadsheet_to_xml spreadsheet_data
16
-
17
- if RUBY_PLATFORM =~ /mswin32/
18
- output = read_with_popen4 spreadsheet_data
15
+ def spreadsheet_to_xml spreadsheet_data
16
+ if is_windows?
17
+ read_with_popen4 spreadsheet_data,"xml"
19
18
  else
20
- output = read_with_open4 spreadsheet_data
19
+ read_with_open4 spreadsheet_data,"xml"
20
+ end
21
+ end
22
+
23
+ def spreadsheet_to_csv spreadsheet_data,sheet=1,trim=false
24
+ if is_windows?
25
+ read_with_popen4 spreadsheet_data,"csv",sheet,trim
26
+ else
27
+ read_with_open4 spreadsheet_data,"csv",sheet,trim
21
28
  end
22
-
23
- return output
24
29
  end
25
30
 
26
31
 
27
32
 
28
- def spreadsheet_extractor_command
29
- "java -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
33
+ def spreadsheet_extractor_command format="xml",sheet=nil,trim=false
34
+ command = "java -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
35
+ command += " -o #{format}"
36
+ command += " -s #{sheet}" if sheet
37
+ command += " -t" if trim
38
+ command
30
39
  end
31
40
 
32
41
  private
33
-
34
- #opens using POpen4 - this is for the benefit of Windows. It has been found to be unstable in Linux and give occaisional segmentation faults
35
- def read_with_popen4 spreadsheet_data
42
+
43
+ def is_windows?
44
+ !(RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/).nil?
45
+ end
46
+
47
+ #opens using POpen4 - this is for the benefit of Windows. It has been found to be unstable in Linux and give occasional segmentation faults
48
+ def read_with_popen4 spreadsheet_data,format="xml",sheet=nil,trim=false
36
49
  output=""
37
50
  err_message = ""
38
- status = POpen4::popen4(spreadsheet_extractor_command) do |stdout, stderr, stdin, pid|
51
+ command = spreadsheet_extractor_command format,sheet,trim
52
+ status = POpen4::popen4(command) do |stdout, stderr, stdin, pid|
39
53
  stdin=stdin.binmode
40
54
  spreadsheet_data.each_byte{|b| stdin.putc(b)}
41
55
  stdin.close
@@ -49,13 +63,14 @@ module SysMODB
49
63
  raise SpreadsheetExtractionException.new(err_message)
50
64
  end
51
65
 
52
- return output
66
+ output.strip
53
67
  end
54
68
 
55
- def read_with_open4 spreadsheet_data
69
+ def read_with_open4 spreadsheet_data,format="xml",sheet=nil,trim=false
56
70
  output = ""
57
71
  err_message = ""
58
- status = Open4::popen4(spreadsheet_extractor_command) do |pid, stdin, stdout, stderr|
72
+ command = spreadsheet_extractor_command format,sheet,trim
73
+ status = Open4::popen4(command) do |pid, stdin, stdout, stderr|
59
74
  while ((line = spreadsheet_data.gets) != nil) do
60
75
  stdin << line
61
76
  end
@@ -76,7 +91,7 @@ module SysMODB
76
91
  raise SpreadsheetExtractionException.new(err_message)
77
92
  end
78
93
 
79
- return output
94
+ output.strip
80
95
  end
81
96
 
82
97
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{simple-spreadsheet-extractor}
8
- s.version = "0.4.2"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Stuart Owen", "Finn Bacall"]
12
- s.date = %q{2011-01-18}
12
+ s.date = %q{2011-03-03}
13
13
  s.description = %q{Takes a stream to a spreadsheet file and produces and XML representation of its contents}
14
14
  s.email = %q{stuart.owen@manchester.ac.uk}
15
15
  s.extra_rdoc_files = [
@@ -24,27 +24,26 @@ Gem::Specification.new do |s|
24
24
  "doc/schema-v1.xsd",
25
25
  "example.rb",
26
26
  "jars/lib/dom4j-1.6.1.jar",
27
- "jars/lib/poi-3.6.jar",
28
- "jars/lib/poi-ooxml-3.6.jar",
29
- "jars/lib/poi-ooxml-schemas-3.6.jar",
27
+ "jars/lib/poi-3.7.jar",
28
+ "jars/lib/poi-ooxml-3.7.jar",
29
+ "jars/lib/poi-ooxml-schemas-3.7.jar",
30
30
  "jars/lib/xmlbeans-2.3.0.jar",
31
- "jars/simple-spreadsheet-extractor-0.4.1.jar",
31
+ "jars/simple-spreadsheet-extractor-0.5.0.jar",
32
32
  "lib/simple-spreadsheet-extractor.rb",
33
33
  "simple-spreadsheet-extractor.gemspec"
34
34
  ]
35
35
  s.homepage = %q{http://github.com/myGrid/simple-spreadsheet-extractor-gem}
36
36
  s.require_paths = ["lib"]
37
- s.rubygems_version = %q{1.3.6}
37
+ s.rubygems_version = %q{1.4.2}
38
38
  s.summary = %q{Basic spreadsheet content extraction using Apache POI}
39
39
  s.test_files = [
40
40
  "test/test_extraction.rb"
41
41
  ]
42
42
 
43
43
  if s.respond_to? :specification_version then
44
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
45
44
  s.specification_version = 3
46
45
 
47
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
46
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
47
  s.add_runtime_dependency(%q<POpen4>, ["= 0.1.4"])
49
48
  s.add_runtime_dependency(%q<open4>, ["= 1.0.1"])
50
49
  else
@@ -40,6 +40,31 @@ class TestExtraction < Test::Unit::TestCase
40
40
  assert false,"Error validating against schema: #{e.message}"
41
41
  end
42
42
  end
43
+
44
+ def test_csv_output
45
+ test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
46
+ expected_file = File.dirname(__FILE__) + "/files/test-csv-output1.csv"
47
+ expected = open(expected_file,"rb").read
48
+
49
+ f=open(test_sheet,"rb")
50
+ csv = spreadsheet_to_csv(f,2)
51
+ assert_equal expected,csv
52
+
53
+ #try sheet as a string
54
+ f=open(test_sheet,"rb")
55
+ csv = spreadsheet_to_csv(f,"2")
56
+ assert_equal expected,csv
57
+ end
58
+
59
+ # def test_csv_output_trimmed
60
+ # test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
61
+ # expected_file = File.dirname(__FILE__) + "/files/test-csv-output1-trimmed.csv"
62
+ # expected = open(expected_file,"rb").read
63
+ #
64
+ # f=open(test_sheet,"rb")
65
+ # csv = spreadsheet_to_csv(f,2,true)
66
+ # assert_equal expected,csv
67
+ # end
43
68
 
44
69
  def test_for_segfault
45
70
  test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-spreadsheet-extractor
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 11
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
- - 4
8
- - 2
9
- version: 0.4.2
8
+ - 5
9
+ - 0
10
+ version: 0.5.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Stuart Owen
@@ -15,16 +16,18 @@ autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2011-01-18 00:00:00 +00:00
19
+ date: 2011-03-03 00:00:00 +00:00
19
20
  default_executable:
20
21
  dependencies:
21
22
  - !ruby/object:Gem::Dependency
22
23
  name: POpen4
23
24
  prerelease: false
24
25
  requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
25
27
  requirements:
26
28
  - - "="
27
29
  - !ruby/object:Gem::Version
30
+ hash: 19
28
31
  segments:
29
32
  - 0
30
33
  - 1
@@ -36,9 +39,11 @@ dependencies:
36
39
  name: open4
37
40
  prerelease: false
38
41
  requirement: &id002 !ruby/object:Gem::Requirement
42
+ none: false
39
43
  requirements:
40
44
  - - "="
41
45
  - !ruby/object:Gem::Version
46
+ hash: 21
42
47
  segments:
43
48
  - 1
44
49
  - 0
@@ -63,13 +68,14 @@ files:
63
68
  - doc/schema-v1.xsd
64
69
  - example.rb
65
70
  - jars/lib/dom4j-1.6.1.jar
66
- - jars/lib/poi-3.6.jar
67
- - jars/lib/poi-ooxml-3.6.jar
68
- - jars/lib/poi-ooxml-schemas-3.6.jar
71
+ - jars/lib/poi-3.7.jar
72
+ - jars/lib/poi-ooxml-3.7.jar
73
+ - jars/lib/poi-ooxml-schemas-3.7.jar
69
74
  - jars/lib/xmlbeans-2.3.0.jar
70
- - jars/simple-spreadsheet-extractor-0.4.1.jar
75
+ - jars/simple-spreadsheet-extractor-0.5.0.jar
71
76
  - lib/simple-spreadsheet-extractor.rb
72
77
  - simple-spreadsheet-extractor.gemspec
78
+ - test/test_extraction.rb
73
79
  has_rdoc: true
74
80
  homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
75
81
  licenses: []
@@ -80,23 +86,27 @@ rdoc_options: []
80
86
  require_paths:
81
87
  - lib
82
88
  required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
83
90
  requirements:
84
91
  - - ">="
85
92
  - !ruby/object:Gem::Version
93
+ hash: 3
86
94
  segments:
87
95
  - 0
88
96
  version: "0"
89
97
  required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
90
99
  requirements:
91
100
  - - ">="
92
101
  - !ruby/object:Gem::Version
102
+ hash: 3
93
103
  segments:
94
104
  - 0
95
105
  version: "0"
96
106
  requirements: []
97
107
 
98
108
  rubyforge_project:
99
- rubygems_version: 1.3.6
109
+ rubygems_version: 1.4.2
100
110
  signing_key:
101
111
  specification_version: 3
102
112
  summary: Basic spreadsheet content extraction using Apache POI
Binary file