simple-spreadsheet-extractor 0.13.4 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.travis.yml +11 -0
- data/Gemfile +3 -1
- data/Gemfile.lock +71 -1
- data/README.rdoc +7 -2
- data/VERSION +1 -1
- data/jars/lib/log4j-1.2.17.jar +0 -0
- data/jars/lib/poi-3.11.jar +0 -0
- data/jars/lib/poi-ooxml-3.11.jar +0 -0
- data/jars/lib/{poi-ooxml-schemas-3.9.jar → poi-ooxml-schemas-3.11.jar} +0 -0
- data/jars/lib/{xmlbeans-2.3.0.jar → xmlbeans-2.6.0.jar} +0 -0
- data/jars/simple-spreadsheet-extractor-0.15.0.jar +0 -0
- data/lib/simple-spreadsheet-extractor.rb +1 -97
- data/lib/sysmodb/extractor.rb +71 -0
- data/lib/sysmodb/simple-spreadsheet-extractor.rb +38 -0
- data/simple-spreadsheet-extractor.gemspec +22 -11
- data/test/files/test-spreadsheet.xlsx +0 -0
- data/test/test_extraction.rb +7 -7
- metadata +52 -19
- data/jars/lib/poi-ooxml-3.9.jar +0 -0
- data/jars/lib/rightfield-poi-3.9.jar +0 -0
- data/jars/simple-spreadsheet-extractor-0.13.1.jar +0 -0
checksums.yaml
CHANGED
|
@@ -1,15 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
|
|
5
|
-
data.tar.gz: !binary |-
|
|
6
|
-
YjVmZTBjYzBmMzMzY2VhYzBiZjNhMTJkNzMwYzViNjYzMzAyM2M2OA==
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 39a9475bfe11ce50a2edc97d54c3f2e93a5e7421
|
|
4
|
+
data.tar.gz: cd181e56d1f58a4b318129deca3bca6544b62910
|
|
7
5
|
SHA512:
|
|
8
|
-
metadata.gz:
|
|
9
|
-
|
|
10
|
-
NDdkZmQwNjYzNGM3NGFhNmE2NDQ4MzY1ZmJhNTU1ZGQ4MTc4ZGIzOTFkZDBl
|
|
11
|
-
NDRjNmIwMzY0MDA4OWYxZDFiYjc1NDlhY2I5NzdlZTI5ODNiNTM=
|
|
12
|
-
data.tar.gz: !binary |-
|
|
13
|
-
MDAzZDI0OWVhZWU4ZmM5N2ZjYzQxMTVmYjUzYTM5ZmU2NWUxMzI0NmY2MWVm
|
|
14
|
-
NDZkMmRjMjgxOGUxNTdhNTk2ZjAwOGJlZmRiMDljYTM3MmNlM2JhZjY5ZDgy
|
|
15
|
-
ZTI1ZDhjMDVkNGJkNmNhN2Y3NjU3OGQ4OGRiODFmNDdiODMxM2Y=
|
|
6
|
+
metadata.gz: e9dd4fd69b122c774d731219e44273dd73971cb325313f9237bb048cf6b95b57637ac510a4a6e48c17824e72bfb1db252625a058e6dfbedba1c0685ec17d15a7
|
|
7
|
+
data.tar.gz: 0f89e8351ef0e20732a51cca35061e76cd9b8b5692a4936ec2ddfcb2113bbe4a4516e5e7c7d4945e90967a6a58e916778e6559a2892754d3a1a9b17f7e9da652
|
data/.travis.yml
ADDED
data/Gemfile
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
source 'https://rubygems.org'
|
|
2
2
|
source 'http://gems.github.com'
|
|
3
3
|
|
|
4
|
-
gem "libxml-ruby","~>2.6
|
|
4
|
+
gem "libxml-ruby","~>2.6"
|
|
5
5
|
gem "open4","1.3.0"
|
|
6
6
|
|
|
7
7
|
gem "rdoc",:group=>:development
|
|
8
8
|
gem "jeweler",:group=>:development
|
|
9
|
+
gem "rubocop", :group=>:development,:require=>false
|
|
10
|
+
gem "rubycritic", :group=>:development,:require=>false
|
data/Gemfile.lock
CHANGED
|
@@ -2,10 +2,36 @@ GEM
|
|
|
2
2
|
remote: https://rubygems.org/
|
|
3
3
|
remote: http://gems.github.com/
|
|
4
4
|
specs:
|
|
5
|
+
abstract_type (0.0.7)
|
|
6
|
+
adamantium (0.2.0)
|
|
7
|
+
ice_nine (~> 0.11.0)
|
|
8
|
+
memoizable (~> 0.4.0)
|
|
5
9
|
addressable (2.3.5)
|
|
10
|
+
ast (2.0.0)
|
|
11
|
+
astrolabe (1.3.0)
|
|
12
|
+
parser (>= 2.2.0.pre.3, < 3.0)
|
|
13
|
+
axiom-types (0.1.1)
|
|
14
|
+
descendants_tracker (~> 0.0.4)
|
|
15
|
+
ice_nine (~> 0.11.0)
|
|
16
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
6
17
|
builder (3.2.2)
|
|
18
|
+
coercible (1.0.0)
|
|
19
|
+
descendants_tracker (~> 0.0.1)
|
|
20
|
+
concord (0.1.5)
|
|
21
|
+
adamantium (~> 0.2.0)
|
|
22
|
+
equalizer (~> 0.0.9)
|
|
23
|
+
descendants_tracker (0.0.4)
|
|
24
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
25
|
+
diff-lcs (1.2.5)
|
|
26
|
+
equalizer (0.0.9)
|
|
7
27
|
faraday (0.8.8)
|
|
8
28
|
multipart-post (~> 1.2.0)
|
|
29
|
+
flay (2.4.0)
|
|
30
|
+
ruby_parser (~> 3.0)
|
|
31
|
+
sexp_processor (~> 4.0)
|
|
32
|
+
flog (4.2.1)
|
|
33
|
+
ruby_parser (~> 3.1, > 3.1.0)
|
|
34
|
+
sexp_processor (~> 4.4)
|
|
9
35
|
git (1.2.6)
|
|
10
36
|
github_api (0.10.1)
|
|
11
37
|
addressable
|
|
@@ -17,6 +43,7 @@ GEM
|
|
|
17
43
|
hashie (2.0.5)
|
|
18
44
|
highline (1.6.20)
|
|
19
45
|
httpauth (0.2.0)
|
|
46
|
+
ice_nine (0.11.1)
|
|
20
47
|
jeweler (1.8.8)
|
|
21
48
|
builder
|
|
22
49
|
bundler (~> 1.0)
|
|
@@ -30,6 +57,8 @@ GEM
|
|
|
30
57
|
jwt (0.1.8)
|
|
31
58
|
multi_json (>= 1.5)
|
|
32
59
|
libxml-ruby (2.6.0)
|
|
60
|
+
memoizable (0.4.2)
|
|
61
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
33
62
|
multi_json (1.8.2)
|
|
34
63
|
multi_xml (0.5.5)
|
|
35
64
|
multipart-post (1.2.0)
|
|
@@ -42,16 +71,57 @@ GEM
|
|
|
42
71
|
multi_xml (~> 0.5)
|
|
43
72
|
rack (~> 1.2)
|
|
44
73
|
open4 (1.3.0)
|
|
74
|
+
parser (2.2.0.3)
|
|
75
|
+
ast (>= 1.1, < 3.0)
|
|
76
|
+
powerpack (0.1.0)
|
|
77
|
+
procto (0.0.2)
|
|
45
78
|
rack (1.5.2)
|
|
79
|
+
rainbow (2.0.0)
|
|
46
80
|
rake (10.1.0)
|
|
47
81
|
rdoc (4.0.1)
|
|
48
82
|
json (~> 1.4)
|
|
83
|
+
reek (1.6.5)
|
|
84
|
+
parser (~> 2.2.0.pre.7)
|
|
85
|
+
rainbow (>= 1.99, < 3.0)
|
|
86
|
+
unparser (~> 0.2.2)
|
|
87
|
+
rubocop (0.29.1)
|
|
88
|
+
astrolabe (~> 1.3)
|
|
89
|
+
parser (>= 2.2.0.1, < 3.0)
|
|
90
|
+
powerpack (~> 0.1)
|
|
91
|
+
rainbow (>= 1.99.1, < 3.0)
|
|
92
|
+
ruby-progressbar (~> 1.4)
|
|
93
|
+
ruby-progressbar (1.7.1)
|
|
94
|
+
ruby_parser (3.6.4)
|
|
95
|
+
sexp_processor (~> 4.1)
|
|
96
|
+
rubycritic (1.3.0)
|
|
97
|
+
flay (= 2.4.0)
|
|
98
|
+
flog (= 4.2.1)
|
|
99
|
+
parser (>= 2.2.0, < 3.0)
|
|
100
|
+
reek (= 1.6.5)
|
|
101
|
+
virtus (~> 1.0)
|
|
102
|
+
sexp_processor (4.4.5)
|
|
103
|
+
thread_safe (0.3.4)
|
|
104
|
+
unparser (0.2.2)
|
|
105
|
+
abstract_type (~> 0.0.7)
|
|
106
|
+
adamantium (~> 0.2.0)
|
|
107
|
+
concord (~> 0.1.5)
|
|
108
|
+
diff-lcs (~> 1.2.5)
|
|
109
|
+
equalizer (~> 0.0.9)
|
|
110
|
+
parser (~> 2.2.0.2)
|
|
111
|
+
procto (~> 0.0.2)
|
|
112
|
+
virtus (1.0.4)
|
|
113
|
+
axiom-types (~> 0.1)
|
|
114
|
+
coercible (~> 1.0)
|
|
115
|
+
descendants_tracker (~> 0.0, >= 0.0.3)
|
|
116
|
+
equalizer (~> 0.0, >= 0.0.9)
|
|
49
117
|
|
|
50
118
|
PLATFORMS
|
|
51
119
|
ruby
|
|
52
120
|
|
|
53
121
|
DEPENDENCIES
|
|
54
122
|
jeweler
|
|
55
|
-
libxml-ruby (~> 2.6
|
|
123
|
+
libxml-ruby (~> 2.6)
|
|
56
124
|
open4 (= 1.3.0)
|
|
57
125
|
rdoc
|
|
126
|
+
rubocop
|
|
127
|
+
rubycritic
|
data/README.rdoc
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
= Simple Spreadsheet Extractor
|
|
2
2
|
|
|
3
3
|
Authors:: Stuart Owen, Finn Bacall
|
|
4
|
-
Version:: 0.
|
|
4
|
+
Version:: 0.15.0
|
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
|
7
|
-
Copyright:: (c) 2010-
|
|
7
|
+
Copyright:: (c) 2010-2015 The University of Manchester, UK
|
|
8
|
+
|
|
9
|
+
{<img src="https://codeclimate.com/github/myGrid/simple-spreadsheet-extractor-gem/badges/gpa.svg" />}[https://codeclimate.com/github/myGrid/simple-spreadsheet-extractor-gem]
|
|
10
|
+
|
|
11
|
+
{<img src="https://travis-ci.org/myGrid/simple-spreadsheet-extractor-gem.svg?branch=master" alt="Build Status" />}[https://travis-ci.org/myGrid/simple-spreadsheet-extractor-gem]
|
|
8
12
|
|
|
9
13
|
|
|
10
14
|
== Synopsis
|
|
@@ -31,6 +35,7 @@ Java 1.6 (JRE) is required.
|
|
|
31
35
|
* include the module SysMODB::SpreadsheetExtractor
|
|
32
36
|
* pass an IO object to the method spreadsheet_to_xml which responds with the XML for the contents of the spreadsheet. Alternatively use spreadsheet_to_csv for CSV.
|
|
33
37
|
* if something goes wrong with the extraction then a SysMODB::SpreadsheetExtractionException will be thrown
|
|
38
|
+
* by default the JVM is allocated 512M of memory; you can override this by passing a string as the last argument, which is passed to -Xmx in the java command.
|
|
34
39
|
|
|
35
40
|
e.g.
|
|
36
41
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.
|
|
1
|
+
0.15.0
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,97 +1 @@
|
|
|
1
|
-
require '
|
|
2
|
-
require 'open4'
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
module SysMODB
|
|
6
|
-
|
|
7
|
-
class SpreadsheetExtractionException < Exception
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
module SpreadsheetExtractor
|
|
11
|
-
JAR_VERSION="0.13.1"
|
|
12
|
-
DEFAULT_PATH = File.dirname(__FILE__) + "/../jars/simple-spreadsheet-extractor-#{JAR_VERSION}.jar"
|
|
13
|
-
|
|
14
|
-
def spreadsheet_to_xml spreadsheet_data
|
|
15
|
-
if is_windows?
|
|
16
|
-
raise Exception.new("Windows is not currently supported")
|
|
17
|
-
else
|
|
18
|
-
read_with_open4 spreadsheet_data,"xml"
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def spreadsheet_to_csv spreadsheet_data,sheet=1,trim=false
|
|
23
|
-
if is_windows?
|
|
24
|
-
raise Exception.new("Windows is not currently supported")
|
|
25
|
-
else
|
|
26
|
-
read_with_open4 spreadsheet_data,"csv",sheet,trim
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def spreadsheet_extractor_command format="xml",sheet=nil,trim=false
|
|
33
|
-
command = "java -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
|
34
|
-
command += " -o #{format}"
|
|
35
|
-
command += " -s #{sheet}" if sheet
|
|
36
|
-
command += " -t" if trim
|
|
37
|
-
command
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
private
|
|
41
|
-
|
|
42
|
-
def is_windows?
|
|
43
|
-
!(RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/).nil?
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
#opens using POpen4 - this is for the benefit of Windows. It has been found to be unstable in Linux and give occasional segmentation faults
|
|
47
|
-
#def read_with_popen4 spreadsheet_data,format="xml",sheet=nil,trim=false
|
|
48
|
-
# output=""
|
|
49
|
-
# err_message = ""
|
|
50
|
-
# command = spreadsheet_extractor_command format,sheet,trim
|
|
51
|
-
# status = POpen4::popen4(command) do |stdout, stderr, stdin, pid|
|
|
52
|
-
# stdin=stdin.binmode
|
|
53
|
-
# spreadsheet_data.each_byte{|b| stdin.putc(b)}
|
|
54
|
-
# stdin.close
|
|
55
|
-
#
|
|
56
|
-
# output=stdout.read.strip
|
|
57
|
-
# err_message=stderr.read.strip
|
|
58
|
-
#
|
|
59
|
-
# end
|
|
60
|
-
#
|
|
61
|
-
# if status.to_i != 0
|
|
62
|
-
# raise SpreadsheetExtractionException.new(err_message)
|
|
63
|
-
# end
|
|
64
|
-
#
|
|
65
|
-
# output.strip
|
|
66
|
-
#end
|
|
67
|
-
|
|
68
|
-
def read_with_open4 spreadsheet_data,format="xml",sheet=nil,trim=false
|
|
69
|
-
output = ""
|
|
70
|
-
err_message = ""
|
|
71
|
-
command = spreadsheet_extractor_command format,sheet,trim
|
|
72
|
-
status = Open4::popen4(command) do |pid, stdin, stdout, stderr|
|
|
73
|
-
while ((line = spreadsheet_data.gets) != nil) do
|
|
74
|
-
stdin << line
|
|
75
|
-
end
|
|
76
|
-
stdin.close
|
|
77
|
-
|
|
78
|
-
while ((line = stdout.gets) != nil) do
|
|
79
|
-
output << line
|
|
80
|
-
end
|
|
81
|
-
stdout.close
|
|
82
|
-
|
|
83
|
-
while ((line=stderr.gets)!= nil) do
|
|
84
|
-
err_message << line
|
|
85
|
-
end
|
|
86
|
-
stderr.close
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
if status.to_i != 0
|
|
90
|
-
raise SpreadsheetExtractionException.new(err_message)
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
output.strip
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
end
|
|
97
|
-
end
|
|
1
|
+
require 'sysmodb/simple-spreadsheet-extractor'
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
require 'open4'
|
|
2
|
+
|
|
3
|
+
module SysMODB
|
|
4
|
+
#Exception that is thrown when a problem occurs during the extraction
|
|
5
|
+
class SpreadsheetExtractionException < Exception; end
|
|
6
|
+
|
|
7
|
+
#handles the delegation to java, and executes the extraction passing the
|
|
8
|
+
#input file through STDIN, and reading the results through STDOUT.
|
|
9
|
+
class Extractor
|
|
10
|
+
JAR_VERSION="0.15.0"
|
|
11
|
+
DEFAULT_PATH = File.dirname(__FILE__) + "/../../jars/simple-spreadsheet-extractor-#{JAR_VERSION}.jar"
|
|
12
|
+
BUFFER_SIZE=250000 # 1/4 a megabyte
|
|
13
|
+
|
|
14
|
+
def initialize(memory_allocation)
|
|
15
|
+
@memory_allocation = memory_allocation
|
|
16
|
+
if is_windows?
|
|
17
|
+
raise Exception.new("Windows is not currently supported")
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def spreadsheet_to_xml(spreadsheet_data)
|
|
22
|
+
read_with_open4 spreadsheet_data,"xml"
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def spreadsheet_to_csv(spreadsheet_data,sheet=1,trim=false)
|
|
26
|
+
read_with_open4 spreadsheet_data,"csv",sheet,trim
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
private
|
|
30
|
+
|
|
31
|
+
def spreadsheet_extractor_command(format="xml",sheet=nil,trim=false)
|
|
32
|
+
command = "java -Xmx#{@memory_allocation} -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
|
33
|
+
command += " -o #{format}"
|
|
34
|
+
command += " -s #{sheet}" if sheet
|
|
35
|
+
command += " -t" if trim
|
|
36
|
+
command
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def is_windows?
|
|
40
|
+
!(RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/).nil?
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def read_with_open4(spreadsheet_data,format="xml",sheet=nil,trim=false)
|
|
44
|
+
output = ""
|
|
45
|
+
err_message = ""
|
|
46
|
+
command = spreadsheet_extractor_command format,sheet,trim
|
|
47
|
+
status = Open4.popen4(command) do |_pid, stdin, stdout, stderr|
|
|
48
|
+
while ((line = spreadsheet_data.gets(BUFFER_SIZE)) != nil) do
|
|
49
|
+
stdin << line
|
|
50
|
+
end
|
|
51
|
+
stdin.close
|
|
52
|
+
|
|
53
|
+
while ((line = stdout.gets(BUFFER_SIZE)) != nil) do
|
|
54
|
+
output << line
|
|
55
|
+
end
|
|
56
|
+
stdout.close
|
|
57
|
+
|
|
58
|
+
until ((line=stderr.gets((BUFFER_SIZE))).nil?) do
|
|
59
|
+
err_message << line
|
|
60
|
+
end
|
|
61
|
+
stderr.close
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
if status.to_i != 0
|
|
65
|
+
raise SpreadsheetExtractionException.new(err_message)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
output.strip
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
|
|
2
|
+
require 'sysmodb/extractor'
|
|
3
|
+
|
|
4
|
+
module SysMODB
|
|
5
|
+
DEFAULT_MEMORY_ALLOCATION = '512M'
|
|
6
|
+
|
|
7
|
+
# The mixin to allow you to extract from a spreadsheet file using
|
|
8
|
+
# spreadsheet_to_xml to extract to an XML format (see README and schema in doc/schema-v1.xsd)
|
|
9
|
+
# spreadsheet_to_csv to extract to CSV format for a single sheet
|
|
10
|
+
module SpreadsheetExtractor
|
|
11
|
+
|
|
12
|
+
# :call-seq:
|
|
13
|
+
# spreadsheet_to_xml(spreadsheet_data) -> String
|
|
14
|
+
# spreadsheet_to_xml(spreadsheet_data, memory_allocation) -> String
|
|
15
|
+
#
|
|
16
|
+
# reads the incoming data from an IO object and returns the generated XML.
|
|
17
|
+
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
|
18
|
+
# be changed by passing an optional final parameter memory_allocation
|
|
19
|
+
def spreadsheet_to_xml(spreadsheet_data, memory_allocation = DEFAULT_MEMORY_ALLOCATION)
|
|
20
|
+
SysMODB::Extractor.new(memory_allocation).spreadsheet_to_xml(spreadsheet_data)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# :call-seq:
|
|
24
|
+
# spreadsheet_to_csv(spreadsheet_data) -> String
|
|
25
|
+
# spreadsheet_to_csv(spreadsheet_data, sheet) -> String
|
|
26
|
+
# spreadsheet_to_csv(spreadsheet_data, sheet, trim) -> String
|
|
27
|
+
# spreadsheet_to_csv(spreadsheet_data, sheet, trim, memory_allocation) -> String
|
|
28
|
+
#
|
|
29
|
+
# reads the incoming data from an IO object and returns the generated CSV.
|
|
30
|
+
# only 1 sheet is processed, which by default is the first sheet.
|
|
31
|
+
# if trim is set to true, preceding or trailing cells will be removed whilst keeping the csv uniform.
|
|
32
|
+
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
|
33
|
+
# be changed by passing an optional final parameter memory_allocation
|
|
34
|
+
def spreadsheet_to_csv(spreadsheet_data, sheet = 1, trim = false, memory_allocation = DEFAULT_MEMORY_ALLOCATION)
|
|
35
|
+
SysMODB::Extractor.new(memory_allocation).spreadsheet_to_csv(spreadsheet_data, sheet, trim)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -2,16 +2,16 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: simple-spreadsheet-extractor 0.
|
|
5
|
+
# stub: simple-spreadsheet-extractor 0.15.0 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
8
|
s.name = "simple-spreadsheet-extractor"
|
|
9
|
-
s.version = "0.
|
|
9
|
+
s.version = "0.15.0"
|
|
10
10
|
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
12
12
|
s.require_paths = ["lib"]
|
|
13
13
|
s.authors = ["Stuart Owen", "Finn Bacall", "Quyen Nguyen"]
|
|
14
|
-
s.date = "2015-
|
|
14
|
+
s.date = "2015-03-13"
|
|
15
15
|
s.description = "Takes a stream to a spreadsheet file and produces an XML or CSV representation of its contents"
|
|
16
16
|
s.email = "stuart.owen@manchester.ac.uk"
|
|
17
17
|
s.extra_rdoc_files = [
|
|
@@ -20,6 +20,7 @@ Gem::Specification.new do |s|
|
|
|
20
20
|
s.files = [
|
|
21
21
|
".ruby-gemspec",
|
|
22
22
|
".ruby-version",
|
|
23
|
+
".travis.yml",
|
|
23
24
|
"Gemfile",
|
|
24
25
|
"Gemfile.lock",
|
|
25
26
|
"LICENSE",
|
|
@@ -29,18 +30,22 @@ Gem::Specification.new do |s|
|
|
|
29
30
|
"doc/schema-v1.xsd",
|
|
30
31
|
"examples/example.rb",
|
|
31
32
|
"jars/lib/dom4j-1.6.1.jar",
|
|
32
|
-
"jars/lib/
|
|
33
|
-
"jars/lib/poi-
|
|
34
|
-
"jars/lib/
|
|
35
|
-
"jars/lib/
|
|
36
|
-
"jars/
|
|
33
|
+
"jars/lib/log4j-1.2.17.jar",
|
|
34
|
+
"jars/lib/poi-3.11.jar",
|
|
35
|
+
"jars/lib/poi-ooxml-3.11.jar",
|
|
36
|
+
"jars/lib/poi-ooxml-schemas-3.11.jar",
|
|
37
|
+
"jars/lib/xmlbeans-2.6.0.jar",
|
|
38
|
+
"jars/simple-spreadsheet-extractor-0.15.0.jar",
|
|
37
39
|
"lib/simple-spreadsheet-extractor.rb",
|
|
40
|
+
"lib/sysmodb/extractor.rb",
|
|
41
|
+
"lib/sysmodb/simple-spreadsheet-extractor.rb",
|
|
38
42
|
"simple-spreadsheet-extractor.gemspec",
|
|
39
43
|
"test/files/not-a-spreadsheet.xls",
|
|
40
44
|
"test/files/problem_with_dv.xls",
|
|
41
45
|
"test/files/test-csv-output1-trimmed.csv",
|
|
42
46
|
"test/files/test-csv-output1.csv",
|
|
43
47
|
"test/files/test-spreadsheet.xls",
|
|
48
|
+
"test/files/test-spreadsheet.xlsx",
|
|
44
49
|
"test/test_extraction.rb"
|
|
45
50
|
]
|
|
46
51
|
s.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
|
|
@@ -51,21 +56,27 @@ Gem::Specification.new do |s|
|
|
|
51
56
|
s.specification_version = 4
|
|
52
57
|
|
|
53
58
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
54
|
-
s.add_runtime_dependency(%q<libxml-ruby>, ["~> 2.6
|
|
59
|
+
s.add_runtime_dependency(%q<libxml-ruby>, ["~> 2.6"])
|
|
55
60
|
s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
|
|
56
61
|
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
|
57
62
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
|
63
|
+
s.add_development_dependency(%q<rubocop>, [">= 0"])
|
|
64
|
+
s.add_development_dependency(%q<rubycritic>, [">= 0"])
|
|
58
65
|
else
|
|
59
|
-
s.add_dependency(%q<libxml-ruby>, ["~> 2.6
|
|
66
|
+
s.add_dependency(%q<libxml-ruby>, ["~> 2.6"])
|
|
60
67
|
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
|
61
68
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
|
62
69
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
70
|
+
s.add_dependency(%q<rubocop>, [">= 0"])
|
|
71
|
+
s.add_dependency(%q<rubycritic>, [">= 0"])
|
|
63
72
|
end
|
|
64
73
|
else
|
|
65
|
-
s.add_dependency(%q<libxml-ruby>, ["~> 2.6
|
|
74
|
+
s.add_dependency(%q<libxml-ruby>, ["~> 2.6"])
|
|
66
75
|
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
|
67
76
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
|
68
77
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
78
|
+
s.add_dependency(%q<rubocop>, [">= 0"])
|
|
79
|
+
s.add_dependency(%q<rubycritic>, [">= 0"])
|
|
69
80
|
end
|
|
70
81
|
end
|
|
71
82
|
|
|
Binary file
|
data/test/test_extraction.rb
CHANGED
|
@@ -7,13 +7,6 @@ class TestExtraction < Test::Unit::TestCase
|
|
|
7
7
|
SCHEMA_FILE_PATH = File.dirname(__FILE__) + "/../doc/schema-v1.xsd"
|
|
8
8
|
|
|
9
9
|
include SysMODB::SpreadsheetExtractor
|
|
10
|
-
|
|
11
|
-
def test_from_file_object
|
|
12
|
-
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
13
|
-
f=open(test_sheet,"rb")
|
|
14
|
-
xml = spreadsheet_to_xml(f)
|
|
15
|
-
assert_not_nil xml
|
|
16
|
-
end
|
|
17
10
|
|
|
18
11
|
def test_from_non_file_io_object
|
|
19
12
|
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
@@ -29,6 +22,13 @@ class TestExtraction < Test::Unit::TestCase
|
|
|
29
22
|
validate_against_schema(xml)
|
|
30
23
|
end
|
|
31
24
|
|
|
25
|
+
def test_validate_xml_xlsx
|
|
26
|
+
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xlsx"
|
|
27
|
+
f=open(test_sheet,"rb")
|
|
28
|
+
xml = spreadsheet_to_xml(f)
|
|
29
|
+
validate_against_schema(xml)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
32
|
def test_failure
|
|
33
33
|
test_sheet = File.dirname(__FILE__) + "/files/not-a-spreadsheet.xls"
|
|
34
34
|
f=open(test_sheet,"rb")
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: simple-spreadsheet-extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.15.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Stuart Owen
|
|
@@ -10,22 +10,22 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2015-
|
|
13
|
+
date: 2015-03-13 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: libxml-ruby
|
|
17
17
|
requirement: !ruby/object:Gem::Requirement
|
|
18
18
|
requirements:
|
|
19
|
-
- - ~>
|
|
19
|
+
- - "~>"
|
|
20
20
|
- !ruby/object:Gem::Version
|
|
21
|
-
version: 2.6
|
|
21
|
+
version: '2.6'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
25
25
|
requirements:
|
|
26
|
-
- - ~>
|
|
26
|
+
- - "~>"
|
|
27
27
|
- !ruby/object:Gem::Version
|
|
28
|
-
version: 2.6
|
|
28
|
+
version: '2.6'
|
|
29
29
|
- !ruby/object:Gem::Dependency
|
|
30
30
|
name: open4
|
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -44,28 +44,56 @@ dependencies:
|
|
|
44
44
|
name: rdoc
|
|
45
45
|
requirement: !ruby/object:Gem::Requirement
|
|
46
46
|
requirements:
|
|
47
|
-
- -
|
|
47
|
+
- - ">="
|
|
48
48
|
- !ruby/object:Gem::Version
|
|
49
49
|
version: '0'
|
|
50
50
|
type: :development
|
|
51
51
|
prerelease: false
|
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
|
54
|
-
- -
|
|
54
|
+
- - ">="
|
|
55
55
|
- !ruby/object:Gem::Version
|
|
56
56
|
version: '0'
|
|
57
57
|
- !ruby/object:Gem::Dependency
|
|
58
58
|
name: jeweler
|
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
|
60
60
|
requirements:
|
|
61
|
-
- -
|
|
61
|
+
- - ">="
|
|
62
62
|
- !ruby/object:Gem::Version
|
|
63
63
|
version: '0'
|
|
64
64
|
type: :development
|
|
65
65
|
prerelease: false
|
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
|
67
67
|
requirements:
|
|
68
|
-
- -
|
|
68
|
+
- - ">="
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
version: '0'
|
|
71
|
+
- !ruby/object:Gem::Dependency
|
|
72
|
+
name: rubocop
|
|
73
|
+
requirement: !ruby/object:Gem::Requirement
|
|
74
|
+
requirements:
|
|
75
|
+
- - ">="
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: '0'
|
|
78
|
+
type: :development
|
|
79
|
+
prerelease: false
|
|
80
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
81
|
+
requirements:
|
|
82
|
+
- - ">="
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
version: '0'
|
|
85
|
+
- !ruby/object:Gem::Dependency
|
|
86
|
+
name: rubycritic
|
|
87
|
+
requirement: !ruby/object:Gem::Requirement
|
|
88
|
+
requirements:
|
|
89
|
+
- - ">="
|
|
90
|
+
- !ruby/object:Gem::Version
|
|
91
|
+
version: '0'
|
|
92
|
+
type: :development
|
|
93
|
+
prerelease: false
|
|
94
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
95
|
+
requirements:
|
|
96
|
+
- - ">="
|
|
69
97
|
- !ruby/object:Gem::Version
|
|
70
98
|
version: '0'
|
|
71
99
|
description: Takes a stream to a spreadsheet file and produces an XML or CSV representation
|
|
@@ -76,8 +104,9 @@ extensions: []
|
|
|
76
104
|
extra_rdoc_files:
|
|
77
105
|
- README.rdoc
|
|
78
106
|
files:
|
|
79
|
-
- .ruby-gemspec
|
|
80
|
-
- .ruby-version
|
|
107
|
+
- ".ruby-gemspec"
|
|
108
|
+
- ".ruby-version"
|
|
109
|
+
- ".travis.yml"
|
|
81
110
|
- Gemfile
|
|
82
111
|
- Gemfile.lock
|
|
83
112
|
- LICENSE
|
|
@@ -87,18 +116,22 @@ files:
|
|
|
87
116
|
- doc/schema-v1.xsd
|
|
88
117
|
- examples/example.rb
|
|
89
118
|
- jars/lib/dom4j-1.6.1.jar
|
|
90
|
-
- jars/lib/
|
|
91
|
-
- jars/lib/poi-
|
|
92
|
-
- jars/lib/
|
|
93
|
-
- jars/lib/
|
|
94
|
-
- jars/
|
|
119
|
+
- jars/lib/log4j-1.2.17.jar
|
|
120
|
+
- jars/lib/poi-3.11.jar
|
|
121
|
+
- jars/lib/poi-ooxml-3.11.jar
|
|
122
|
+
- jars/lib/poi-ooxml-schemas-3.11.jar
|
|
123
|
+
- jars/lib/xmlbeans-2.6.0.jar
|
|
124
|
+
- jars/simple-spreadsheet-extractor-0.15.0.jar
|
|
95
125
|
- lib/simple-spreadsheet-extractor.rb
|
|
126
|
+
- lib/sysmodb/extractor.rb
|
|
127
|
+
- lib/sysmodb/simple-spreadsheet-extractor.rb
|
|
96
128
|
- simple-spreadsheet-extractor.gemspec
|
|
97
129
|
- test/files/not-a-spreadsheet.xls
|
|
98
130
|
- test/files/problem_with_dv.xls
|
|
99
131
|
- test/files/test-csv-output1-trimmed.csv
|
|
100
132
|
- test/files/test-csv-output1.csv
|
|
101
133
|
- test/files/test-spreadsheet.xls
|
|
134
|
+
- test/files/test-spreadsheet.xlsx
|
|
102
135
|
- test/test_extraction.rb
|
|
103
136
|
homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
|
|
104
137
|
licenses: []
|
|
@@ -109,12 +142,12 @@ require_paths:
|
|
|
109
142
|
- lib
|
|
110
143
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
144
|
requirements:
|
|
112
|
-
- -
|
|
145
|
+
- - ">="
|
|
113
146
|
- !ruby/object:Gem::Version
|
|
114
147
|
version: '0'
|
|
115
148
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
149
|
requirements:
|
|
117
|
-
- -
|
|
150
|
+
- - ">="
|
|
118
151
|
- !ruby/object:Gem::Version
|
|
119
152
|
version: '0'
|
|
120
153
|
requirements: []
|
data/jars/lib/poi-ooxml-3.9.jar
DELETED
|
Binary file
|
|
Binary file
|
|
Binary file
|