simple-spreadsheet-extractor 0.16.1 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/LICENSE +1 -1
- data/README.rdoc +3 -2
- data/doc/schema-v1.xsd +2 -2
- data/jars/lib/SparseBitSet-1.2.jar +0 -0
- data/jars/lib/commons-codec-1.15.jar +0 -0
- data/jars/lib/commons-collections4-4.4.jar +0 -0
- data/jars/lib/commons-compress-1.21.jar +0 -0
- data/jars/lib/commons-io-2.11.0.jar +0 -0
- data/jars/lib/commons-math3-3.6.1.jar +0 -0
- data/jars/lib/{curvesapi-1.04.jar → curvesapi-1.07.jar} +0 -0
- data/jars/lib/log4j-1.2-api-2.20.0.jar +0 -0
- data/jars/lib/log4j-api-2.18.0.jar +0 -0
- data/jars/lib/log4j-core-2.20.0.jar +0 -0
- data/jars/lib/poi-5.2.3.jar +0 -0
- data/jars/lib/poi-ooxml-5.2.3.jar +0 -0
- data/jars/lib/poi-ooxml-lite-5.2.3.jar +0 -0
- data/jars/lib/xercesImpl-2.12.2.jar +0 -0
- data/jars/lib/xmlbeans-5.1.1.jar +0 -0
- data/jars/simple-spreadsheet-extractor-0.18.0.jar +0 -0
- data/lib/simple-spreadsheet-extractor.rb +2 -0
- data/lib/sysmodb/extractor.rb +33 -40
- data/lib/sysmodb/simple-spreadsheet-extractor.rb +2 -4
- data/lib/sysmodb/version.rb +1 -2
- data/simple-spreadsheet-extractor.gemspec +8 -7
- metadata +40 -37
- data/VERSION +0 -1
- data/jars/lib/commons-codec-1.10.jar +0 -0
- data/jars/lib/commons-collections4-4.1.jar +0 -0
- data/jars/lib/dom4j-1.6.1.jar +0 -0
- data/jars/lib/log4j-1.2.17.jar +0 -0
- data/jars/lib/poi-3.17.jar +0 -0
- data/jars/lib/poi-ooxml-3.17.jar +0 -0
- data/jars/lib/poi-ooxml-schemas-3.17.jar +0 -0
- data/jars/lib/xercesImpl-2.11.0.jar +0 -0
- data/jars/lib/xmlbeans-2.6.0.jar +0 -0
- data/jars/simple-spreadsheet-extractor-0.16.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 928ed7d7088b8fe16a877bb1bed779ffa02503a48a009bcf2ec084c350443629
|
4
|
+
data.tar.gz: e0f345a7ef579ad5b9068096792127501aced335fe16b73c7e9a169e80b03982
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b4d85813f7aa92d0c77b8897ea007de6346d32a5b11629e1d2e123b4758db266ac27279677fdf7fa9acc978a42e0c306b8ea9c121ba60cb093f54cdd3a0bafd
|
7
|
+
data.tar.gz: c7f88fe40795bb9a91b13cef3eceffe35f20601dc9d1d2a3ab88468468e53c12fe3b5d259cc01e3676d3c01912f2116ad6ce685dd829764cc93e0e99f8c031fe
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-
|
1
|
+
ruby-3.1.4
|
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
= Simple Spreadsheet Extractor
|
2
2
|
|
3
3
|
Authors:: Stuart Owen, Finn Bacall
|
4
|
-
Version:: 0.
|
4
|
+
Version:: 0.17.0
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
Copyright:: (c) 2010-2015 The University of Manchester, UK
|
@@ -23,7 +23,7 @@ This is a simple tool developed for use within SysMO-DB[http://www.sysmo-db.org]
|
|
23
23
|
|
24
24
|
== Installation
|
25
25
|
|
26
|
-
Java
|
26
|
+
Java 8 or above (JRE) is required.
|
27
27
|
|
28
28
|
gem install simple-spreadsheet-extractor
|
29
29
|
|
@@ -34,6 +34,7 @@ Java 1.7 (JRE) is required.
|
|
34
34
|
* require 'simple-spreadsheet-extractor'
|
35
35
|
* include the module SysMODB::SpreadsheetExtractor
|
36
36
|
* pass an IO object to the method spreadsheet_to_xml which responds with the XML for the contents of the spreadsheet. Alternatively use spreadsheet_to_csv for CSV.
|
37
|
+
* you can now also pass in the filepath to the Excel file instead of an IO object
|
37
38
|
* if something goes wrong with the extraction then a SysMODB::SpreadsheetExtractionException will be thrown
|
38
39
|
* by default the JVM is allocated 512M of memory, you can override this by passing a string as the last argument. This will be passed to -Xmx in the java command.
|
39
40
|
|
data/doc/schema-v1.xsd
CHANGED
@@ -93,9 +93,9 @@
|
|
93
93
|
<xsd:element name="row" type="Row" minOccurs="0"
|
94
94
|
maxOccurs="unbounded" />
|
95
95
|
</xsd:sequence>
|
96
|
-
<xsd:attribute name="first_row" type="xsd:
|
96
|
+
<xsd:attribute name="first_row" type="xsd:nonNegativeInteger"
|
97
97
|
use="required"></xsd:attribute>
|
98
|
-
<xsd:attribute name="last_row" type="xsd:
|
98
|
+
<xsd:attribute name="last_row" type="xsd:nonNegativeInteger"
|
99
99
|
use="required"></xsd:attribute>
|
100
100
|
</xsd:complexType>
|
101
101
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/sysmodb/extractor.rb
CHANGED
@@ -1,38 +1,49 @@
|
|
1
|
-
require '
|
1
|
+
require 'terrapin'
|
2
2
|
|
3
3
|
module SysMODB
|
4
|
-
#Exception that is thrown when a problem occurs during the extraction
|
4
|
+
# Exception that is thrown when a problem occurs during the extraction
|
5
5
|
class SpreadsheetExtractionException < Exception; end
|
6
6
|
|
7
|
-
#handles the delegation to java
|
8
|
-
#input file through STDIN, and reading the results through STDOUT.
|
7
|
+
# handles the delegation to java
|
9
8
|
class Extractor
|
10
|
-
JAR_VERSION=
|
9
|
+
JAR_VERSION = '0.18.0'.freeze
|
11
10
|
DEFAULT_PATH = File.dirname(__FILE__) + "/../../jars/simple-spreadsheet-extractor-#{JAR_VERSION}.jar"
|
12
|
-
BUFFER_SIZE=250000 # 1/4 a megabyte
|
13
11
|
|
14
12
|
def initialize(memory_allocation)
|
15
13
|
@memory_allocation = memory_allocation
|
16
|
-
if is_windows?
|
17
|
-
raise Exception.new("Windows is not currently supported")
|
18
|
-
end
|
14
|
+
raise Exception, 'Windows is not currently supported' if is_windows?
|
19
15
|
end
|
20
16
|
|
17
|
+
# spreadsheet_data can be an IO like object or the path to a file
|
21
18
|
def spreadsheet_to_xml(spreadsheet_data)
|
22
|
-
|
19
|
+
spreadsheet_to_requested_format(spreadsheet_data, 'xml')
|
23
20
|
end
|
24
21
|
|
25
|
-
|
26
|
-
|
22
|
+
# spreadsheet_data can be an IO like object or the path to a file
|
23
|
+
def spreadsheet_to_csv(spreadsheet_data, sheet = 1, trim = false)
|
24
|
+
spreadsheet_to_requested_format(spreadsheet_data, 'csv', sheet, trim)
|
27
25
|
end
|
28
26
|
|
29
27
|
private
|
30
28
|
|
31
|
-
def
|
29
|
+
def spreadsheet_to_requested_format(spreadsheet_data, format, sheet = nil, trim = nil)
|
30
|
+
if spreadsheet_data.is_a?(IO) || spreadsheet_data.is_a?(StringIO)
|
31
|
+
Tempfile.create('spreadsheet-extraction') do |f|
|
32
|
+
f.write(spreadsheet_data.read)
|
33
|
+
f.flush
|
34
|
+
execute_command_line f.path, format, sheet, trim
|
35
|
+
end
|
36
|
+
elsif spreadsheet_data.is_a?(String)
|
37
|
+
execute_command_line spreadsheet_data, format, sheet, trim
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def spreadsheet_extractor_command(filepath, format = 'xml', sheet = nil, trim = false)
|
32
42
|
command = "java -Xmx#{@memory_allocation} -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
33
|
-
command +=
|
43
|
+
command += " -o #{format}"
|
34
44
|
command += " -s #{sheet}" if sheet
|
35
|
-
command +=
|
45
|
+
command += ' -t' if trim
|
46
|
+
command += " < #{filepath}"
|
36
47
|
command
|
37
48
|
end
|
38
49
|
|
@@ -40,32 +51,14 @@ module SysMODB
|
|
40
51
|
!(RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/).nil?
|
41
52
|
end
|
42
53
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
stdin << line
|
50
|
-
end
|
51
|
-
stdin.close
|
52
|
-
|
53
|
-
while ((line = stdout.gets(BUFFER_SIZE)) != nil) do
|
54
|
-
output << line
|
55
|
-
end
|
56
|
-
stdout.close
|
57
|
-
|
58
|
-
until ((line=stderr.gets((BUFFER_SIZE))).nil?) do
|
59
|
-
err_message << line
|
60
|
-
end
|
61
|
-
stderr.close
|
54
|
+
def execute_command_line(filepath, format = 'xml', sheet = nil, trim = false)
|
55
|
+
command = spreadsheet_extractor_command filepath, format, sheet, trim
|
56
|
+
begin
|
57
|
+
Terrapin::CommandLine.new(command).run.strip
|
58
|
+
rescue Terrapin::ExitStatusError, Terrapin::CommandNotFoundError => e
|
59
|
+
raise SpreadsheetExtractionException, e.message
|
62
60
|
end
|
63
|
-
|
64
|
-
if status.to_i != 0
|
65
|
-
raise SpreadsheetExtractionException.new(err_message)
|
66
|
-
end
|
67
|
-
|
68
|
-
output.strip
|
69
61
|
end
|
62
|
+
|
70
63
|
end
|
71
64
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'sysmodb/extractor'
|
3
2
|
|
4
3
|
module SysMODB
|
@@ -8,12 +7,11 @@ module SysMODB
|
|
8
7
|
# simple_spreadsheet_to_xml to extract to an xml format (see README and schema in doc/schema-v1.xsd)
|
9
8
|
# simple_spreadsheet_to_csv to extract to CSV format for a single sheet
|
10
9
|
module SpreadsheetExtractor
|
11
|
-
|
12
10
|
# :call-seq:
|
13
11
|
# spreadsheet_to_xml(spreadsheet_data) -> String
|
14
12
|
# spreadsheet_to_xml(spreadsheet_data, memory_allocation) -> String
|
15
13
|
#
|
16
|
-
# reads the incoming data from an IO object and returns the generated XML.
|
14
|
+
# reads the incoming data from an IO object or path to a file and returns the generated XML.
|
17
15
|
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
18
16
|
# be changed by passing an optional final parameter memory_allocation
|
19
17
|
def spreadsheet_to_xml(spreadsheet_data, memory_allocation = DEFAULT_MEMORY_ALLOCATION)
|
@@ -26,7 +24,7 @@ module SysMODB
|
|
26
24
|
# spreadsheet_to_csv(spreadsheet_data, sheet, trim) -> String
|
27
25
|
# spreadsheet_to_csv(spreadsheet_data, sheet, trim, memory_allocation) -> String
|
28
26
|
#
|
29
|
-
# reads the incoming data from an IO object and returns the generated CSV.
|
27
|
+
# reads the incoming data from an IO object or path to a file and returns the generated CSV.
|
30
28
|
# only 1 sheet is processed, which by default is the first sheet.
|
31
29
|
# if trim is set to true, preceding or trailing cells will be removed whilst keeping the csv uniform.
|
32
30
|
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
data/lib/sysmodb/version.rb
CHANGED
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.add_dependency('libxml-ruby',
|
23
|
-
spec.add_dependency('
|
22
|
+
spec.add_dependency('libxml-ruby', '~> 2.9')
|
23
|
+
spec.add_dependency('terrapin','~>0.6')
|
24
24
|
|
25
|
-
spec.add_development_dependency('rubocop',
|
26
|
-
spec.add_development_dependency('rubycritic'
|
27
|
-
spec.add_development_dependency
|
28
|
-
|
29
|
-
spec.add_development_dependency
|
25
|
+
spec.add_development_dependency('rubocop','~>1.25')
|
26
|
+
spec.add_development_dependency('rubycritic')
|
27
|
+
spec.add_development_dependency('bundler')
|
28
|
+
|
29
|
+
spec.add_development_dependency('rake','~>13.0')
|
30
|
+
spec.add_development_dependency('minitest','~>5.1')
|
30
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-spreadsheet-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stuart Owen
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2023-05-22 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: libxml-ruby
|
@@ -18,98 +18,98 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - "~>"
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '2.
|
21
|
+
version: '2.9'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
requirements:
|
26
26
|
- - "~>"
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
version: '2.
|
28
|
+
version: '2.9'
|
29
29
|
- !ruby/object:Gem::Dependency
|
30
|
-
name:
|
30
|
+
name: terrapin
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
32
32
|
requirements:
|
33
|
-
- -
|
33
|
+
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
35
|
+
version: '0.6'
|
36
36
|
type: :runtime
|
37
37
|
prerelease: false
|
38
38
|
version_requirements: !ruby/object:Gem::Requirement
|
39
39
|
requirements:
|
40
|
-
- -
|
40
|
+
- - "~>"
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
42
|
+
version: '0.6'
|
43
43
|
- !ruby/object:Gem::Dependency
|
44
44
|
name: rubocop
|
45
45
|
requirement: !ruby/object:Gem::Requirement
|
46
46
|
requirements:
|
47
47
|
- - "~>"
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version: '
|
49
|
+
version: '1.25'
|
50
50
|
type: :development
|
51
51
|
prerelease: false
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
53
53
|
requirements:
|
54
54
|
- - "~>"
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '1.25'
|
57
57
|
- !ruby/object:Gem::Dependency
|
58
58
|
name: rubycritic
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
60
60
|
requirements:
|
61
|
-
- - "
|
61
|
+
- - ">="
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: '
|
63
|
+
version: '0'
|
64
64
|
type: :development
|
65
65
|
prerelease: false
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
67
67
|
requirements:
|
68
|
-
- - "
|
68
|
+
- - ">="
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: '
|
70
|
+
version: '0'
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: bundler
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
|
-
- - "
|
75
|
+
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: '
|
77
|
+
version: '0'
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
81
81
|
requirements:
|
82
|
-
- - "
|
82
|
+
- - ">="
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: '
|
84
|
+
version: '0'
|
85
85
|
- !ruby/object:Gem::Dependency
|
86
86
|
name: rake
|
87
87
|
requirement: !ruby/object:Gem::Requirement
|
88
88
|
requirements:
|
89
89
|
- - "~>"
|
90
90
|
- !ruby/object:Gem::Version
|
91
|
-
version: '
|
91
|
+
version: '13.0'
|
92
92
|
type: :development
|
93
93
|
prerelease: false
|
94
94
|
version_requirements: !ruby/object:Gem::Requirement
|
95
95
|
requirements:
|
96
96
|
- - "~>"
|
97
97
|
- !ruby/object:Gem::Version
|
98
|
-
version: '
|
98
|
+
version: '13.0'
|
99
99
|
- !ruby/object:Gem::Dependency
|
100
100
|
name: minitest
|
101
101
|
requirement: !ruby/object:Gem::Requirement
|
102
102
|
requirements:
|
103
103
|
- - "~>"
|
104
104
|
- !ruby/object:Gem::Version
|
105
|
-
version: '5.
|
105
|
+
version: '5.1'
|
106
106
|
type: :development
|
107
107
|
prerelease: false
|
108
108
|
version_requirements: !ruby/object:Gem::Requirement
|
109
109
|
requirements:
|
110
110
|
- - "~>"
|
111
111
|
- !ruby/object:Gem::Version
|
112
|
-
version: '5.
|
112
|
+
version: '5.1'
|
113
113
|
description: Takes a stream to a spreadsheet file and produces an XML or CSV representation
|
114
114
|
of its contents
|
115
115
|
email:
|
@@ -126,21 +126,25 @@ files:
|
|
126
126
|
- LICENSE
|
127
127
|
- README.rdoc
|
128
128
|
- Rakefile
|
129
|
-
- VERSION
|
130
129
|
- doc/schema-v1.xsd
|
131
130
|
- examples/example.rb
|
132
|
-
- jars/lib/
|
133
|
-
- jars/lib/commons-
|
134
|
-
- jars/lib/
|
135
|
-
- jars/lib/
|
136
|
-
- jars/lib/
|
137
|
-
- jars/lib/
|
138
|
-
- jars/lib/
|
139
|
-
- jars/lib/
|
140
|
-
- jars/lib/
|
131
|
+
- jars/lib/SparseBitSet-1.2.jar
|
132
|
+
- jars/lib/commons-codec-1.15.jar
|
133
|
+
- jars/lib/commons-collections4-4.4.jar
|
134
|
+
- jars/lib/commons-compress-1.21.jar
|
135
|
+
- jars/lib/commons-io-2.11.0.jar
|
136
|
+
- jars/lib/commons-math3-3.6.1.jar
|
137
|
+
- jars/lib/curvesapi-1.07.jar
|
138
|
+
- jars/lib/log4j-1.2-api-2.20.0.jar
|
139
|
+
- jars/lib/log4j-api-2.18.0.jar
|
140
|
+
- jars/lib/log4j-core-2.20.0.jar
|
141
|
+
- jars/lib/poi-5.2.3.jar
|
142
|
+
- jars/lib/poi-ooxml-5.2.3.jar
|
143
|
+
- jars/lib/poi-ooxml-lite-5.2.3.jar
|
144
|
+
- jars/lib/xercesImpl-2.12.2.jar
|
141
145
|
- jars/lib/xml-apis-1.4.01.jar
|
142
|
-
- jars/lib/xmlbeans-
|
143
|
-
- jars/simple-spreadsheet-extractor-0.
|
146
|
+
- jars/lib/xmlbeans-5.1.1.jar
|
147
|
+
- jars/simple-spreadsheet-extractor-0.18.0.jar
|
144
148
|
- lib/simple-spreadsheet-extractor.rb
|
145
149
|
- lib/sysmodb/extractor.rb
|
146
150
|
- lib/sysmodb/simple-spreadsheet-extractor.rb
|
@@ -165,8 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
169
|
- !ruby/object:Gem::Version
|
166
170
|
version: '0'
|
167
171
|
requirements: []
|
168
|
-
|
169
|
-
rubygems_version: 2.6.13
|
172
|
+
rubygems_version: 3.3.26
|
170
173
|
signing_key:
|
171
174
|
specification_version: 4
|
172
175
|
summary: Basic spreadsheet content extraction using Apache POI
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.15.2
|
Binary file
|
Binary file
|
data/jars/lib/dom4j-1.6.1.jar
DELETED
Binary file
|
data/jars/lib/log4j-1.2.17.jar
DELETED
Binary file
|
data/jars/lib/poi-3.17.jar
DELETED
Binary file
|
data/jars/lib/poi-ooxml-3.17.jar
DELETED
Binary file
|
Binary file
|
Binary file
|
data/jars/lib/xmlbeans-2.6.0.jar
DELETED
Binary file
|
Binary file
|