simple-spreadsheet-extractor 0.16.1 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/LICENSE +1 -1
- data/README.rdoc +3 -2
- data/lib/simple-spreadsheet-extractor.rb +2 -0
- data/lib/sysmodb/extractor.rb +33 -40
- data/lib/sysmodb/simple-spreadsheet-extractor.rb +2 -4
- data/lib/sysmodb/version.rb +1 -2
- data/simple-spreadsheet-extractor.gemspec +8 -7
- metadata +24 -26
- data/VERSION +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8d5351032c2b34dfcce4bed0469415ab1b98e8f11f305d8e76bca707ed2554e1
|
4
|
+
data.tar.gz: 01d1abb6c2f8a025ab489f1a83e9b9c627bbc67f151fb3ace3968175b4c69cb2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29bea0ec37798888a1e414ed947015c8c991450e8e017b0f4dde822c88ce5ceea32197cd3c93b5f8886bc704be2946c82edb9c7410235f17c53150509485e846
|
7
|
+
data.tar.gz: aad58ac8c66c8091ae3d9f1d05366c889727cc3f07981d2c9841a02e25dfc289060c57327d6d79b087ae9df8685ecf98ce3f63a9b1ea23300b5c870114802515
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.
|
1
|
+
ruby-2.7.6
|
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
= Simple Spreadsheet Extractor
|
2
2
|
|
3
3
|
Authors:: Stuart Owen, Finn Bacall
|
4
|
-
Version:: 0.
|
4
|
+
Version:: 0.17.0
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
Copyright:: (c) 2010-2015 The University of Manchester, UK
|
@@ -23,7 +23,7 @@ This is a simple tool developed for use within SysMO-DB[http://www.sysmo-db.org]
|
|
23
23
|
|
24
24
|
== Installation
|
25
25
|
|
26
|
-
Java
|
26
|
+
Java 8 or above (JRE) is required.
|
27
27
|
|
28
28
|
gem install simple-spreadsheet-extractor
|
29
29
|
|
@@ -34,6 +34,7 @@ Java 1.7 (JRE) is required.
|
|
34
34
|
* require 'simple-spreadsheet-extractor'
|
35
35
|
* include the module SysMODB::SpreadsheetExtractor
|
36
36
|
* pass an IO object to the method spreadsheet_to_xml, which responds with the XML for the contents of the spreadsheet. Alternatively use spreadsheet_to_csv for CSV.
|
37
|
+
* you can now also pass in the filepath to the Excel file instead of an IO object
|
37
38
|
* if something goes wrong with the extraction then a SysMODB::SpreadsheetExtractionException will be thrown
|
38
39
|
* by default the JVM is allocated 512M of memory, you can override this by passing a string as the last argument. This will be passed to -Xmx in the java command.
|
39
40
|
|
data/lib/sysmodb/extractor.rb
CHANGED
@@ -1,38 +1,49 @@
|
|
1
|
-
require '
|
1
|
+
require 'terrapin'
|
2
2
|
|
3
3
|
module SysMODB
|
4
|
-
#Exception that is thrown when a problem occurs during the extraction
|
4
|
+
# Exception that is thrown when a problem occurs during the extraction
|
5
5
|
class SpreadsheetExtractionException < Exception; end
|
6
6
|
|
7
|
-
#handles the delegation to java
|
8
|
-
#input file through STDIN, and reading the results through STDOUT.
|
7
|
+
# handles the delegation to java
|
9
8
|
class Extractor
|
10
|
-
JAR_VERSION=
|
9
|
+
JAR_VERSION = '0.16.0'.freeze
|
11
10
|
DEFAULT_PATH = File.dirname(__FILE__) + "/../../jars/simple-spreadsheet-extractor-#{JAR_VERSION}.jar"
|
12
|
-
BUFFER_SIZE=250000 # 1/4 a megabyte
|
13
11
|
|
14
12
|
def initialize(memory_allocation)
|
15
13
|
@memory_allocation = memory_allocation
|
16
|
-
if is_windows?
|
17
|
-
raise Exception.new("Windows is not currently supported")
|
18
|
-
end
|
14
|
+
raise Exception, 'Windows is not currently supported' if is_windows?
|
19
15
|
end
|
20
16
|
|
17
|
+
# spreadsheet_data can be an IO like object or the path to a file
|
21
18
|
def spreadsheet_to_xml(spreadsheet_data)
|
22
|
-
|
19
|
+
spreadsheet_to_requested_format(spreadsheet_data, 'xml')
|
23
20
|
end
|
24
21
|
|
25
|
-
|
26
|
-
|
22
|
+
# spreadsheet_data can be an IO like object or the path to a file
|
23
|
+
def spreadsheet_to_csv(spreadsheet_data, sheet = 1, trim = false)
|
24
|
+
spreadsheet_to_requested_format(spreadsheet_data, 'csv', sheet, trim)
|
27
25
|
end
|
28
26
|
|
29
27
|
private
|
30
28
|
|
31
|
-
def
|
29
|
+
def spreadsheet_to_requested_format(spreadsheet_data, format, sheet = nil, trim = nil)
|
30
|
+
if spreadsheet_data.is_a?(IO) || spreadsheet_data.is_a?(StringIO)
|
31
|
+
Tempfile.create('spreadsheet-extraction') do |f|
|
32
|
+
f.write(spreadsheet_data.read)
|
33
|
+
f.flush
|
34
|
+
execute_command_line f.path, format, sheet, trim
|
35
|
+
end
|
36
|
+
elsif spreadsheet_data.is_a?(String)
|
37
|
+
execute_command_line spreadsheet_data, format, sheet, trim
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def spreadsheet_extractor_command(filepath, format = 'xml', sheet = nil, trim = false)
|
32
42
|
command = "java -Xmx#{@memory_allocation} -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
33
|
-
command +=
|
43
|
+
command += " -o #{format}"
|
34
44
|
command += " -s #{sheet}" if sheet
|
35
|
-
command +=
|
45
|
+
command += ' -t' if trim
|
46
|
+
command += " < #{filepath}"
|
36
47
|
command
|
37
48
|
end
|
38
49
|
|
@@ -40,32 +51,14 @@ module SysMODB
|
|
40
51
|
!(RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/).nil?
|
41
52
|
end
|
42
53
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
stdin << line
|
50
|
-
end
|
51
|
-
stdin.close
|
52
|
-
|
53
|
-
while ((line = stdout.gets(BUFFER_SIZE)) != nil) do
|
54
|
-
output << line
|
55
|
-
end
|
56
|
-
stdout.close
|
57
|
-
|
58
|
-
until ((line=stderr.gets((BUFFER_SIZE))).nil?) do
|
59
|
-
err_message << line
|
60
|
-
end
|
61
|
-
stderr.close
|
54
|
+
def execute_command_line(filepath, format = 'xml', sheet = nil, trim = false)
|
55
|
+
command = spreadsheet_extractor_command filepath, format, sheet, trim
|
56
|
+
begin
|
57
|
+
Terrapin::CommandLine.new(command).run.strip
|
58
|
+
rescue Terrapin::ExitStatusError, Terrapin::CommandNotFoundError => e
|
59
|
+
raise SpreadsheetExtractionException, e.message
|
62
60
|
end
|
63
|
-
|
64
|
-
if status.to_i != 0
|
65
|
-
raise SpreadsheetExtractionException.new(err_message)
|
66
|
-
end
|
67
|
-
|
68
|
-
output.strip
|
69
61
|
end
|
62
|
+
|
70
63
|
end
|
71
64
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'sysmodb/extractor'
|
3
2
|
|
4
3
|
module SysMODB
|
@@ -8,12 +7,11 @@ module SysMODB
|
|
8
7
|
# spreadsheet_to_xml to extract to an XML format (see README and schema in doc/schema-v1.xsd)
|
9
8
|
# spreadsheet_to_csv to extract to CSV format for a single sheet
|
10
9
|
module SpreadsheetExtractor
|
11
|
-
|
12
10
|
# :call-seq:
|
13
11
|
# spreadsheet_to_xml(spreadsheet_data) -> String
|
14
12
|
# spreadsheet_to_xml(spreadsheet_data, memory_allocation) -> String
|
15
13
|
#
|
16
|
-
# reads the incoming data from an IO object and returns the generated XML.
|
14
|
+
# reads the incoming data from an IO object or path to a file and returns the generated XML.
|
17
15
|
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
18
16
|
# be changed by passing an optional final parameter memory_allocation
|
19
17
|
def spreadsheet_to_xml(spreadsheet_data, memory_allocation = DEFAULT_MEMORY_ALLOCATION)
|
@@ -26,7 +24,7 @@ module SysMODB
|
|
26
24
|
# spreadsheet_to_csv(spreadsheet_data, sheet, trim) -> String
|
27
25
|
# spreadsheet_to_csv(spreadsheet_data, sheet, trim, memory_allocation) -> String
|
28
26
|
#
|
29
|
-
# reads the incoming data from an IO object and returns the generated CSV.
|
27
|
+
# reads the incoming data from an IO object or path to a file and returns the generated CSV.
|
30
28
|
# only 1 sheet is processed, which by default is the first sheet.
|
31
29
|
# if trim is set to true, preceding or trailing cells will be removed whilst keeping the csv uniform.
|
32
30
|
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
data/lib/sysmodb/version.rb
CHANGED
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.add_dependency('libxml-ruby',
|
23
|
-
spec.add_dependency('
|
22
|
+
spec.add_dependency('libxml-ruby', '~> 2.9')
|
23
|
+
spec.add_dependency('terrapin', '~> 0.6')
|
24
24
|
|
25
|
-
spec.add_development_dependency('rubocop',
|
26
|
-
spec.add_development_dependency('rubycritic'
|
27
|
-
spec.add_development_dependency
|
28
|
-
|
29
|
-
spec.add_development_dependency
|
25
|
+
spec.add_development_dependency('rubocop','~> 1.25')
|
26
|
+
spec.add_development_dependency('rubycritic')
|
27
|
+
spec.add_development_dependency('bundler','~> 2.3')
|
28
|
+
|
29
|
+
spec.add_development_dependency('rake')
|
30
|
+
spec.add_development_dependency('minitest','~>5.14')
|
30
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-spreadsheet-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stuart Owen
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2022-12-16 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: libxml-ruby
|
@@ -18,98 +18,98 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - "~>"
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '2.
|
21
|
+
version: '2.9'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
requirements:
|
26
26
|
- - "~>"
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
version: '2.
|
28
|
+
version: '2.9'
|
29
29
|
- !ruby/object:Gem::Dependency
|
30
|
-
name:
|
30
|
+
name: terrapin
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
32
32
|
requirements:
|
33
|
-
- -
|
33
|
+
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
35
|
+
version: '0.6'
|
36
36
|
type: :runtime
|
37
37
|
prerelease: false
|
38
38
|
version_requirements: !ruby/object:Gem::Requirement
|
39
39
|
requirements:
|
40
|
-
- -
|
40
|
+
- - "~>"
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
42
|
+
version: '0.6'
|
43
43
|
- !ruby/object:Gem::Dependency
|
44
44
|
name: rubocop
|
45
45
|
requirement: !ruby/object:Gem::Requirement
|
46
46
|
requirements:
|
47
47
|
- - "~>"
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version: '
|
49
|
+
version: '1.25'
|
50
50
|
type: :development
|
51
51
|
prerelease: false
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
53
53
|
requirements:
|
54
54
|
- - "~>"
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '1.25'
|
57
57
|
- !ruby/object:Gem::Dependency
|
58
58
|
name: rubycritic
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
60
60
|
requirements:
|
61
|
-
- - "
|
61
|
+
- - ">="
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: '
|
63
|
+
version: '0'
|
64
64
|
type: :development
|
65
65
|
prerelease: false
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
67
67
|
requirements:
|
68
|
-
- - "
|
68
|
+
- - ">="
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: '
|
70
|
+
version: '0'
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: bundler
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - "~>"
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: '
|
77
|
+
version: '2.3'
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
81
81
|
requirements:
|
82
82
|
- - "~>"
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: '
|
84
|
+
version: '2.3'
|
85
85
|
- !ruby/object:Gem::Dependency
|
86
86
|
name: rake
|
87
87
|
requirement: !ruby/object:Gem::Requirement
|
88
88
|
requirements:
|
89
|
-
- - "
|
89
|
+
- - ">="
|
90
90
|
- !ruby/object:Gem::Version
|
91
|
-
version: '
|
91
|
+
version: '0'
|
92
92
|
type: :development
|
93
93
|
prerelease: false
|
94
94
|
version_requirements: !ruby/object:Gem::Requirement
|
95
95
|
requirements:
|
96
|
-
- - "
|
96
|
+
- - ">="
|
97
97
|
- !ruby/object:Gem::Version
|
98
|
-
version: '
|
98
|
+
version: '0'
|
99
99
|
- !ruby/object:Gem::Dependency
|
100
100
|
name: minitest
|
101
101
|
requirement: !ruby/object:Gem::Requirement
|
102
102
|
requirements:
|
103
103
|
- - "~>"
|
104
104
|
- !ruby/object:Gem::Version
|
105
|
-
version: '5.
|
105
|
+
version: '5.14'
|
106
106
|
type: :development
|
107
107
|
prerelease: false
|
108
108
|
version_requirements: !ruby/object:Gem::Requirement
|
109
109
|
requirements:
|
110
110
|
- - "~>"
|
111
111
|
- !ruby/object:Gem::Version
|
112
|
-
version: '5.
|
112
|
+
version: '5.14'
|
113
113
|
description: Takes a stream to a spreadsheet file and produces an XML or CSV representation
|
114
114
|
of its contents
|
115
115
|
email:
|
@@ -126,7 +126,6 @@ files:
|
|
126
126
|
- LICENSE
|
127
127
|
- README.rdoc
|
128
128
|
- Rakefile
|
129
|
-
- VERSION
|
130
129
|
- doc/schema-v1.xsd
|
131
130
|
- examples/example.rb
|
132
131
|
- jars/lib/commons-codec-1.10.jar
|
@@ -165,8 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
164
|
- !ruby/object:Gem::Version
|
166
165
|
version: '0'
|
167
166
|
requirements: []
|
168
|
-
|
169
|
-
rubygems_version: 2.6.13
|
167
|
+
rubygems_version: 3.3.26
|
170
168
|
signing_key:
|
171
169
|
specification_version: 4
|
172
170
|
summary: Basic spreadsheet content extraction using Apache POI
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.15.2
|