simple-spreadsheet-extractor 0.16.1 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/LICENSE +1 -1
- data/README.rdoc +3 -2
- data/lib/simple-spreadsheet-extractor.rb +2 -0
- data/lib/sysmodb/extractor.rb +33 -40
- data/lib/sysmodb/simple-spreadsheet-extractor.rb +2 -4
- data/lib/sysmodb/version.rb +1 -2
- data/simple-spreadsheet-extractor.gemspec +8 -7
- metadata +24 -26
- data/VERSION +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8d5351032c2b34dfcce4bed0469415ab1b98e8f11f305d8e76bca707ed2554e1
|
4
|
+
data.tar.gz: 01d1abb6c2f8a025ab489f1a83e9b9c627bbc67f151fb3ace3968175b4c69cb2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29bea0ec37798888a1e414ed947015c8c991450e8e017b0f4dde822c88ce5ceea32197cd3c93b5f8886bc704be2946c82edb9c7410235f17c53150509485e846
|
7
|
+
data.tar.gz: aad58ac8c66c8091ae3d9f1d05366c889727cc3f07981d2c9841a02e25dfc289060c57327d6d79b087ae9df8685ecf98ce3f63a9b1ea23300b5c870114802515
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.
|
1
|
+
ruby-2.7.6
|
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
= Simple Spreadsheet Extractor
|
2
2
|
|
3
3
|
Authors:: Stuart Owen, Finn Bacall
|
4
|
-
Version:: 0.
|
4
|
+
Version:: 0.17.0
|
5
5
|
Contact:: mailto:stuart.owen@manchester.ac.uk
|
6
6
|
Licence:: BSD (See LICENCE or http://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
Copyright:: (c) 2010-2015 The University of Manchester, UK
|
@@ -23,7 +23,7 @@ This is a simple tool developed for use within SysMO-DB[http://www.sysmo-db.org]
|
|
23
23
|
|
24
24
|
== Installation
|
25
25
|
|
26
|
-
Java
|
26
|
+
Java 8 or above (JRE) is required.
|
27
27
|
|
28
28
|
gem install simple-spreadsheet-extractor
|
29
29
|
|
@@ -34,6 +34,7 @@ Java 1.7 (JRE) is required.
|
|
34
34
|
* require 'simple-spreadsheet-extractor'
|
35
35
|
* include the module SysMODB::SpreadsheetExtractor
|
36
36
|
* pass an IO object to the method spreadsheet_to_xml which responds with the XML for the contents of the spreadsheet. Alternatively use spreadsheet_to_csv for CSV.
|
37
|
+
* you can now also pass in the filepath to the Excel file instead of an IO object
|
37
38
|
* if something goes wrong with the extraction then a SysMODB::SpreadsheetExtractionException will be thrown
|
38
39
|
* by default the JVM is allocated 512M of memory, you can override this by passing a string as the last argument. This will be passed to -Xmx in the java command.
|
39
40
|
|
data/lib/sysmodb/extractor.rb
CHANGED
@@ -1,38 +1,49 @@
|
|
1
|
-
require '
|
1
|
+
require 'terrapin'
|
2
2
|
|
3
3
|
module SysMODB
|
4
|
-
#Exception that is thrown when a problem occurs during the extraction
|
4
|
+
# Exception that is thrown when a problem occurs during the extraction
|
5
5
|
class SpreadsheetExtractionException < Exception; end
|
6
6
|
|
7
|
-
#handles the delegation to java
|
8
|
-
#input file through STDIN, and reading the results through STDOUT.
|
7
|
+
# handles the delegation to java
|
9
8
|
class Extractor
|
10
|
-
JAR_VERSION=
|
9
|
+
JAR_VERSION = '0.16.0'.freeze
|
11
10
|
DEFAULT_PATH = File.dirname(__FILE__) + "/../../jars/simple-spreadsheet-extractor-#{JAR_VERSION}.jar"
|
12
|
-
BUFFER_SIZE=250000 # 1/4 a megabyte
|
13
11
|
|
14
12
|
def initialize(memory_allocation)
|
15
13
|
@memory_allocation = memory_allocation
|
16
|
-
if is_windows?
|
17
|
-
raise Exception.new("Windows is not currently supported")
|
18
|
-
end
|
14
|
+
raise Exception, 'Windows is not currently supported' if is_windows?
|
19
15
|
end
|
20
16
|
|
17
|
+
# spreadsheet_data can be an IO like object or the path to a file
|
21
18
|
def spreadsheet_to_xml(spreadsheet_data)
|
22
|
-
|
19
|
+
spreadsheet_to_requested_format(spreadsheet_data, 'xml')
|
23
20
|
end
|
24
21
|
|
25
|
-
|
26
|
-
|
22
|
+
# spreadsheet_data can be an IO like object or the path to a file
|
23
|
+
def spreadsheet_to_csv(spreadsheet_data, sheet = 1, trim = false)
|
24
|
+
spreadsheet_to_requested_format(spreadsheet_data, 'csv', sheet, trim)
|
27
25
|
end
|
28
26
|
|
29
27
|
private
|
30
28
|
|
31
|
-
def
|
29
|
+
def spreadsheet_to_requested_format(spreadsheet_data, format, sheet = nil, trim = nil)
|
30
|
+
if spreadsheet_data.is_a?(IO) || spreadsheet_data.is_a?(StringIO)
|
31
|
+
Tempfile.create('spreadsheet-extraction') do |f|
|
32
|
+
f.write(spreadsheet_data.read)
|
33
|
+
f.flush
|
34
|
+
execute_command_line f.path, format, sheet, trim
|
35
|
+
end
|
36
|
+
elsif spreadsheet_data.is_a?(String)
|
37
|
+
execute_command_line spreadsheet_data, format, sheet, trim
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def spreadsheet_extractor_command(filepath, format = 'xml', sheet = nil, trim = false)
|
32
42
|
command = "java -Xmx#{@memory_allocation} -jar #{(defined? SPREADSHEET_EXTRACTOR_JAR_PATH) ? SPREADSHEET_EXTRACTOR_JAR_PATH : DEFAULT_PATH}"
|
33
|
-
command +=
|
43
|
+
command += " -o #{format}"
|
34
44
|
command += " -s #{sheet}" if sheet
|
35
|
-
command +=
|
45
|
+
command += ' -t' if trim
|
46
|
+
command += " < #{filepath}"
|
36
47
|
command
|
37
48
|
end
|
38
49
|
|
@@ -40,32 +51,14 @@ module SysMODB
|
|
40
51
|
!(RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/).nil?
|
41
52
|
end
|
42
53
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
stdin << line
|
50
|
-
end
|
51
|
-
stdin.close
|
52
|
-
|
53
|
-
while ((line = stdout.gets(BUFFER_SIZE)) != nil) do
|
54
|
-
output << line
|
55
|
-
end
|
56
|
-
stdout.close
|
57
|
-
|
58
|
-
until ((line=stderr.gets((BUFFER_SIZE))).nil?) do
|
59
|
-
err_message << line
|
60
|
-
end
|
61
|
-
stderr.close
|
54
|
+
def execute_command_line(filepath, format = 'xml', sheet = nil, trim = false)
|
55
|
+
command = spreadsheet_extractor_command filepath, format, sheet, trim
|
56
|
+
begin
|
57
|
+
Terrapin::CommandLine.new(command).run.strip
|
58
|
+
rescue Terrapin::ExitStatusError, Terrapin::CommandNotFoundError => e
|
59
|
+
raise SpreadsheetExtractionException, e.message
|
62
60
|
end
|
63
|
-
|
64
|
-
if status.to_i != 0
|
65
|
-
raise SpreadsheetExtractionException.new(err_message)
|
66
|
-
end
|
67
|
-
|
68
|
-
output.strip
|
69
61
|
end
|
62
|
+
|
70
63
|
end
|
71
64
|
end
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'sysmodb/extractor'
|
3
2
|
|
4
3
|
module SysMODB
|
@@ -8,12 +7,11 @@ module SysMODB
|
|
8
7
|
# simple_spreadsheet_to_xml to extract to an xml format (see README and schema in doc/schema-v1.xsd)
|
9
8
|
# simple_spreadsheet_to_csv to extract to CSV format for a single sheet
|
10
9
|
module SpreadsheetExtractor
|
11
|
-
|
12
10
|
# :call-seq:
|
13
11
|
# spreadsheet_to_xml(spreadsheet_data) -> String
|
14
12
|
# spreadsheet_to_xml(spreadsheet_data, memory_allocation) -> String
|
15
13
|
#
|
16
|
-
# reads the incoming data from an IO object and returns the generated XML.
|
14
|
+
# reads the incoming data from an IO object or path to a file and returns the generated XML.
|
17
15
|
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
18
16
|
# be changed by passing an optional final parameter memory_allocation
|
19
17
|
def spreadsheet_to_xml(spreadsheet_data, memory_allocation = DEFAULT_MEMORY_ALLOCATION)
|
@@ -26,7 +24,7 @@ module SysMODB
|
|
26
24
|
# spreadsheet_to_csv(spreadsheet_data, sheet, trim) -> String
|
27
25
|
# spreadsheet_to_csv(spreadsheet_data, sheet, trim, memory_allocation) -> String
|
28
26
|
#
|
29
|
-
# reads the incoming data from an IO object and returns the generated CSV.
|
27
|
+
# reads the incoming data from an IO object or path to a file and returns the generated CSV.
|
30
28
|
# only 1 sheet is processed, which by default is the first sheet.
|
31
29
|
# if trim is set to true, preceding or trailing cells will be removed whilst keeping the csv uniform.
|
32
30
|
# it is extracted using java, and the default memory allocation is 512M (passed to -Xmx) this can
|
data/lib/sysmodb/version.rb
CHANGED
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.add_dependency('libxml-ruby',
|
23
|
-
spec.add_dependency('
|
22
|
+
spec.add_dependency('libxml-ruby', '~> 2.9')
|
23
|
+
spec.add_dependency('terrapin', '~> 0.6')
|
24
24
|
|
25
|
-
spec.add_development_dependency('rubocop',
|
26
|
-
spec.add_development_dependency('rubycritic'
|
27
|
-
spec.add_development_dependency
|
28
|
-
|
29
|
-
spec.add_development_dependency
|
25
|
+
spec.add_development_dependency('rubocop','~> 1.25')
|
26
|
+
spec.add_development_dependency('rubycritic')
|
27
|
+
spec.add_development_dependency('bundler','~> 2.3')
|
28
|
+
|
29
|
+
spec.add_development_dependency('rake')
|
30
|
+
spec.add_development_dependency('minitest','~>5.14')
|
30
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-spreadsheet-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stuart Owen
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2022-12-16 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: libxml-ruby
|
@@ -18,98 +18,98 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - "~>"
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '2.
|
21
|
+
version: '2.9'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
requirements:
|
26
26
|
- - "~>"
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
version: '2.
|
28
|
+
version: '2.9'
|
29
29
|
- !ruby/object:Gem::Dependency
|
30
|
-
name:
|
30
|
+
name: terrapin
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
32
32
|
requirements:
|
33
|
-
- -
|
33
|
+
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
35
|
+
version: '0.6'
|
36
36
|
type: :runtime
|
37
37
|
prerelease: false
|
38
38
|
version_requirements: !ruby/object:Gem::Requirement
|
39
39
|
requirements:
|
40
|
-
- -
|
40
|
+
- - "~>"
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
42
|
+
version: '0.6'
|
43
43
|
- !ruby/object:Gem::Dependency
|
44
44
|
name: rubocop
|
45
45
|
requirement: !ruby/object:Gem::Requirement
|
46
46
|
requirements:
|
47
47
|
- - "~>"
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version: '
|
49
|
+
version: '1.25'
|
50
50
|
type: :development
|
51
51
|
prerelease: false
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
53
53
|
requirements:
|
54
54
|
- - "~>"
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '1.25'
|
57
57
|
- !ruby/object:Gem::Dependency
|
58
58
|
name: rubycritic
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
60
60
|
requirements:
|
61
|
-
- - "
|
61
|
+
- - ">="
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: '
|
63
|
+
version: '0'
|
64
64
|
type: :development
|
65
65
|
prerelease: false
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
67
67
|
requirements:
|
68
|
-
- - "
|
68
|
+
- - ">="
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: '
|
70
|
+
version: '0'
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: bundler
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - "~>"
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: '
|
77
|
+
version: '2.3'
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
81
81
|
requirements:
|
82
82
|
- - "~>"
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: '
|
84
|
+
version: '2.3'
|
85
85
|
- !ruby/object:Gem::Dependency
|
86
86
|
name: rake
|
87
87
|
requirement: !ruby/object:Gem::Requirement
|
88
88
|
requirements:
|
89
|
-
- - "
|
89
|
+
- - ">="
|
90
90
|
- !ruby/object:Gem::Version
|
91
|
-
version: '
|
91
|
+
version: '0'
|
92
92
|
type: :development
|
93
93
|
prerelease: false
|
94
94
|
version_requirements: !ruby/object:Gem::Requirement
|
95
95
|
requirements:
|
96
|
-
- - "
|
96
|
+
- - ">="
|
97
97
|
- !ruby/object:Gem::Version
|
98
|
-
version: '
|
98
|
+
version: '0'
|
99
99
|
- !ruby/object:Gem::Dependency
|
100
100
|
name: minitest
|
101
101
|
requirement: !ruby/object:Gem::Requirement
|
102
102
|
requirements:
|
103
103
|
- - "~>"
|
104
104
|
- !ruby/object:Gem::Version
|
105
|
-
version: '5.
|
105
|
+
version: '5.14'
|
106
106
|
type: :development
|
107
107
|
prerelease: false
|
108
108
|
version_requirements: !ruby/object:Gem::Requirement
|
109
109
|
requirements:
|
110
110
|
- - "~>"
|
111
111
|
- !ruby/object:Gem::Version
|
112
|
-
version: '5.
|
112
|
+
version: '5.14'
|
113
113
|
description: Takes a stream to a spreadsheet file and produces an XML or CSV representation
|
114
114
|
of its contents
|
115
115
|
email:
|
@@ -126,7 +126,6 @@ files:
|
|
126
126
|
- LICENSE
|
127
127
|
- README.rdoc
|
128
128
|
- Rakefile
|
129
|
-
- VERSION
|
130
129
|
- doc/schema-v1.xsd
|
131
130
|
- examples/example.rb
|
132
131
|
- jars/lib/commons-codec-1.10.jar
|
@@ -165,8 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
164
|
- !ruby/object:Gem::Version
|
166
165
|
version: '0'
|
167
166
|
requirements: []
|
168
|
-
|
169
|
-
rubygems_version: 2.6.13
|
167
|
+
rubygems_version: 3.3.26
|
170
168
|
signing_key:
|
171
169
|
specification_version: 4
|
172
170
|
summary: Basic spreadsheet content extraction using Apache POI
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.15.2
|