simple-spreadsheet-extractor 0.15.2 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.ruby-version +1 -1
- data/.travis.yml +7 -2
- data/Gemfile +2 -8
- data/Rakefile +7 -35
- data/jars/lib/commons-codec-1.10.jar +0 -0
- data/jars/lib/commons-collections4-4.1.jar +0 -0
- data/jars/lib/curvesapi-1.04.jar +0 -0
- data/jars/lib/poi-3.17.jar +0 -0
- data/jars/lib/poi-ooxml-3.17.jar +0 -0
- data/jars/lib/{poi-ooxml-schemas-3.11.jar → poi-ooxml-schemas-3.17.jar} +0 -0
- data/jars/lib/xercesImpl-2.11.0.jar +0 -0
- data/jars/lib/xml-apis-1.4.01.jar +0 -0
- data/jars/simple-spreadsheet-extractor-0.16.0.jar +0 -0
- data/lib/sysmodb/extractor.rb +1 -1
- data/lib/sysmodb/version.rb +4 -0
- data/simple-spreadsheet-extractor.gemspec +25 -80
- metadata +54 -43
- data/Gemfile.lock +0 -127
- data/jars/lib/poi-3.11.jar +0 -0
- data/jars/lib/poi-ooxml-3.11.jar +0 -0
- data/jars/lib/xercesImpl-2.6.2.jar +0 -0
- data/jars/simple-spreadsheet-extractor-0.15.1.jar +0 -0
- data/test/files/not-a-spreadsheet.xls +0 -1
- data/test/files/problem_with_dv.xls +0 -0
- data/test/files/test-csv-output1-trimmed.csv +0 -2
- data/test/files/test-csv-output1.csv +0 -4
- data/test/files/test-spreadsheet.xls +0 -0
- data/test/files/test-spreadsheet.xlsx +0 -0
- data/test/files/xml-unfriendly-chars.xlsx +0 -0
- data/test/test_extraction.rb +0 -99
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2f28b562ed84e674ad6279c9f3ddea76f120a3ca
|
|
4
|
+
data.tar.gz: 2a3597b460f7866aea45b080ce00d368990d261f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a551347822b07f8d9d2243089940227e5526fb428e9a6f19002f92f598fdb1dc81767cd746747a05aa97619197a547d2213f737c1741765b26edfa76e83dd077
|
|
7
|
+
data.tar.gz: 3001e98aaef97217e2abd8c2c4201723d8bef943cf0b5125e4509563bddb0545655ad74d25487cb48c58e0d7ab49e29b320506f7111625f507003c5e1e491376
|
data/.gitignore
ADDED
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
ruby-2.
|
|
1
|
+
ruby-2.2.8
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
1
|
source 'https://rubygems.org'
|
|
2
|
-
source 'http://gems.github.com'
|
|
3
2
|
|
|
4
|
-
gem
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
gem "rdoc",:group=>:development
|
|
8
|
-
gem "jeweler",:group=>:development
|
|
9
|
-
gem "rubocop", :group=>:development,:require=>false
|
|
10
|
-
gem "rubycritic", :group=>:development,:require=>false
|
|
3
|
+
# Specify your gem's dependencies in sample-template-generator-gem.gemspec
|
|
4
|
+
gemspec
|
data/Rakefile
CHANGED
|
@@ -1,38 +1,10 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
3
|
-
require 'rdoc/task'
|
|
4
|
-
require 'rubygems'
|
|
1
|
+
require "bundler/gem_tasks"
|
|
2
|
+
require "rake/testtask"
|
|
5
3
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
begin
|
|
11
|
-
require 'jeweler'
|
|
12
|
-
Jeweler::Tasks.new do |gemspec|
|
|
13
|
-
gemspec.name = "simple-spreadsheet-extractor"
|
|
14
|
-
gemspec.summary = "Basic spreadsheet content extraction using Apache POI"
|
|
15
|
-
gemspec.description = "Takes a stream to a spreadsheet file and produces an XML or CSV representation of its contents"
|
|
16
|
-
gemspec.email = "stuart.owen@manchester.ac.uk"
|
|
17
|
-
gemspec.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
|
|
18
|
-
gemspec.authors = ["Stuart Owen","Finn Bacall", "Quyen Nguyen"]
|
|
19
|
-
gemspec.license = "BSD-3-Clause" # https://spdx.org/licenses/BSD-3-Clause.html#licenseText
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
gemspec.has_rdoc = true
|
|
23
|
-
gemspec.files.include %w(jars)
|
|
24
|
-
gemspec.extra_rdoc_files = ["README.rdoc", "LICENCE"]
|
|
25
|
-
end
|
|
26
|
-
rescue LoadError
|
|
27
|
-
puts "Jeweler not available. Install it with: gem install jeweler"
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
task:test do
|
|
31
|
-
Rake::TestTask.new do |t|
|
|
32
|
-
t.libs << "test"
|
|
33
|
-
t.test_files = FileList['test/test*.rb']
|
|
34
|
-
t.verbose = true
|
|
35
|
-
end
|
|
4
|
+
Rake::TestTask.new(:test) do |t|
|
|
5
|
+
t.libs << "test"
|
|
6
|
+
t.libs << "lib"
|
|
7
|
+
t.test_files = FileList['test/**/*_test.rb']
|
|
36
8
|
end
|
|
37
9
|
|
|
38
|
-
|
|
10
|
+
task :default => :test
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/sysmodb/extractor.rb
CHANGED
|
@@ -7,7 +7,7 @@ module SysMODB
|
|
|
7
7
|
#handles the delegation to java, and executes the extraction passing the
|
|
8
8
|
#input file through STDIN, and reading the results through STDOUT.
|
|
9
9
|
class Extractor
|
|
10
|
-
JAR_VERSION="0.
|
|
10
|
+
JAR_VERSION="0.16.0".freeze
|
|
11
11
|
DEFAULT_PATH = File.dirname(__FILE__) + "/../../jars/simple-spreadsheet-extractor-#{JAR_VERSION}.jar"
|
|
12
12
|
BUFFER_SIZE=250000 # 1/4 a megabyte
|
|
13
13
|
|
|
@@ -1,85 +1,30 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
# stub: simple-spreadsheet-extractor 0.15.2 ruby lib
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'sysmodb/version'
|
|
6
5
|
|
|
7
|
-
Gem::Specification.new do |
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
Gem::Specification.new do |spec|
|
|
7
|
+
spec.name = 'simple-spreadsheet-extractor'
|
|
8
|
+
spec.version = SysMODB::VERSION
|
|
9
|
+
spec.authors = ["Stuart Owen","Finn Bacall", "Quyen Nguyen"]
|
|
10
|
+
spec.email = ['stuart.owen@manchester.ac.uk']
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
s.description = "Takes a stream to a spreadsheet file and produces an XML or CSV representation of its contents"
|
|
16
|
-
s.email = "stuart.owen@manchester.ac.uk"
|
|
17
|
-
s.extra_rdoc_files = [
|
|
18
|
-
"README.rdoc"
|
|
19
|
-
]
|
|
20
|
-
s.files = [
|
|
21
|
-
".ruby-gemspec",
|
|
22
|
-
".ruby-version",
|
|
23
|
-
".travis.yml",
|
|
24
|
-
"Gemfile",
|
|
25
|
-
"Gemfile.lock",
|
|
26
|
-
"LICENSE",
|
|
27
|
-
"README.rdoc",
|
|
28
|
-
"Rakefile",
|
|
29
|
-
"VERSION",
|
|
30
|
-
"doc/schema-v1.xsd",
|
|
31
|
-
"examples/example.rb",
|
|
32
|
-
"jars/lib/dom4j-1.6.1.jar",
|
|
33
|
-
"jars/lib/log4j-1.2.17.jar",
|
|
34
|
-
"jars/lib/poi-3.11.jar",
|
|
35
|
-
"jars/lib/poi-ooxml-3.11.jar",
|
|
36
|
-
"jars/lib/poi-ooxml-schemas-3.11.jar",
|
|
37
|
-
"jars/lib/xercesImpl-2.6.2.jar",
|
|
38
|
-
"jars/lib/xmlbeans-2.6.0.jar",
|
|
39
|
-
"jars/simple-spreadsheet-extractor-0.15.1.jar",
|
|
40
|
-
"lib/simple-spreadsheet-extractor.rb",
|
|
41
|
-
"lib/sysmodb/extractor.rb",
|
|
42
|
-
"lib/sysmodb/simple-spreadsheet-extractor.rb",
|
|
43
|
-
"simple-spreadsheet-extractor.gemspec",
|
|
44
|
-
"test/files/not-a-spreadsheet.xls",
|
|
45
|
-
"test/files/problem_with_dv.xls",
|
|
46
|
-
"test/files/test-csv-output1-trimmed.csv",
|
|
47
|
-
"test/files/test-csv-output1.csv",
|
|
48
|
-
"test/files/test-spreadsheet.xls",
|
|
49
|
-
"test/files/test-spreadsheet.xlsx",
|
|
50
|
-
"test/files/xml-unfriendly-chars.xlsx",
|
|
51
|
-
"test/test_extraction.rb"
|
|
52
|
-
]
|
|
53
|
-
s.homepage = "http://github.com/myGrid/simple-spreadsheet-extractor-gem"
|
|
54
|
-
s.licenses = ["BSD-3-Clause"]
|
|
55
|
-
s.rubygems_version = "2.4.8"
|
|
56
|
-
s.summary = "Basic spreadsheet content extraction using Apache POI"
|
|
12
|
+
spec.summary = 'Basic spreadsheet content extraction using Apache POI'
|
|
13
|
+
spec.description = 'Takes a stream to a spreadsheet file and produces an XML or CSV representation of its contents'
|
|
14
|
+
spec.homepage = 'http://github.com/myGrid/simple-spreadsheet-extractor-gem'
|
|
15
|
+
spec.license = 'BSD-3-Clause'
|
|
57
16
|
|
|
58
|
-
|
|
59
|
-
|
|
17
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
18
|
+
spec.bindir = 'exe'
|
|
19
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
20
|
+
spec.require_paths = ['lib']
|
|
60
21
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
s.add_runtime_dependency(%q<open4>, ["= 1.3.0"])
|
|
64
|
-
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
|
65
|
-
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
|
66
|
-
s.add_development_dependency(%q<rubocop>, [">= 0"])
|
|
67
|
-
s.add_development_dependency(%q<rubycritic>, [">= 0"])
|
|
68
|
-
else
|
|
69
|
-
s.add_dependency(%q<libxml-ruby>, ["~> 2.6"])
|
|
70
|
-
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
|
71
|
-
s.add_dependency(%q<rdoc>, [">= 0"])
|
|
72
|
-
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
73
|
-
s.add_dependency(%q<rubocop>, [">= 0"])
|
|
74
|
-
s.add_dependency(%q<rubycritic>, [">= 0"])
|
|
75
|
-
end
|
|
76
|
-
else
|
|
77
|
-
s.add_dependency(%q<libxml-ruby>, ["~> 2.6"])
|
|
78
|
-
s.add_dependency(%q<open4>, ["= 1.3.0"])
|
|
79
|
-
s.add_dependency(%q<rdoc>, [">= 0"])
|
|
80
|
-
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
81
|
-
s.add_dependency(%q<rubocop>, [">= 0"])
|
|
82
|
-
s.add_dependency(%q<rubycritic>, [">= 0"])
|
|
83
|
-
end
|
|
84
|
-
end
|
|
22
|
+
spec.add_dependency('libxml-ruby', ['~> 2.9'])
|
|
23
|
+
spec.add_dependency('open4', ['1.3.0'])
|
|
85
24
|
|
|
25
|
+
spec.add_development_dependency('rubocop', ['~> 0.48'])
|
|
26
|
+
spec.add_development_dependency('rubycritic', ['~> 3.2'])
|
|
27
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
|
28
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
|
29
|
+
spec.add_development_dependency 'minitest', '~> 5.0'
|
|
30
|
+
end
|
metadata
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: simple-spreadsheet-extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.16.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Stuart Owen
|
|
8
8
|
- Finn Bacall
|
|
9
9
|
- Quyen Nguyen
|
|
10
10
|
autorequire:
|
|
11
|
-
bindir:
|
|
11
|
+
bindir: exe
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date:
|
|
13
|
+
date: 2018-04-30 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: libxml-ruby
|
|
@@ -18,14 +18,14 @@ dependencies:
|
|
|
18
18
|
requirements:
|
|
19
19
|
- - "~>"
|
|
20
20
|
- !ruby/object:Gem::Version
|
|
21
|
-
version: '2.
|
|
21
|
+
version: '2.9'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
25
25
|
requirements:
|
|
26
26
|
- - "~>"
|
|
27
27
|
- !ruby/object:Gem::Version
|
|
28
|
-
version: '2.
|
|
28
|
+
version: '2.9'
|
|
29
29
|
- !ruby/object:Gem::Dependency
|
|
30
30
|
name: open4
|
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -41,100 +41,111 @@ dependencies:
|
|
|
41
41
|
- !ruby/object:Gem::Version
|
|
42
42
|
version: 1.3.0
|
|
43
43
|
- !ruby/object:Gem::Dependency
|
|
44
|
-
name:
|
|
44
|
+
name: rubocop
|
|
45
45
|
requirement: !ruby/object:Gem::Requirement
|
|
46
46
|
requirements:
|
|
47
|
-
- - "
|
|
47
|
+
- - "~>"
|
|
48
48
|
- !ruby/object:Gem::Version
|
|
49
|
-
version: '0'
|
|
49
|
+
version: '0.48'
|
|
50
50
|
type: :development
|
|
51
51
|
prerelease: false
|
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
|
54
|
-
- - "
|
|
54
|
+
- - "~>"
|
|
55
55
|
- !ruby/object:Gem::Version
|
|
56
|
-
version: '0'
|
|
56
|
+
version: '0.48'
|
|
57
57
|
- !ruby/object:Gem::Dependency
|
|
58
|
-
name:
|
|
58
|
+
name: rubycritic
|
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
|
60
60
|
requirements:
|
|
61
|
-
- - "
|
|
61
|
+
- - "~>"
|
|
62
62
|
- !ruby/object:Gem::Version
|
|
63
|
-
version: '
|
|
63
|
+
version: '3.2'
|
|
64
64
|
type: :development
|
|
65
65
|
prerelease: false
|
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
|
67
67
|
requirements:
|
|
68
|
-
- - "
|
|
68
|
+
- - "~>"
|
|
69
69
|
- !ruby/object:Gem::Version
|
|
70
|
-
version: '
|
|
70
|
+
version: '3.2'
|
|
71
71
|
- !ruby/object:Gem::Dependency
|
|
72
|
-
name:
|
|
72
|
+
name: bundler
|
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
|
74
74
|
requirements:
|
|
75
|
-
- - "
|
|
75
|
+
- - "~>"
|
|
76
76
|
- !ruby/object:Gem::Version
|
|
77
|
-
version: '
|
|
77
|
+
version: '1.16'
|
|
78
78
|
type: :development
|
|
79
79
|
prerelease: false
|
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
|
81
81
|
requirements:
|
|
82
|
-
- - "
|
|
82
|
+
- - "~>"
|
|
83
83
|
- !ruby/object:Gem::Version
|
|
84
|
-
version: '
|
|
84
|
+
version: '1.16'
|
|
85
85
|
- !ruby/object:Gem::Dependency
|
|
86
|
-
name:
|
|
86
|
+
name: rake
|
|
87
|
+
requirement: !ruby/object:Gem::Requirement
|
|
88
|
+
requirements:
|
|
89
|
+
- - "~>"
|
|
90
|
+
- !ruby/object:Gem::Version
|
|
91
|
+
version: '10.0'
|
|
92
|
+
type: :development
|
|
93
|
+
prerelease: false
|
|
94
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
95
|
+
requirements:
|
|
96
|
+
- - "~>"
|
|
97
|
+
- !ruby/object:Gem::Version
|
|
98
|
+
version: '10.0'
|
|
99
|
+
- !ruby/object:Gem::Dependency
|
|
100
|
+
name: minitest
|
|
87
101
|
requirement: !ruby/object:Gem::Requirement
|
|
88
102
|
requirements:
|
|
89
|
-
- - "
|
|
103
|
+
- - "~>"
|
|
90
104
|
- !ruby/object:Gem::Version
|
|
91
|
-
version: '0'
|
|
105
|
+
version: '5.0'
|
|
92
106
|
type: :development
|
|
93
107
|
prerelease: false
|
|
94
108
|
version_requirements: !ruby/object:Gem::Requirement
|
|
95
109
|
requirements:
|
|
96
|
-
- - "
|
|
110
|
+
- - "~>"
|
|
97
111
|
- !ruby/object:Gem::Version
|
|
98
|
-
version: '0'
|
|
112
|
+
version: '5.0'
|
|
99
113
|
description: Takes a stream to a spreadsheet file and produces an XML or CSV representation
|
|
100
114
|
of its contents
|
|
101
|
-
email:
|
|
115
|
+
email:
|
|
116
|
+
- stuart.owen@manchester.ac.uk
|
|
102
117
|
executables: []
|
|
103
118
|
extensions: []
|
|
104
|
-
extra_rdoc_files:
|
|
105
|
-
- README.rdoc
|
|
119
|
+
extra_rdoc_files: []
|
|
106
120
|
files:
|
|
121
|
+
- ".gitignore"
|
|
107
122
|
- ".ruby-gemspec"
|
|
108
123
|
- ".ruby-version"
|
|
109
124
|
- ".travis.yml"
|
|
110
125
|
- Gemfile
|
|
111
|
-
- Gemfile.lock
|
|
112
126
|
- LICENSE
|
|
113
127
|
- README.rdoc
|
|
114
128
|
- Rakefile
|
|
115
129
|
- VERSION
|
|
116
130
|
- doc/schema-v1.xsd
|
|
117
131
|
- examples/example.rb
|
|
132
|
+
- jars/lib/commons-codec-1.10.jar
|
|
133
|
+
- jars/lib/commons-collections4-4.1.jar
|
|
134
|
+
- jars/lib/curvesapi-1.04.jar
|
|
118
135
|
- jars/lib/dom4j-1.6.1.jar
|
|
119
136
|
- jars/lib/log4j-1.2.17.jar
|
|
120
|
-
- jars/lib/poi-3.
|
|
121
|
-
- jars/lib/poi-ooxml-3.
|
|
122
|
-
- jars/lib/poi-ooxml-schemas-3.
|
|
123
|
-
- jars/lib/xercesImpl-2.
|
|
137
|
+
- jars/lib/poi-3.17.jar
|
|
138
|
+
- jars/lib/poi-ooxml-3.17.jar
|
|
139
|
+
- jars/lib/poi-ooxml-schemas-3.17.jar
|
|
140
|
+
- jars/lib/xercesImpl-2.11.0.jar
|
|
141
|
+
- jars/lib/xml-apis-1.4.01.jar
|
|
124
142
|
- jars/lib/xmlbeans-2.6.0.jar
|
|
125
|
-
- jars/simple-spreadsheet-extractor-0.
|
|
143
|
+
- jars/simple-spreadsheet-extractor-0.16.0.jar
|
|
126
144
|
- lib/simple-spreadsheet-extractor.rb
|
|
127
145
|
- lib/sysmodb/extractor.rb
|
|
128
146
|
- lib/sysmodb/simple-spreadsheet-extractor.rb
|
|
147
|
+
- lib/sysmodb/version.rb
|
|
129
148
|
- simple-spreadsheet-extractor.gemspec
|
|
130
|
-
- test/files/not-a-spreadsheet.xls
|
|
131
|
-
- test/files/problem_with_dv.xls
|
|
132
|
-
- test/files/test-csv-output1-trimmed.csv
|
|
133
|
-
- test/files/test-csv-output1.csv
|
|
134
|
-
- test/files/test-spreadsheet.xls
|
|
135
|
-
- test/files/test-spreadsheet.xlsx
|
|
136
|
-
- test/files/xml-unfriendly-chars.xlsx
|
|
137
|
-
- test/test_extraction.rb
|
|
138
149
|
homepage: http://github.com/myGrid/simple-spreadsheet-extractor-gem
|
|
139
150
|
licenses:
|
|
140
151
|
- BSD-3-Clause
|
|
@@ -155,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
155
166
|
version: '0'
|
|
156
167
|
requirements: []
|
|
157
168
|
rubyforge_project:
|
|
158
|
-
rubygems_version: 2.
|
|
169
|
+
rubygems_version: 2.6.13
|
|
159
170
|
signing_key:
|
|
160
171
|
specification_version: 4
|
|
161
172
|
summary: Basic spreadsheet content extraction using Apache POI
|
data/Gemfile.lock
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
GEM
|
|
2
|
-
remote: https://rubygems.org/
|
|
3
|
-
remote: http://gems.github.com/
|
|
4
|
-
specs:
|
|
5
|
-
abstract_type (0.0.7)
|
|
6
|
-
adamantium (0.2.0)
|
|
7
|
-
ice_nine (~> 0.11.0)
|
|
8
|
-
memoizable (~> 0.4.0)
|
|
9
|
-
addressable (2.3.5)
|
|
10
|
-
ast (2.0.0)
|
|
11
|
-
astrolabe (1.3.0)
|
|
12
|
-
parser (>= 2.2.0.pre.3, < 3.0)
|
|
13
|
-
axiom-types (0.1.1)
|
|
14
|
-
descendants_tracker (~> 0.0.4)
|
|
15
|
-
ice_nine (~> 0.11.0)
|
|
16
|
-
thread_safe (~> 0.3, >= 0.3.1)
|
|
17
|
-
builder (3.2.2)
|
|
18
|
-
coercible (1.0.0)
|
|
19
|
-
descendants_tracker (~> 0.0.1)
|
|
20
|
-
concord (0.1.5)
|
|
21
|
-
adamantium (~> 0.2.0)
|
|
22
|
-
equalizer (~> 0.0.9)
|
|
23
|
-
descendants_tracker (0.0.4)
|
|
24
|
-
thread_safe (~> 0.3, >= 0.3.1)
|
|
25
|
-
diff-lcs (1.2.5)
|
|
26
|
-
equalizer (0.0.9)
|
|
27
|
-
faraday (0.8.8)
|
|
28
|
-
multipart-post (~> 1.2.0)
|
|
29
|
-
flay (2.4.0)
|
|
30
|
-
ruby_parser (~> 3.0)
|
|
31
|
-
sexp_processor (~> 4.0)
|
|
32
|
-
flog (4.2.1)
|
|
33
|
-
ruby_parser (~> 3.1, > 3.1.0)
|
|
34
|
-
sexp_processor (~> 4.4)
|
|
35
|
-
git (1.2.6)
|
|
36
|
-
github_api (0.10.1)
|
|
37
|
-
addressable
|
|
38
|
-
faraday (~> 0.8.1)
|
|
39
|
-
hashie (>= 1.2)
|
|
40
|
-
multi_json (~> 1.4)
|
|
41
|
-
nokogiri (~> 1.5.2)
|
|
42
|
-
oauth2
|
|
43
|
-
hashie (2.0.5)
|
|
44
|
-
highline (1.6.20)
|
|
45
|
-
httpauth (0.2.0)
|
|
46
|
-
ice_nine (0.11.1)
|
|
47
|
-
jeweler (1.8.8)
|
|
48
|
-
builder
|
|
49
|
-
bundler (~> 1.0)
|
|
50
|
-
git (>= 1.2.5)
|
|
51
|
-
github_api (= 0.10.1)
|
|
52
|
-
highline (>= 1.6.15)
|
|
53
|
-
nokogiri (= 1.5.10)
|
|
54
|
-
rake
|
|
55
|
-
rdoc
|
|
56
|
-
json (1.8.1)
|
|
57
|
-
jwt (0.1.8)
|
|
58
|
-
multi_json (>= 1.5)
|
|
59
|
-
libxml-ruby (2.6.0)
|
|
60
|
-
memoizable (0.4.2)
|
|
61
|
-
thread_safe (~> 0.3, >= 0.3.1)
|
|
62
|
-
multi_json (1.8.2)
|
|
63
|
-
multi_xml (0.5.5)
|
|
64
|
-
multipart-post (1.2.0)
|
|
65
|
-
nokogiri (1.5.10)
|
|
66
|
-
oauth2 (0.9.2)
|
|
67
|
-
faraday (~> 0.8)
|
|
68
|
-
httpauth (~> 0.2)
|
|
69
|
-
jwt (~> 0.1.4)
|
|
70
|
-
multi_json (~> 1.0)
|
|
71
|
-
multi_xml (~> 0.5)
|
|
72
|
-
rack (~> 1.2)
|
|
73
|
-
open4 (1.3.0)
|
|
74
|
-
parser (2.2.0.3)
|
|
75
|
-
ast (>= 1.1, < 3.0)
|
|
76
|
-
powerpack (0.1.0)
|
|
77
|
-
procto (0.0.2)
|
|
78
|
-
rack (1.5.2)
|
|
79
|
-
rainbow (2.0.0)
|
|
80
|
-
rake (10.1.0)
|
|
81
|
-
rdoc (4.0.1)
|
|
82
|
-
json (~> 1.4)
|
|
83
|
-
reek (1.6.5)
|
|
84
|
-
parser (~> 2.2.0.pre.7)
|
|
85
|
-
rainbow (>= 1.99, < 3.0)
|
|
86
|
-
unparser (~> 0.2.2)
|
|
87
|
-
rubocop (0.29.1)
|
|
88
|
-
astrolabe (~> 1.3)
|
|
89
|
-
parser (>= 2.2.0.1, < 3.0)
|
|
90
|
-
powerpack (~> 0.1)
|
|
91
|
-
rainbow (>= 1.99.1, < 3.0)
|
|
92
|
-
ruby-progressbar (~> 1.4)
|
|
93
|
-
ruby-progressbar (1.7.1)
|
|
94
|
-
ruby_parser (3.6.4)
|
|
95
|
-
sexp_processor (~> 4.1)
|
|
96
|
-
rubycritic (1.3.0)
|
|
97
|
-
flay (= 2.4.0)
|
|
98
|
-
flog (= 4.2.1)
|
|
99
|
-
parser (>= 2.2.0, < 3.0)
|
|
100
|
-
reek (= 1.6.5)
|
|
101
|
-
virtus (~> 1.0)
|
|
102
|
-
sexp_processor (4.4.5)
|
|
103
|
-
thread_safe (0.3.4)
|
|
104
|
-
unparser (0.2.2)
|
|
105
|
-
abstract_type (~> 0.0.7)
|
|
106
|
-
adamantium (~> 0.2.0)
|
|
107
|
-
concord (~> 0.1.5)
|
|
108
|
-
diff-lcs (~> 1.2.5)
|
|
109
|
-
equalizer (~> 0.0.9)
|
|
110
|
-
parser (~> 2.2.0.2)
|
|
111
|
-
procto (~> 0.0.2)
|
|
112
|
-
virtus (1.0.4)
|
|
113
|
-
axiom-types (~> 0.1)
|
|
114
|
-
coercible (~> 1.0)
|
|
115
|
-
descendants_tracker (~> 0.0, >= 0.0.3)
|
|
116
|
-
equalizer (~> 0.0, >= 0.0.9)
|
|
117
|
-
|
|
118
|
-
PLATFORMS
|
|
119
|
-
ruby
|
|
120
|
-
|
|
121
|
-
DEPENDENCIES
|
|
122
|
-
jeweler
|
|
123
|
-
libxml-ruby (~> 2.6)
|
|
124
|
-
open4 (= 1.3.0)
|
|
125
|
-
rdoc
|
|
126
|
-
rubocop
|
|
127
|
-
rubycritic
|
data/jars/lib/poi-3.11.jar
DELETED
|
Binary file
|
data/jars/lib/poi-ooxml-3.11.jar
DELETED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
this is not a spreadsheet
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/test/test_extraction.rb
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
require 'test/unit'
|
|
2
|
-
require 'simple-spreadsheet-extractor'
|
|
3
|
-
require 'libxml'
|
|
4
|
-
|
|
5
|
-
class TestExtraction < Test::Unit::TestCase
|
|
6
|
-
|
|
7
|
-
SCHEMA_FILE_PATH = File.dirname(__FILE__) + "/../doc/schema-v1.xsd"
|
|
8
|
-
|
|
9
|
-
include SysMODB::SpreadsheetExtractor
|
|
10
|
-
|
|
11
|
-
def test_from_non_file_io_object
|
|
12
|
-
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
13
|
-
contents = open(test_sheet, "rb") {|io| io.read }
|
|
14
|
-
io=StringIO.new contents
|
|
15
|
-
xml = spreadsheet_to_xml(io)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def test_validate_xml
|
|
19
|
-
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
20
|
-
f=open(test_sheet,"rb")
|
|
21
|
-
xml = spreadsheet_to_xml(f)
|
|
22
|
-
validate_against_schema(xml)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def test_validate_xml_xlsx
|
|
26
|
-
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xlsx"
|
|
27
|
-
f=open(test_sheet,"rb")
|
|
28
|
-
xml = spreadsheet_to_xml(f)
|
|
29
|
-
validate_against_schema(xml)
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
def test_failure
|
|
33
|
-
test_sheet = File.dirname(__FILE__) + "/files/not-a-spreadsheet.xls"
|
|
34
|
-
f=open(test_sheet,"rb")
|
|
35
|
-
assert_raise SysMODB::SpreadsheetExtractionException do
|
|
36
|
-
spreadsheet_to_xml(f)
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def test_problem_with_dv
|
|
41
|
-
test_sheet = File.dirname(__FILE__) + "/files/problem_with_dv.xls"
|
|
42
|
-
f=open(test_sheet,"rb")
|
|
43
|
-
xml = spreadsheet_to_xml(f)
|
|
44
|
-
validate_against_schema(xml)
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
def test_invalid_xml_chars
|
|
48
|
-
test_sheet = File.dirname(__FILE__) + "/files/xml-unfriendly-chars.xlsx"
|
|
49
|
-
f=open(test_sheet,"rb")
|
|
50
|
-
xml = spreadsheet_to_xml(f)
|
|
51
|
-
validate_against_schema(xml)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def validate_against_schema xml
|
|
55
|
-
document = LibXML::XML::Document.string(xml)
|
|
56
|
-
schema = LibXML::XML::Schema.new(SCHEMA_FILE_PATH)
|
|
57
|
-
begin
|
|
58
|
-
document.validate_schema(schema)
|
|
59
|
-
rescue LibXML::XML::Error => e
|
|
60
|
-
puts xml
|
|
61
|
-
assert false,"Error validating against schema: #{e.message}"
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
def test_csv_output
|
|
66
|
-
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
67
|
-
expected_file = File.dirname(__FILE__) + "/files/test-csv-output1.csv"
|
|
68
|
-
expected = open(expected_file,"rb").read.strip
|
|
69
|
-
|
|
70
|
-
f=open(test_sheet,"rb")
|
|
71
|
-
csv = spreadsheet_to_csv(f,2)
|
|
72
|
-
assert_equal expected,csv
|
|
73
|
-
|
|
74
|
-
#try sheet as a string
|
|
75
|
-
f=open(test_sheet,"rb")
|
|
76
|
-
csv = spreadsheet_to_csv(f,"2")
|
|
77
|
-
assert_equal expected,csv
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
# def test_csv_output_trimmed
|
|
81
|
-
# test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
82
|
-
# expected_file = File.dirname(__FILE__) + "/files/test-csv-output1-trimmed.csv"
|
|
83
|
-
# expected = open(expected_file,"rb").read
|
|
84
|
-
#
|
|
85
|
-
# f=open(test_sheet,"rb")
|
|
86
|
-
# csv = spreadsheet_to_csv(f,2,true)
|
|
87
|
-
# assert_equal expected,csv
|
|
88
|
-
# end
|
|
89
|
-
|
|
90
|
-
def test_for_segfault
|
|
91
|
-
test_sheet = File.dirname(__FILE__) + "/files/test-spreadsheet.xls"
|
|
92
|
-
5.times do |x|
|
|
93
|
-
f=open(test_sheet,"rb")
|
|
94
|
-
xml = spreadsheet_to_xml(f)
|
|
95
|
-
end
|
|
96
|
-
true
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
end
|