csvpp 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.gitlab-ci.yml +13 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +51 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/config.ru +4 -0
- data/csvpp.gemspec +42 -0
- data/exe/csvpp +29 -0
- data/lib/csvpp/api.rb +25 -0
- data/lib/csvpp/conversions.rb +102 -0
- data/lib/csvpp/format.rb +126 -0
- data/lib/csvpp/parser.rb +162 -0
- data/lib/csvpp/version.rb +3 -0
- data/lib/csvpp.rb +49 -0
- data/ui/.gitignore +21 -0
- data/ui/README.md +2229 -0
- data/ui/package.json +19 -0
- data/ui/public/favicon.ico +0 -0
- data/ui/public/index.html +40 -0
- data/ui/public/manifest.json +15 -0
- data/ui/src/App.css +7 -0
- data/ui/src/App.js +105 -0
- data/ui/src/App.test.js +8 -0
- data/ui/src/Editor.js +20 -0
- data/ui/src/index.css +5 -0
- data/ui/src/index.js +8 -0
- data/ui/src/logo.svg +7 -0
- data/ui/src/registerServiceWorker.js +108 -0
- data/ui/yarn.lock +6642 -0
- metadata +192 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 96198044193b9b7a62d2e35810217bfc5b603c7b
|
4
|
+
data.tar.gz: c5f025bd90f45294986eb3892c5db6d81f2b5498
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3b35fd7ed568851367faf5b16c329271fa4d7d5935161037ceb1e8fba739d84515c8fec63c0382394156914e3d6eb2d4116b3c75ed0a88986e6554487b3b16db
|
7
|
+
data.tar.gz: 211dc3241614ba542156221574054264528a2baed9f99c8875c3e01d041d0a04065723cb5bc5e4cf04f539822131c96ed92789fdf3d4c72c152cbab55d7a0830
|
data/.gitignore
ADDED
data/.gitlab-ci.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4.0
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
csvpp (0.3.0)
|
5
|
+
oj (~> 3.3.9)
|
6
|
+
sinatra (~> 2.0)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
byebug (9.1.0)
|
12
|
+
coderay (1.1.2)
|
13
|
+
method_source (0.9.0)
|
14
|
+
minitest (5.10.3)
|
15
|
+
mustermann (1.0.1)
|
16
|
+
oj (3.3.10)
|
17
|
+
pry (0.11.3)
|
18
|
+
coderay (~> 1.1.0)
|
19
|
+
method_source (~> 0.9.0)
|
20
|
+
pry-byebug (3.5.0)
|
21
|
+
byebug (~> 9.1)
|
22
|
+
pry (~> 0.10)
|
23
|
+
pry-doc (0.11.1)
|
24
|
+
pry (~> 0.9)
|
25
|
+
yard (~> 0.9)
|
26
|
+
rack (2.0.3)
|
27
|
+
rack-protection (2.0.0)
|
28
|
+
rack
|
29
|
+
rake (12.3.0)
|
30
|
+
sinatra (2.0.0)
|
31
|
+
mustermann (~> 1.0)
|
32
|
+
rack (~> 2.0)
|
33
|
+
rack-protection (= 2.0.0)
|
34
|
+
tilt (~> 2.0)
|
35
|
+
tilt (2.0.8)
|
36
|
+
yard (0.9.11)
|
37
|
+
|
38
|
+
PLATFORMS
|
39
|
+
ruby
|
40
|
+
|
41
|
+
DEPENDENCIES
|
42
|
+
bundler
|
43
|
+
csvpp!
|
44
|
+
minitest
|
45
|
+
pry
|
46
|
+
pry-byebug
|
47
|
+
pry-doc
|
48
|
+
rake
|
49
|
+
|
50
|
+
BUNDLED WITH
|
51
|
+
1.14.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Rathesan Iyadurai
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# CSV++
|
2
|
+
|
3
|
+
CSV++ takes a `<DELIMITER>`-separated input file and a JSON format specification
|
4
|
+
and turns it into Ruby Objects. See `test/sample_inputs/simple.txt` and
|
5
|
+
`test/sample_formats/simple.json` for an example.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'csvpp'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install csvpp
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
CSVPP.parse(
|
27
|
+
input: 'test/sample_inputs/simple.txt',
|
28
|
+
format: 'test/sample_formats/simple.json'
|
29
|
+
) # => [{"v1"=>34, "line_number"=>1, "v2"=>"foobar"}, {"v1"=>99, "line_number"=>2, "v2"=>"hi there"}]
|
30
|
+
```
|
31
|
+
|
32
|
+
## Development
|
33
|
+
|
34
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
35
|
+
`rake test` to run the tests. You can also run `bin/console` for an interactive
|
36
|
+
prompt that will allow you to experiment.
|
37
|
+
|
38
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
39
|
+
release a new version, update the version number in `version.rb`, and then run
|
40
|
+
`bundle exec rake release`, which will create a git tag for the version, push
|
41
|
+
git commits and tags, and push the `.gem` file to
|
42
|
+
[rubygems.org](https://rubygems.org).
|
43
|
+
|
44
|
+
## License
|
45
|
+
|
46
|
+
The gem is available as open source under the terms of the [MIT
|
47
|
+
License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console for the gem: sets up the bundle, loads csvpp and opens
# a Pry session whose context object is the CSVPP module.

require "bundler/setup"
require "csvpp"
require "pry"

Pry.start(CSVPP)
|
data/bin/setup
ADDED
data/config.ru
ADDED
data/csvpp.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
|
2
|
+
# Make the gem's own lib/ directory loadable so the version constant can be
# read while the specification is being evaluated.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "csvpp/version"

Gem::Specification.new do |spec|
  spec.name          = "csvpp"
  spec.version       = CSVPP::VERSION
  spec.authors       = ["SwissDRG AG"]
  spec.email         = ["rathesan.iyadurai@swissdrg.org"]

  spec.summary       = %q{CSV++}
  spec.description   = %q{CSV++}
  spec.homepage      = "https://www.swissdrg.org"
  spec.license       = "MIT"

  # No `allowed_push_host` metadata is set here, so pushes are not restricted
  # to a single gem host.

  # Package every git-tracked file except the test/spec/features trees.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end

  spec.bindir        = "exe"
  # Every packaged file under exe/ becomes an installed executable.
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency 'sinatra', '~> 2.0'
  spec.add_dependency 'oj', '~> 3.3.9'

  # Development-only dependencies (no version constraints).
  %w[bundler rake minitest pry pry-byebug pry-doc].each do |dev_gem|
    spec.add_development_dependency dev_gem
  end
end
|
data/exe/csvpp
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for CSV++.
#
# Reads the input from the files named on the command line (or from STDIN when
# none are given), converts it with the JSON format specification passed via
# -f/--format, and prints the resulting JSON indented on STDOUT.

require 'json'
require 'optparse'
require_relative '../lib/csvpp'

options = {}
option_parser = OptionParser.new do |opts|
  opts.on('-f', '--format FORMAT') do |format|
    options[:format] = format.strip
  end

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
end

option_parser.parse!

format_path = options[:format]

# The format file is mandatory. Without this guard a missing -f option would
# reach File.read(nil) and fail with an unhelpful TypeError.
if format_path.nil? || format_path.empty?
  warn option_parser.help
  exit 1
end

json = CSVPP.json(
  input: ARGF.read,
  format: File.read(format_path)
)

# Pretty-print in-process instead of interpolating the JSON into a shell
# command: the data may contain quotes or other shell metacharacters, and
# shelling out required a Python interpreter on the PATH. Four-space
# indentation keeps the layout used by the previous `python -m json.tool` step.
puts JSON.pretty_generate(JSON.parse(json), indent: '    ')
|
data/lib/csvpp/api.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
|
3
|
+
module CSVPP
  # Sinatra application exposing the parser over HTTP.
  #
  # POST /parse expects a JSON object with the keys "input" and "format" and
  # responds with the JSON produced by CSVPP.json. Every response carries a
  # permissive CORS origin header and a JSON content type.
  class API < Sinatra::Base
    # Response body sent with status 400 when the request cannot be used.
    BAD_REQUEST_BODY =
      '{"error":"request body must be a JSON object with the keys input and format"}'.freeze

    before do
      headers 'Access-Control-Allow-Origin' => '*'

      content_type :json
    end

    post '/parse' do
      # The body comes from an arbitrary client, so it is parsed in Oj's
      # strict mode, which only produces plain JSON values.
      payload = begin
        Oj.load(request.body.read, mode: :strict)
      rescue Oj::ParseError
        halt 400, BAD_REQUEST_BODY
      end

      # Reject anything that is not an object carrying both required keys
      # instead of failing later with a KeyError/TypeError.
      unless payload.is_a?(Hash) && payload.key?('input') && payload.key?('format')
        halt 400, BAD_REQUEST_BODY
      end

      CSVPP.json(input: payload['input'], format: payload['format'])
    end

    # Answers OPTIONS requests for any path (e.g. CORS preflight requests).
    options "*" do
      response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS"
      response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept"
      200
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
|
3
|
+
module CSVPP
  # Stateless helpers that turn raw cell values into typed Ruby values.
  # All methods are module functions, e.g. `Conversions.convert("3", to: "int")`.
  module Conversions
    module_function

    # Converts `obj` by dispatching to the `parse_<to>` method of this module.
    #
    # @param obj [Object] object to parse
    # @param to [String] a type, e.g. "int"
    # @param missings [Array] list of values that are treated as missings, e.g. ['NA', '-', -999]
    # @param options [Hash] options passed on to parsing methods for specific types
    # @return parsed value, read from `obj`, interpreted as type given by `to`;
    #   nil if `obj` is one of the `missings`
    # @raise [NoMethodError] if there is no `parse_<to>` method
    def convert(obj, to:, missings: [], **options)
      return nil if missing?(obj, missings)

      send("parse_#{to}", obj, **options)
    end

    # @return [String] `str` converted with `to_s`
    def parse_string(str, **_options)
      str.to_s
    end

    # Parses an integer. For String input, thousand separators, the decimal
    # part, trailing words and leading zeros are removed first.
    #
    # @return [Integer, nil] nil for empty or unparsable input
    def parse_int(str, **_options)
      return nil if str.to_s.empty?

      cleaned =
        if str.is_a?(String)
          val = str.strip
                   .gsub(/['`\s]?/, '')      # remove thousand separators
                   .sub(/\.\d*/, '')         # remove decimal point and everything thereafter
                   .sub(/[\sa-zA-Z]*$/, '')  # remove trailing words like "mg"
                   .sub(/^-0*(.+)$/, '-\1')  # remove 0 after negative sign: -003 => -3
          # Leading zeros must go, otherwise Integer() reads the value as octal.
          val =~ /^0+$/ ? '0' : val.gsub(/^0*/, '')
        else
          str
        end

      begin
        Integer(cleaned)
      rescue ArgumentError, TypeError, RangeError
        nil
      end
    end

    # @return [Float, nil] nil for empty or unparsable input
    def parse_float(str, **_options)
      return nil if str.to_s.empty?

      begin
        Float(clean_decimal(str))
      rescue ArgumentError, TypeError, RangeError
        nil
      end
    end

    # Accepts (and ignores) extra options like the other parsers, so that
    # `convert` can forward type-specific options regardless of the type.
    #
    # @return [BigDecimal, nil] nil if the input is empty before or after cleaning
    def parse_decimal(str, **_options)
      return nil if str.to_s.empty?

      cleaned = clean_decimal(str).to_s
      return nil if cleaned.empty?

      BigDecimal(cleaned)
    end

    # @return [Date, nil] nil for empty input, like the numeric parsers
    # @raise [ArgumentError] if the non-empty input is not a valid date
    def parse_date(str, **_options)
      text = str.to_s
      return nil if text.empty?

      Date.parse(text)
    end

    # The comparison ignores case and surrounding whitespace of `str`.
    #
    # @param true_values [Array] list of values that are interpreted as `true`;
    #   defaults to '1', 't' and 'true' when empty
    # @param false_values [Array] list of values that are interpreted as `false`;
    #   defaults to '0', 'f' and 'false' when empty
    # @return true or false, or
    #   nil if `str` doesn't match any value interpreted as `true` or `false`
    def parse_boolean(str, true_values: [], false_values: [])
      cleaned = str.to_s.strip.downcase

      trues =
        if true_values.empty?
          ['1', 't', 'true']
        else
          true_values.map(&:to_s).map(&:downcase)
        end
      return true if trues.include?(cleaned)

      falses =
        if false_values.empty?
          ['0', 'f', 'false']
        else
          false_values.map(&:to_s).map(&:downcase)
        end
      return false if falses.include?(cleaned)

      nil
    end

    # @return [Boolean] whether the string form of `obj` equals the string
    #   form of one of the `missings`
    def missing?(obj, missings)
      missings.map(&:to_s).include?(obj.to_s)
    end

    # Normalizes a numeric String; anything that is not a String is returned
    # unchanged.
    #
    # @return [String, Object] the cleaned string or the untouched `str`
    def clean_decimal(str)
      return str unless str.is_a?(String)

      val = str.strip
               .gsub(/['`\s]?/, '')      # remove thousand separators
               .sub(/[\sa-zA-Z]*$/, '')  # remove trailing words like "mg"
               .sub(/^-0*(.+)$/, '-\1')  # remove 0 after negative sign: -003 => -3

      # remove leading zeros, but keep a single zero for an all-zero value
      val =~ /^0+$/ ? '0' : val.gsub(/^0*/, '')
    end
  end
end
|
data/lib/csvpp/format.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
module CSVPP
  # In-memory representation of a format specification: the variables of an
  # input file together with their position, type and optional attributes.
  # The class itself also serves as a registry of named formats.
  class Format
    attr_reader :name, :skip

    class << self
      # Registers a format under the given name.
      #
      # @param name [String] unique name of the format
      # @param format [Format]
      def add(name, format)
        store[name] = format
      end

      # @param name [String] unique name of the format
      # @return [Format]
      # @raise [KeyError] if no format was added under `name`
      def find(name)
        store.fetch(name)
      end

      # @param path [String] path to format file
      # @return [Format]
      def load(path)
        load_from_str File.read(path)
      end

      # Parses in Oj's strict mode: the JSON may originate from outside the
      # application, so only plain JSON values are allowed to be built.
      #
      # @param json [String]
      # @return [Format]
      def load_from_str(json)
        new Oj.load(json, mode: :strict)
      end

      # @return [Array<Format>] all formats registered via `add`
      def all
        store.values
      end

      # @return [Hash] registry mapping names to formats (created on first use)
      def store
        @store ||= {}
      end
    end

    # @param format [Hash] parsed format definition; must contain 'vars', and
    #   additionally 'start' when 'multiline' is true
    def initialize(format)
      @name = format['name']
      @multiline = format['multiline'].to_s.strip.downcase == 'true'
      @skip = format['skip'].to_i
      @vars = format.fetch('vars')

      return unless multiline?

      # Map each line id to the names of the variables defined for that line.
      @vars_grouped_by_line =
        vars.group_by { |_name, meta| meta['line'] }
            .transform_values { |pairs| pairs.map(&:first) }

      @multiline_start = format.fetch('start')
    end

    # @return [Array<String>] names of all variables of this format
    def var_names
      vars.keys
    end

    # @return [Integer] number of variables
    def length
      var_names.count
    end

    # @return [Integer] zero-based index derived from the variable's position
    def index(var)
      position(var) - 1
    end

    # @return the 'position' attribute of the given variable
    def position(var)
      vars.fetch(var)['position']
    end

    # @return the 'type' attribute of the given variable
    def type(var)
      vars.fetch(var)['type']
    end

    # @param var [String] name of the variable for which the missings are required
    # @return [Array] an array of missing values (can be empty if no missings were defined)
    def missings(var)
      array_from(var, 'missings')
    end

    # Returns the values that are defined as `true` in the format's json
    # definition for the given variable.
    # @return [Array] all values that should be interpreted as true for this
    #   variable; empty unless the variable's type is "boolean"
    def true_values(var)
      return [] unless type(var) == "boolean"

      array_from(var, 'true_values')
    end

    # Returns the values that are defined as `false` in the format's json
    # definition for the given variable.
    # @return [Array] all values that should be interpreted as `false` for this
    #   variable; empty unless the variable's type is "boolean"
    def false_values(var)
      return [] unless type(var) == "boolean"

      array_from(var, 'false_values')
    end

    # Only usable for multiline formats, where the grouping is built.
    #
    # @return [Array<String>] names of the variables defined for `line_id`
    def vars_for_line(line_id)
      vars_grouped_by_line.fetch(line_id)
    end

    # @return [Boolean] whether `line_id` equals the format's 'start' value
    def multiline_start?(line_id)
      multiline_start == line_id
    end

    # @return [Boolean] whether the format was declared as multiline
    def multiline?
      @multiline
    end

    private

    # Returns the value or values specified for the given attribute of the given
    # variable in the format's json. An empty array if no such attribute was
    # defined for the given variable.
    # @return [Array] value(s) defined for given attribute for given variable
    def array_from(var, attribute)
      value = vars.fetch(var)[attribute]
      return [] if value.nil?
      return value if value.is_a?(Array)

      [value]
    end

    attr_reader :vars, :vars_grouped_by_line, :multiline_start
  end
end
|