csvpp 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.gitlab-ci.yml +1 -1
- data/.rubocop.yml +78 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +24 -4
- data/README.md +28 -29
- data/bin/profile +19 -0
- data/csvpp.gemspec +7 -1
- data/documentation/Technisches_Begleitblatt_2017_d.pdf +0 -0
- data/exe/csvpp +2 -27
- data/lib/csvpp.rb +36 -8
- data/lib/csvpp/cli.rb +80 -0
- data/lib/csvpp/conversions.rb +60 -6
- data/lib/csvpp/core_extensions.rb +41 -0
- data/lib/csvpp/format.rb +24 -33
- data/lib/csvpp/formats_client.rb +37 -0
- data/lib/csvpp/os.rb +50 -0
- data/lib/csvpp/parser.rb +30 -33
- data/lib/csvpp/sqlite_importer.rb +47 -0
- data/lib/csvpp/version.rb +1 -1
- metadata +103 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f70243c72416b66f569849a07a20845019889f990cfb3b6ad3859886b0c5a387
|
4
|
+
data.tar.gz: da8124bf9974a3ebc0855c9a799e8b17c55c139879c46600c9597712af7c4a61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f33f39b8ef836fd0fa54af8f5a185095bb0daabd060476c1f71c790b28606e3009e2a1aa69ba2ffdd0652ce9a84ce007e5d5c13757afd7d209868bd52b932402
|
7
|
+
data.tar.gz: 907a4e1d8fc6a1933629632a4fc5a2ac724f6d3f1498e769639e598ca6b749e9c03a4ab1d0099c56d5e89491a788e0d5c91ac27dd2716d7c9a6449e49f7bda41
|
data/.gitignore
CHANGED
data/.gitlab-ci.yml
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
AllCops:
|
2
|
+
DisplayCopNames: true
|
3
|
+
DisplayStyleGuide: true
|
4
|
+
TargetRubyVersion: 2.5
|
5
|
+
Include:
|
6
|
+
- Rakefile
|
7
|
+
- config.ru
|
8
|
+
Exclude:
|
9
|
+
- Gemfile
|
10
|
+
- csvpp.gemspec
|
11
|
+
|
12
|
+
Metrics/LineLength:
|
13
|
+
Max: 80
|
14
|
+
Exclude:
|
15
|
+
- test/**/**
|
16
|
+
|
17
|
+
Style/ClassAndModuleChildren:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Style/PercentLiteralDelimiters:
|
21
|
+
Enabled: false
|
22
|
+
|
23
|
+
Style/NumericLiteralPrefix:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
Style/Documentation:
|
27
|
+
Enabled: false
|
28
|
+
|
29
|
+
Style/EmptyMethod:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
Style/DoubleNegation:
|
33
|
+
Enabled: false
|
34
|
+
|
35
|
+
Style/WordArray:
|
36
|
+
Exclude:
|
37
|
+
- test/**/*
|
38
|
+
|
39
|
+
Metrics/ClassLength:
|
40
|
+
Enabled: false
|
41
|
+
|
42
|
+
Metrics/ModuleLength:
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Metrics/MethodLength:
|
46
|
+
Enabled: false
|
47
|
+
|
48
|
+
Metrics/BlockLength:
|
49
|
+
Enabled: false
|
50
|
+
|
51
|
+
Metrics/CyclomaticComplexity:
|
52
|
+
Enabled: false
|
53
|
+
|
54
|
+
Metrics/PerceivedComplexity:
|
55
|
+
Enabled: false
|
56
|
+
|
57
|
+
Metrics/AbcSize:
|
58
|
+
Enabled: false
|
59
|
+
|
60
|
+
Naming/HeredocDelimiterNaming:
|
61
|
+
Enabled: false
|
62
|
+
|
63
|
+
Layout/MultilineMethodCallIndentation:
|
64
|
+
EnforcedStyle: indented_relative_to_receiver
|
65
|
+
|
66
|
+
Layout/AlignHash:
|
67
|
+
Exclude:
|
68
|
+
- lib/csvpp/cli.rb
|
69
|
+
|
70
|
+
Lint/UnusedBlockArgument:
|
71
|
+
Exclude:
|
72
|
+
- lib/csvpp/cli.rb
|
73
|
+
- lib/csvpp/conversions.rb
|
74
|
+
|
75
|
+
Lint/UnusedMethodArgument:
|
76
|
+
Exclude:
|
77
|
+
- lib/csvpp/cli.rb
|
78
|
+
- lib/csvpp/conversions.rb
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.5.1
|
data/Gemfile.lock
CHANGED
@@ -1,17 +1,27 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
csvpp (0.
|
5
|
-
|
4
|
+
csvpp (0.4.0)
|
5
|
+
gli (~> 2.17.1)
|
6
|
+
httparty (~> 0.16.2)
|
7
|
+
oj (~> 3.6.2)
|
8
|
+
sequel (~> 5.9)
|
9
|
+
sqlite3 (~> 1.3, >= 1.3.13)
|
6
10
|
|
7
11
|
GEM
|
8
12
|
remote: https://rubygems.org/
|
9
13
|
specs:
|
10
14
|
byebug (9.1.0)
|
11
15
|
coderay (1.1.2)
|
16
|
+
docile (1.3.1)
|
17
|
+
gli (2.17.1)
|
18
|
+
httparty (0.16.2)
|
19
|
+
multi_xml (>= 0.5.2)
|
20
|
+
json (2.1.0)
|
12
21
|
method_source (0.9.0)
|
13
22
|
minitest (5.10.3)
|
14
|
-
|
23
|
+
multi_xml (0.6.0)
|
24
|
+
oj (3.6.3)
|
15
25
|
pry (0.11.3)
|
16
26
|
coderay (~> 1.1.0)
|
17
27
|
method_source (~> 0.9.0)
|
@@ -22,6 +32,14 @@ GEM
|
|
22
32
|
pry (~> 0.9)
|
23
33
|
yard (~> 0.9)
|
24
34
|
rake (12.3.0)
|
35
|
+
ruby-prof (0.16.2)
|
36
|
+
sequel (5.9.0)
|
37
|
+
simplecov (0.16.1)
|
38
|
+
docile (~> 1.1)
|
39
|
+
json (>= 1.8, < 3)
|
40
|
+
simplecov-html (~> 0.10.0)
|
41
|
+
simplecov-html (0.10.2)
|
42
|
+
sqlite3 (1.3.13)
|
25
43
|
yard (0.9.11)
|
26
44
|
|
27
45
|
PLATFORMS
|
@@ -35,6 +53,8 @@ DEPENDENCIES
|
|
35
53
|
pry-byebug
|
36
54
|
pry-doc
|
37
55
|
rake
|
56
|
+
ruby-prof
|
57
|
+
simplecov
|
38
58
|
|
39
59
|
BUNDLED WITH
|
40
|
-
1.
|
60
|
+
1.16.2
|
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# CSV++
|
2
2
|
|
3
|
+
![build](http://gitlab/gems/csvpp/badges/master/build.svg)
|
4
|
+
![coverage](http://gitlab/gems/csvpp/badges/master/coverage.svg)
|
5
|
+
|
3
6
|
CSV++ takes a `<DELIMITER>` separated input file and a JSON format specification
|
4
7
|
and turns it into Ruby Objects. See `test/sample_inputs/simple.txt` and
|
5
8
|
`test/sample_formats/simple.json` for example.
|
@@ -24,6 +27,15 @@ Or install it yourself as:
|
|
24
27
|
$ gem install csvpp
|
25
28
|
```
|
26
29
|
|
30
|
+
### For development of CSV++
|
31
|
+
|
32
|
+
First make sure that you have bundler and the Ruby version installed that is
|
33
|
+
specified in `.ruby-version`. Then run:
|
34
|
+
|
35
|
+
```
|
36
|
+
$ bundle
|
37
|
+
```
|
38
|
+
|
27
39
|
## Usage
|
28
40
|
|
29
41
|
```ruby
|
@@ -35,39 +47,26 @@ CSVPP.parse(
|
|
35
47
|
|
36
48
|
### CLI
|
37
49
|
|
38
|
-
CSV++ comes with a CLI
|
39
|
-
|
50
|
+
CSV++ comes with a CLI that speaks with the [Formats API](http://formats.iapps.swissdrg.local/api/formats).
|
51
|
+
|
52
|
+
To print a list of known formats run:
|
40
53
|
|
41
54
|
```
|
42
|
-
$ csvpp
|
43
|
-
|
44
|
-
{
|
45
|
-
"vars": [
|
46
|
-
{
|
47
|
-
"line_number": 1,
|
48
|
-
"v1": 34,
|
49
|
-
"v2": "foobar",
|
50
|
-
"v3": 1.1,
|
51
|
-
"v4": false
|
52
|
-
},
|
53
|
-
{
|
54
|
-
"line_number": 2,
|
55
|
-
"v1": 99,
|
56
|
-
"v2": "hi there",
|
57
|
-
"v3": 2.2,
|
58
|
-
"v4": true
|
59
|
-
},
|
60
|
-
{
|
61
|
-
"line_number": 3,
|
62
|
-
"v1": null,
|
63
|
-
"v2": "Missing",
|
64
|
-
"v3": null,
|
65
|
-
"v4": true
|
66
|
-
}
|
67
|
-
]
|
68
|
-
}
|
55
|
+
$ csvpp formats
|
69
56
|
```
|
70
57
|
|
58
|
+
The `parse` command can be used to parse an input file with a given format,
|
59
|
+
either fetched from the API or from a local JSON specification file.
|
60
|
+
|
61
|
+
For example, to import some REKOLE cost data to a local sqlite DB for querying,
|
62
|
+
run:
|
63
|
+
|
64
|
+
```
|
65
|
+
$ csvpp parse --format fk_2017 --output costs.db test/sample_inputs/fk_2017.txt
|
66
|
+
```
|
67
|
+
|
68
|
+
Run `csvpp help parse` for more details.
|
69
|
+
|
71
70
|
## Development
|
72
71
|
|
73
72
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
data/bin/profile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'csvpp'
|
6
|
+
|
7
|
+
require 'ruby-prof'
|
8
|
+
|
9
|
+
f = CSVPP::Format.load(ARGV[0])
|
10
|
+
result = RubyProf.profile do
|
11
|
+
CSVPP::Parser.parse(
|
12
|
+
input: ARGV[1],
|
13
|
+
format: f,
|
14
|
+
convert_type: false
|
15
|
+
)
|
16
|
+
end
|
17
|
+
|
18
|
+
printer = RubyProf::FlatPrinter.new(result)
|
19
|
+
printer.print(STDOUT)
|
data/csvpp.gemspec
CHANGED
@@ -30,7 +30,11 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
32
|
|
33
|
-
spec.add_dependency 'oj', '~> 3.
|
33
|
+
spec.add_dependency 'oj', '~> 3.6.2'
|
34
|
+
spec.add_dependency 'gli', '~> 2.17.1'
|
35
|
+
spec.add_dependency 'httparty', '~> 0.16.2'
|
36
|
+
spec.add_dependency 'sqlite3', '~> 1.3', '>= 1.3.13'
|
37
|
+
spec.add_dependency 'sequel', '~> 5.9'
|
34
38
|
|
35
39
|
spec.add_development_dependency "bundler"
|
36
40
|
spec.add_development_dependency "rake"
|
@@ -38,4 +42,6 @@ Gem::Specification.new do |spec|
|
|
38
42
|
spec.add_development_dependency "pry"
|
39
43
|
spec.add_development_dependency "pry-byebug"
|
40
44
|
spec.add_development_dependency "pry-doc"
|
45
|
+
spec.add_development_dependency "ruby-prof"
|
46
|
+
spec.add_development_dependency "simplecov"
|
41
47
|
end
|
Binary file
|
data/exe/csvpp
CHANGED
@@ -1,29 +1,4 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
$options = {}
|
7
|
-
option_parser = OptionParser.new do |opts|
|
8
|
-
opts.on('-f', '--format FORMAT') do |format|
|
9
|
-
$options[:format] = format.strip
|
10
|
-
end
|
11
|
-
|
12
|
-
opts.on_tail('-h', '--help', 'Show this message') do
|
13
|
-
puts opts
|
14
|
-
exit
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
HELP = option_parser.help
|
19
|
-
option_parser.parse!
|
20
|
-
|
21
|
-
format = $options[:format]
|
22
|
-
|
23
|
-
json = CSVPP.json(
|
24
|
-
input: ARGF.read,
|
25
|
-
format: File.read(format)
|
26
|
-
)
|
27
|
-
|
28
|
-
# Forgive me Matz for I have sinned
|
29
|
-
system %{echo '#{json}' | python -m json.tool}
|
3
|
+
require_relative '../lib/csvpp/cli'
|
4
|
+
exit CSVPP::CLI.run(ARGV)
|
data/lib/csvpp.rb
CHANGED
@@ -1,49 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'oj'
|
4
|
+
Oj.default_options = { mode: :compat }
|
2
5
|
|
3
6
|
require_relative './csvpp/version'
|
7
|
+
require_relative './csvpp/os'
|
4
8
|
require_relative './csvpp/conversions'
|
5
9
|
require_relative './csvpp/format'
|
6
10
|
require_relative './csvpp/parser'
|
11
|
+
require_relative './csvpp/sqlite_importer'
|
12
|
+
require_relative './csvpp/formats_client'
|
7
13
|
|
8
14
|
module CSVPP
|
9
|
-
|
10
15
|
DEFAULT_COL_SEP = '|'
|
11
16
|
|
12
17
|
# @param input [String] path to input file
|
13
|
-
# @param format [String] path to format file
|
18
|
+
# @param format [String, Format] path to format file, or a Format
|
14
19
|
# @param col_sep [String]
|
15
20
|
#
|
16
21
|
# @return [Array<Object>]
|
17
|
-
def self.parse(input:,
|
22
|
+
def self.parse(input:,
|
23
|
+
format:,
|
24
|
+
col_sep: DEFAULT_COL_SEP,
|
25
|
+
convert_type: true,
|
26
|
+
&block)
|
27
|
+
|
18
28
|
Parser.parse(
|
19
29
|
input: input,
|
20
30
|
format: Format.load(format),
|
21
31
|
col_sep: col_sep,
|
32
|
+
convert_type: convert_type,
|
22
33
|
&block
|
23
34
|
)
|
24
35
|
end
|
25
36
|
|
26
37
|
# @param input [String] input string
|
27
|
-
# @param format [String] format string
|
38
|
+
# @param format [String, Format] format JSON string, or a Format
|
28
39
|
# @param col_sep [String]
|
29
40
|
#
|
30
41
|
# @return [Array<Object>]
|
31
|
-
def self.parse_str(input:,
|
42
|
+
def self.parse_str(input:,
|
43
|
+
format:,
|
44
|
+
col_sep: DEFAULT_COL_SEP,
|
45
|
+
convert_type: true,
|
46
|
+
&block)
|
47
|
+
|
32
48
|
Parser.parse_str(
|
33
49
|
input: input,
|
34
50
|
format: Format.load_from_str(format),
|
35
51
|
col_sep: col_sep,
|
52
|
+
convert_type: convert_type,
|
36
53
|
&block
|
37
54
|
)
|
38
55
|
end
|
39
56
|
|
40
57
|
# @param input [String] input string
|
41
|
-
# @param format [String] format string
|
58
|
+
# @param format [String, Format] format JSON string, or a Format
|
42
59
|
# @param col_sep [String]
|
43
60
|
#
|
44
61
|
# @return [String]
|
45
|
-
def self.json(input:,
|
46
|
-
|
62
|
+
def self.json(input:,
|
63
|
+
format:,
|
64
|
+
convert_type: true,
|
65
|
+
col_sep: DEFAULT_COL_SEP)
|
66
|
+
h = {
|
67
|
+
'vars' => parse_str(
|
68
|
+
input: input,
|
69
|
+
format: format,
|
70
|
+
convert_type: convert_type,
|
71
|
+
col_sep: col_sep
|
72
|
+
)
|
73
|
+
}
|
74
|
+
|
47
75
|
Oj.dump(h)
|
48
76
|
end
|
49
77
|
end
|
data/lib/csvpp/cli.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require_relative '../csvpp'
|
5
|
+
require_relative '../csvpp/core_extensions'
|
6
|
+
|
7
|
+
module CSVPP
|
8
|
+
# CSV++ command line interface.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# exit CSVPP::CLI.run(ARGV)
|
12
|
+
#
|
13
|
+
class CLI
|
14
|
+
using CoreExtensions
|
15
|
+
extend GLI::App
|
16
|
+
|
17
|
+
CLIENT = FormatsClient.new
|
18
|
+
|
19
|
+
program_desc 'CSV++ Command Line Interface'
|
20
|
+
|
21
|
+
desc 'Parse input files'
|
22
|
+
command :parse do |c|
|
23
|
+
c.switch %i[convert-types], default_value: true, negatable: true
|
24
|
+
c.switch %i[open], default_value: true, negatable: true,
|
25
|
+
desc: 'Whether to open the output with the default application'
|
26
|
+
|
27
|
+
c.flag %i[f format], required: true,
|
28
|
+
desc: 'Format identifier or local file path to a JSON format'
|
29
|
+
|
30
|
+
c.flag %i[o output], required: true, desc: 'Output file'
|
31
|
+
c.flag %i[s separator], default_value: '|'
|
32
|
+
c.flag %i[open-cmd], default_value: OS.open_cmd
|
33
|
+
|
34
|
+
c.action do |global_options, options, args|
|
35
|
+
format = if File.exist?(options[:format])
|
36
|
+
Format.load(options[:format])
|
37
|
+
else
|
38
|
+
CLIENT.format(options[:format])
|
39
|
+
end
|
40
|
+
|
41
|
+
convert_type = options[:'convert-types']
|
42
|
+
output = options[:output].strip
|
43
|
+
|
44
|
+
case output
|
45
|
+
when /\.db$/
|
46
|
+
importer = SqliteImporter.new(
|
47
|
+
format: format,
|
48
|
+
db_path: output
|
49
|
+
)
|
50
|
+
importer.import(ARGF.read)
|
51
|
+
when /\.json$/
|
52
|
+
File.open(output, 'w') do |file|
|
53
|
+
file.puts CSVPP.json(
|
54
|
+
input: ARGF.read,
|
55
|
+
format: format,
|
56
|
+
convert_type: convert_type
|
57
|
+
)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
OS.open(output, open_cmd: options[:'open-cmd']) if options[:open]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
desc 'List formats'
|
66
|
+
command :formats do |c|
|
67
|
+
c.switch %i[web], default: false,
|
68
|
+
desc: 'Whether to view the formats in the web app'
|
69
|
+
|
70
|
+
c.action do |global_options, options, args|
|
71
|
+
if options[:web]
|
72
|
+
OS.open(CLIENT.base_uri)
|
73
|
+
next
|
74
|
+
end
|
75
|
+
|
76
|
+
puts CLIENT.formats.map(&:to_s)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
data/lib/csvpp/conversions.rb
CHANGED
@@ -4,19 +4,70 @@ module CSVPP
|
|
4
4
|
module Conversions
|
5
5
|
module_function
|
6
6
|
|
7
|
+
ARRAY_TYPE_RGX = /(?<array_type>\w+),\s*(?<array_delimiter>\W)/
|
8
|
+
|
7
9
|
# @param obj [Object] object to parse
|
8
10
|
# @param to [String] a type, e.g. "int"
|
9
|
-
# @missings [Array] list of values that are treated as missings,
|
10
|
-
#
|
11
|
+
# @param missings [Array] list of values that are treated as missings,
|
12
|
+
# e.g. ['NA', '-', -999]
|
13
|
+
# @param options [Hash] options passed on to parsing methods for specific types
|
11
14
|
# @return parsed value, read from `obj`, interpreted as type given by `to`
|
12
15
|
def convert(obj, to:, missings: [], **options)
|
13
16
|
return nil if missing?(obj, missings)
|
14
17
|
|
18
|
+
if to.start_with?('array')
|
19
|
+
to, rest = to.split('<')
|
20
|
+
rest = rest.tr('>', '')
|
21
|
+
match = rest.match(ARRAY_TYPE_RGX)
|
22
|
+
options = options.merge(
|
23
|
+
type: match[:array_type],
|
24
|
+
delimiter: match[:array_delimiter]
|
25
|
+
)
|
26
|
+
end
|
27
|
+
|
15
28
|
send("parse_#{to}", obj, **options)
|
16
29
|
end
|
17
30
|
|
31
|
+
def parse_array(str, type:, delimiter:, **options)
|
32
|
+
str.split(delimiter).map { |entry| send("parse_#{type}", entry) }
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse_chop(str, delimiter: ':', **options)
|
36
|
+
code, laterality, date = str.split(delimiter)
|
37
|
+
code = parse_string(code)
|
38
|
+
laterality = parse_string(laterality) if laterality
|
39
|
+
laterality = nil if laterality&.empty?
|
40
|
+
date = parse_date(date) if date
|
41
|
+
|
42
|
+
{
|
43
|
+
code: code,
|
44
|
+
laterality: laterality,
|
45
|
+
date: date
|
46
|
+
}
|
47
|
+
end
|
48
|
+
|
49
|
+
# See page 3 in documentation/Technisches_Begleitblatt_2017_d.pdf for more
|
50
|
+
# info on the medi data type.
|
51
|
+
def parse_medi(str, delimiter: ':', **options)
|
52
|
+
atc_code, annex, application, dose, unit = str.split(delimiter)
|
53
|
+
atc_code = parse_string(atc_code)
|
54
|
+
annex = parse_string(annex) if annex
|
55
|
+
annex = nil if annex&.empty?
|
56
|
+
application = parse_string(application)
|
57
|
+
dose = parse_decimal(dose)
|
58
|
+
unit = parse_string(unit)
|
59
|
+
|
60
|
+
{
|
61
|
+
atc_code: atc_code,
|
62
|
+
annex: annex,
|
63
|
+
application: application,
|
64
|
+
dose: dose,
|
65
|
+
unit: unit
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
18
69
|
def parse_string(str, **options)
|
19
|
-
str.to_s
|
70
|
+
str.to_s.strip
|
20
71
|
end
|
21
72
|
|
22
73
|
def parse_int(str, **options)
|
@@ -40,7 +91,7 @@ module CSVPP
|
|
40
91
|
Float(clean_decimal(str)) rescue nil
|
41
92
|
end
|
42
93
|
|
43
|
-
def parse_decimal(str)
|
94
|
+
def parse_decimal(str, **options)
|
44
95
|
return nil if str.to_s.empty?
|
45
96
|
|
46
97
|
cleaned = clean_decimal(str).to_s
|
@@ -50,7 +101,6 @@ module CSVPP
|
|
50
101
|
else
|
51
102
|
BigDecimal(cleaned)
|
52
103
|
end
|
53
|
-
|
54
104
|
end
|
55
105
|
|
56
106
|
def parse_date(str, **options)
|
@@ -61,7 +111,11 @@ module CSVPP
|
|
61
111
|
# @param false_values [Array]: list of values that are interpreted as `false`
|
62
112
|
# @return true or false, or
|
63
113
|
# nil if `str` doesn't match any value interpreted as `true` or `false`
|
64
|
-
def parse_boolean(str,
|
114
|
+
def parse_boolean(str,
|
115
|
+
true_values: [],
|
116
|
+
false_values: [],
|
117
|
+
**options)
|
118
|
+
|
65
119
|
cleaned = str.to_s.strip.downcase
|
66
120
|
|
67
121
|
trues = if true_values.empty?
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CSVPP
|
4
|
+
# Add core extensions here as refinements. Only load them when necessary, so
|
5
|
+
# that we don't monkeypatch applications that load CSV++.
|
6
|
+
#
|
7
|
+
# @example Loading the core extensions to the current lexical scope
|
8
|
+
#
|
9
|
+
# module MyScope
|
10
|
+
# using CSVPP::CoreExtensions
|
11
|
+
#
|
12
|
+
# # Extensions exist here
|
13
|
+
#
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# # Extensions no longer exist here
|
17
|
+
#
|
18
|
+
module CoreExtensions
|
19
|
+
refine String do
|
20
|
+
def colorize(color_code)
|
21
|
+
"\e[#{color_code}m#{self}\e[0m"
|
22
|
+
end
|
23
|
+
|
24
|
+
def green
|
25
|
+
colorize 32
|
26
|
+
end
|
27
|
+
|
28
|
+
def blue
|
29
|
+
colorize 34
|
30
|
+
end
|
31
|
+
|
32
|
+
def pink
|
33
|
+
colorize 35
|
34
|
+
end
|
35
|
+
|
36
|
+
def yellow
|
37
|
+
colorize 33
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/csvpp/format.rb
CHANGED
@@ -1,44 +1,27 @@
|
|
1
1
|
module CSVPP
|
2
2
|
class Format
|
3
|
-
attr_reader :name, :skip
|
3
|
+
attr_reader :name, :description, :skip, :col_sep
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# @param name [String] unique name of the format
|
13
|
-
def find(name)
|
14
|
-
store.fetch(name)
|
15
|
-
end
|
16
|
-
|
17
|
-
# @param path [String] path to format file
|
18
|
-
# @return [Format]
|
19
|
-
def load(path)
|
20
|
-
load_from_str File.read(path)
|
21
|
-
end
|
22
|
-
|
23
|
-
# @param json [String]
|
24
|
-
# @return [Format]
|
25
|
-
def load_from_str(json)
|
26
|
-
new Oj.load(json)
|
27
|
-
end
|
28
|
-
|
29
|
-
def all
|
30
|
-
store.values
|
31
|
-
end
|
5
|
+
# @param path [String] path to format file
|
6
|
+
# @return [Format]
|
7
|
+
def self.load(path)
|
8
|
+
return path if path.is_a? Format
|
9
|
+
load_from_str File.read(path)
|
10
|
+
end
|
32
11
|
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
# @param json [String]
|
13
|
+
# @return [Format]
|
14
|
+
def self.load_from_str(json)
|
15
|
+
return json if json.is_a? Format
|
16
|
+
new Oj.load(json)
|
36
17
|
end
|
37
18
|
|
38
19
|
# @param format [Hash]
|
39
20
|
def initialize(format)
|
40
21
|
@name = format['name']
|
22
|
+
@description = format['description']
|
41
23
|
@multiline = format['multiline'].to_s.strip.downcase == 'true'
|
24
|
+
@col_sep = format['column_separator']
|
42
25
|
@skip = format['skip'].to_i
|
43
26
|
@vars = format.fetch('vars')
|
44
27
|
|
@@ -51,6 +34,11 @@ module CSVPP
|
|
51
34
|
|
52
35
|
@multiline_start = format.fetch('start')
|
53
36
|
end
|
37
|
+
|
38
|
+
# Cache for actual indices because formats provide 1-based human readable
|
39
|
+
# positions. Only matters when parsing files with 30k+ lines. See
|
40
|
+
# #index(var).
|
41
|
+
@indices = {}
|
54
42
|
end
|
55
43
|
|
56
44
|
def var_names
|
@@ -62,7 +50,7 @@ module CSVPP
|
|
62
50
|
end
|
63
51
|
|
64
52
|
def index(var)
|
65
|
-
position(var) - 1
|
53
|
+
@indices[var] ||= position(var) - 1
|
66
54
|
end
|
67
55
|
|
68
56
|
def position(var)
|
@@ -87,7 +75,6 @@ module CSVPP
|
|
87
75
|
array_from(var, 'true_values')
|
88
76
|
end
|
89
77
|
|
90
|
-
|
91
78
|
# Returns the values that are defined as `false` in the format's JSON
|
92
79
|
# definition for the given variable.
|
93
80
|
# @return [Array] all values that should be interpreted as `false` for this variable
|
@@ -108,6 +95,10 @@ module CSVPP
|
|
108
95
|
@multiline
|
109
96
|
end
|
110
97
|
|
98
|
+
def to_s
|
99
|
+
"#{name.ljust(30)}\t| #{description}"
|
100
|
+
end
|
101
|
+
|
111
102
|
private
|
112
103
|
|
113
104
|
# Returns the value or values specified for the given attribute of the given
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'httparty'
|
4
|
+
|
5
|
+
module CSVPP
|
6
|
+
class FormatsClient
|
7
|
+
include HTTParty
|
8
|
+
|
9
|
+
DEFAULT_HOST = 'http://formats.iapps.swissdrg.local'
|
10
|
+
|
11
|
+
def initialize(host: DEFAULT_HOST)
|
12
|
+
self.class.base_uri ENV['FORMATS_HOST'] || host
|
13
|
+
end
|
14
|
+
|
15
|
+
# @return [String] e.g. "http://formats.iapps.swissdrg.local"
|
16
|
+
def base_uri
|
17
|
+
self.class.base_uri
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Array<Format>]
|
21
|
+
def formats
|
22
|
+
self.class.get('/api/formats').map { |hash| Format.new(hash) }
|
23
|
+
end
|
24
|
+
|
25
|
+
# @param name [String]
|
26
|
+
# @return [Format]
|
27
|
+
def format(name)
|
28
|
+
response = self.class.get("/api/formats/#{name}")
|
29
|
+
|
30
|
+
if (error = response['error'])
|
31
|
+
raise ArgumentError, %{#{error} "#{name}"}
|
32
|
+
end
|
33
|
+
|
34
|
+
Format.new(response)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/csvpp/os.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CSVPP
|
4
|
+
# Provides utility functions for determining OS and OS-specific system calls.
|
5
|
+
module OS
|
6
|
+
module_function
|
7
|
+
|
8
|
+
# http://stackoverflow.com/a/171011/1314848.
|
9
|
+
def windows?
|
10
|
+
!!(/cygwin|mswin|mingw|bccwin|wince|emx/ =~ ruby_platform)
|
11
|
+
end
|
12
|
+
|
13
|
+
def unix?
|
14
|
+
!windows?
|
15
|
+
end
|
16
|
+
|
17
|
+
def mac?
|
18
|
+
!!(/darwin/ =~ ruby_platform)
|
19
|
+
end
|
20
|
+
|
21
|
+
def linux?
|
22
|
+
unix? && !mac?
|
23
|
+
end
|
24
|
+
|
25
|
+
def open(str, open_cmd: self.open_cmd)
|
26
|
+
system "#{open_cmd} #{str}"
|
27
|
+
end
|
28
|
+
|
29
|
+
def open_cmd
|
30
|
+
if mac?
|
31
|
+
'open'
|
32
|
+
elsif linux?
|
33
|
+
'xdg-open'
|
34
|
+
elsif windows?
|
35
|
+
'START ""'
|
36
|
+
else
|
37
|
+
raise 'Unsupported OS'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def pager
|
42
|
+
return nil if windows?
|
43
|
+
ENV['PAGER'] || 'less'
|
44
|
+
end
|
45
|
+
|
46
|
+
def ruby_platform
|
47
|
+
RUBY_PLATFORM
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/csvpp/parser.rb
CHANGED
@@ -18,7 +18,7 @@ module CSVPP
|
|
18
18
|
new(
|
19
19
|
format: format,
|
20
20
|
col_sep: col_sep,
|
21
|
-
convert_type: convert_type
|
21
|
+
convert_type: convert_type
|
22
22
|
).parse(input, &block)
|
23
23
|
end
|
24
24
|
|
@@ -28,28 +28,24 @@ module CSVPP
|
|
28
28
|
#
|
29
29
|
# @return [Array<Object>]
|
30
30
|
def self.parse_str(input:,
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
format:,
|
32
|
+
col_sep: DEFAULT_COL_SEP,
|
33
|
+
convert_type: true,
|
34
|
+
&block)
|
35
35
|
|
36
36
|
new(
|
37
37
|
format: format,
|
38
38
|
col_sep: col_sep,
|
39
|
-
convert_type: convert_type
|
39
|
+
convert_type: convert_type
|
40
40
|
).parse_str(input, &block)
|
41
41
|
end
|
42
42
|
|
43
43
|
def initialize(format:, col_sep: DEFAULT_COL_SEP, convert_type: true)
|
44
44
|
@format = format
|
45
|
-
@col_sep = col_sep
|
45
|
+
@col_sep = format.col_sep || col_sep
|
46
46
|
@convert_type = convert_type
|
47
47
|
end
|
48
48
|
|
49
|
-
def convert_type?
|
50
|
-
!!@convert_type
|
51
|
-
end
|
52
|
-
|
53
49
|
def parse(path, &block)
|
54
50
|
parse_io(File.open(path), &block)
|
55
51
|
end
|
@@ -66,25 +62,26 @@ module CSVPP
|
|
66
62
|
|
67
63
|
def set_value!(hash, var, value)
|
68
64
|
hash[var] = value
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
65
|
+
return unless @convert_type
|
66
|
+
|
67
|
+
type = format.type(var)
|
68
|
+
return if type.nil?
|
69
|
+
|
70
|
+
hash[var] = convert(
|
71
|
+
value,
|
72
|
+
to: type,
|
73
|
+
missings: format.missings(var),
|
74
|
+
true_values: format.true_values(var),
|
75
|
+
false_values: format.false_values(var)
|
76
|
+
)
|
80
77
|
end
|
81
78
|
|
82
|
-
def add_result!(results, hash
|
83
|
-
if block_given? && (obj =
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
79
|
+
def add_result!(results, hash)
|
80
|
+
results << if block_given? && (obj = yield(hash))
|
81
|
+
obj
|
82
|
+
else
|
83
|
+
hash
|
84
|
+
end
|
88
85
|
end
|
89
86
|
|
90
87
|
def parse_io(io, &block)
|
@@ -98,10 +95,11 @@ module CSVPP
|
|
98
95
|
|
99
96
|
hash = {}
|
100
97
|
format.var_names.each do |var|
|
101
|
-
hash[
|
98
|
+
hash['line_number'] = line_number
|
102
99
|
|
103
100
|
index = format.index(var)
|
104
|
-
value = columns[index]
|
101
|
+
value = columns[index]
|
102
|
+
|
105
103
|
set_value!(hash, var, value)
|
106
104
|
end
|
107
105
|
|
@@ -127,14 +125,14 @@ module CSVPP
|
|
127
125
|
|
128
126
|
# ...and start building a new one.
|
129
127
|
hash = {}
|
130
|
-
hash[
|
128
|
+
hash['line_number'] = line_number
|
131
129
|
end
|
132
130
|
|
133
131
|
next if hash.nil?
|
134
132
|
|
135
133
|
format.vars_for_line(line_id).each do |var|
|
136
134
|
index = format.index(var)
|
137
|
-
value = columns[index]
|
135
|
+
value = columns[index]
|
138
136
|
set_value!(hash, var, value)
|
139
137
|
end
|
140
138
|
end
|
@@ -157,6 +155,5 @@ module CSVPP
|
|
157
155
|
yield(line, index) unless index < offset
|
158
156
|
end
|
159
157
|
end
|
160
|
-
|
161
158
|
end
|
162
159
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sequel'
|
4
|
+
|
5
|
+
module CSVPP
|
6
|
+
# Imports data into an Sqlite database.
|
7
|
+
class SqliteImporter
|
8
|
+
attr_reader :format, :data, :db
|
9
|
+
|
10
|
+
# @param format [Format]
|
11
|
+
# @param db_path [String]
|
12
|
+
def initialize(format:, db_path:)
|
13
|
+
@format = format
|
14
|
+
@db = Sequel.sqlite(db_path)
|
15
|
+
|
16
|
+
@db.drop_table? :data
|
17
|
+
@db.create_table :data do
|
18
|
+
Int :line_number
|
19
|
+
|
20
|
+
format.var_names.each do |var|
|
21
|
+
type = format.type(var)
|
22
|
+
|
23
|
+
if type.start_with?('array')
|
24
|
+
type = 'String'
|
25
|
+
else
|
26
|
+
type.capitalize
|
27
|
+
end
|
28
|
+
|
29
|
+
send(type, var)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
@data = @db[:data]
|
34
|
+
end
|
35
|
+
|
36
|
+
# @param input [String] input string
|
37
|
+
def import(input)
|
38
|
+
Parser.parse_str(
|
39
|
+
input: input,
|
40
|
+
format: format,
|
41
|
+
convert_type: false
|
42
|
+
) do |attr|
|
43
|
+
data.insert(attr)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/csvpp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- SwissDRG AG
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -16,14 +16,76 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 3.
|
19
|
+
version: 3.6.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 3.
|
26
|
+
version: 3.6.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: gli
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.17.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 2.17.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: httparty
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.16.2
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.16.2
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sqlite3
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 1.3.13
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "~>"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '1.3'
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 1.3.13
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: sequel
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '5.9'
|
82
|
+
type: :runtime
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '5.9'
|
27
89
|
- !ruby/object:Gem::Dependency
|
28
90
|
name: bundler
|
29
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +170,34 @@ dependencies:
|
|
108
170
|
- - ">="
|
109
171
|
- !ruby/object:Gem::Version
|
110
172
|
version: '0'
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
name: ruby-prof
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ">="
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0'
|
180
|
+
type: :development
|
181
|
+
prerelease: false
|
182
|
+
version_requirements: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
187
|
+
- !ruby/object:Gem::Dependency
|
188
|
+
name: simplecov
|
189
|
+
requirement: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
194
|
+
type: :development
|
195
|
+
prerelease: false
|
196
|
+
version_requirements: !ruby/object:Gem::Requirement
|
197
|
+
requirements:
|
198
|
+
- - ">="
|
199
|
+
- !ruby/object:Gem::Version
|
200
|
+
version: '0'
|
111
201
|
description: CSV++
|
112
202
|
email:
|
113
203
|
- rathesan.iyadurai@swissdrg.org
|
@@ -118,6 +208,7 @@ extra_rdoc_files: []
|
|
118
208
|
files:
|
119
209
|
- ".gitignore"
|
120
210
|
- ".gitlab-ci.yml"
|
211
|
+
- ".rubocop.yml"
|
121
212
|
- ".ruby-version"
|
122
213
|
- ".travis.yml"
|
123
214
|
- Gemfile
|
@@ -126,13 +217,20 @@ files:
|
|
126
217
|
- README.md
|
127
218
|
- Rakefile
|
128
219
|
- bin/console
|
220
|
+
- bin/profile
|
129
221
|
- bin/setup
|
130
222
|
- csvpp.gemspec
|
223
|
+
- documentation/Technisches_Begleitblatt_2017_d.pdf
|
131
224
|
- exe/csvpp
|
132
225
|
- lib/csvpp.rb
|
226
|
+
- lib/csvpp/cli.rb
|
133
227
|
- lib/csvpp/conversions.rb
|
228
|
+
- lib/csvpp/core_extensions.rb
|
134
229
|
- lib/csvpp/format.rb
|
230
|
+
- lib/csvpp/formats_client.rb
|
231
|
+
- lib/csvpp/os.rb
|
135
232
|
- lib/csvpp/parser.rb
|
233
|
+
- lib/csvpp/sqlite_importer.rb
|
136
234
|
- lib/csvpp/version.rb
|
137
235
|
homepage: https://www.swissdrg.org
|
138
236
|
licenses:
|
@@ -154,7 +252,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
252
|
version: '0'
|
155
253
|
requirements: []
|
156
254
|
rubyforge_project:
|
157
|
-
rubygems_version: 2.6
|
255
|
+
rubygems_version: 2.7.6
|
158
256
|
signing_key:
|
159
257
|
specification_version: 4
|
160
258
|
summary: CSV++
|