csvpp 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.gitlab-ci.yml +1 -1
- data/.rubocop.yml +78 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +24 -4
- data/README.md +28 -29
- data/bin/profile +19 -0
- data/csvpp.gemspec +7 -1
- data/documentation/Technisches_Begleitblatt_2017_d.pdf +0 -0
- data/exe/csvpp +2 -27
- data/lib/csvpp.rb +36 -8
- data/lib/csvpp/cli.rb +80 -0
- data/lib/csvpp/conversions.rb +60 -6
- data/lib/csvpp/core_extensions.rb +41 -0
- data/lib/csvpp/format.rb +24 -33
- data/lib/csvpp/formats_client.rb +37 -0
- data/lib/csvpp/os.rb +50 -0
- data/lib/csvpp/parser.rb +30 -33
- data/lib/csvpp/sqlite_importer.rb +47 -0
- data/lib/csvpp/version.rb +1 -1
- metadata +103 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f70243c72416b66f569849a07a20845019889f990cfb3b6ad3859886b0c5a387
|
4
|
+
data.tar.gz: da8124bf9974a3ebc0855c9a799e8b17c55c139879c46600c9597712af7c4a61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f33f39b8ef836fd0fa54af8f5a185095bb0daabd060476c1f71c790b28606e3009e2a1aa69ba2ffdd0652ce9a84ce007e5d5c13757afd7d209868bd52b932402
|
7
|
+
data.tar.gz: 907a4e1d8fc6a1933629632a4fc5a2ac724f6d3f1498e769639e598ca6b749e9c03a4ab1d0099c56d5e89491a788e0d5c91ac27dd2716d7c9a6449e49f7bda41
|
data/.gitignore
CHANGED
data/.gitlab-ci.yml
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
AllCops:
|
2
|
+
DisplayCopNames: true
|
3
|
+
DisplayStyleGuide: true
|
4
|
+
TargetRubyVersion: 2.5
|
5
|
+
Include:
|
6
|
+
- Rakefile
|
7
|
+
- config.ru
|
8
|
+
Exclude:
|
9
|
+
- Gemfile
|
10
|
+
- csvpp.gemspec
|
11
|
+
|
12
|
+
Metrics/LineLength:
|
13
|
+
Max: 80
|
14
|
+
Exclude:
|
15
|
+
- test/**/**
|
16
|
+
|
17
|
+
Style/ClassAndModuleChildren:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Style/PercentLiteralDelimiters:
|
21
|
+
Enabled: false
|
22
|
+
|
23
|
+
Style/NumericLiteralPrefix:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
Style/Documentation:
|
27
|
+
Enabled: false
|
28
|
+
|
29
|
+
Style/EmptyMethod:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
Style/DoubleNegation:
|
33
|
+
Enabled: false
|
34
|
+
|
35
|
+
Style/WordArray:
|
36
|
+
Exclude:
|
37
|
+
- test/**/*
|
38
|
+
|
39
|
+
Metrics/ClassLength:
|
40
|
+
Enabled: false
|
41
|
+
|
42
|
+
Metrics/ModuleLength:
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Metrics/MethodLength:
|
46
|
+
Enabled: false
|
47
|
+
|
48
|
+
Metrics/BlockLength:
|
49
|
+
Enabled: false
|
50
|
+
|
51
|
+
Metrics/CyclomaticComplexity:
|
52
|
+
Enabled: false
|
53
|
+
|
54
|
+
Metrics/PerceivedComplexity:
|
55
|
+
Enabled: false
|
56
|
+
|
57
|
+
Metrics/AbcSize:
|
58
|
+
Enabled: false
|
59
|
+
|
60
|
+
Naming/HeredocDelimiterNaming:
|
61
|
+
Enabled: false
|
62
|
+
|
63
|
+
Layout/MultilineMethodCallIndentation:
|
64
|
+
EnforcedStyle: indented_relative_to_receiver
|
65
|
+
|
66
|
+
Layout/AlignHash:
|
67
|
+
Exclude:
|
68
|
+
- lib/csvpp/cli.rb
|
69
|
+
|
70
|
+
Lint/UnusedBlockArgument:
|
71
|
+
Exclude:
|
72
|
+
- lib/csvpp/cli.rb
|
73
|
+
- lib/csvpp/conversions.rb
|
74
|
+
|
75
|
+
Lint/UnusedMethodArgument:
|
76
|
+
Exclude:
|
77
|
+
- lib/csvpp/cli.rb
|
78
|
+
- lib/csvpp/conversions.rb
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.5.1
|
data/Gemfile.lock
CHANGED
@@ -1,17 +1,27 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
csvpp (0.
|
5
|
-
|
4
|
+
csvpp (0.4.0)
|
5
|
+
gli (~> 2.17.1)
|
6
|
+
httparty (~> 0.16.2)
|
7
|
+
oj (~> 3.6.2)
|
8
|
+
sequel (~> 5.9)
|
9
|
+
sqlite3 (~> 1.3, >= 1.3.13)
|
6
10
|
|
7
11
|
GEM
|
8
12
|
remote: https://rubygems.org/
|
9
13
|
specs:
|
10
14
|
byebug (9.1.0)
|
11
15
|
coderay (1.1.2)
|
16
|
+
docile (1.3.1)
|
17
|
+
gli (2.17.1)
|
18
|
+
httparty (0.16.2)
|
19
|
+
multi_xml (>= 0.5.2)
|
20
|
+
json (2.1.0)
|
12
21
|
method_source (0.9.0)
|
13
22
|
minitest (5.10.3)
|
14
|
-
|
23
|
+
multi_xml (0.6.0)
|
24
|
+
oj (3.6.3)
|
15
25
|
pry (0.11.3)
|
16
26
|
coderay (~> 1.1.0)
|
17
27
|
method_source (~> 0.9.0)
|
@@ -22,6 +32,14 @@ GEM
|
|
22
32
|
pry (~> 0.9)
|
23
33
|
yard (~> 0.9)
|
24
34
|
rake (12.3.0)
|
35
|
+
ruby-prof (0.16.2)
|
36
|
+
sequel (5.9.0)
|
37
|
+
simplecov (0.16.1)
|
38
|
+
docile (~> 1.1)
|
39
|
+
json (>= 1.8, < 3)
|
40
|
+
simplecov-html (~> 0.10.0)
|
41
|
+
simplecov-html (0.10.2)
|
42
|
+
sqlite3 (1.3.13)
|
25
43
|
yard (0.9.11)
|
26
44
|
|
27
45
|
PLATFORMS
|
@@ -35,6 +53,8 @@ DEPENDENCIES
|
|
35
53
|
pry-byebug
|
36
54
|
pry-doc
|
37
55
|
rake
|
56
|
+
ruby-prof
|
57
|
+
simplecov
|
38
58
|
|
39
59
|
BUNDLED WITH
|
40
|
-
1.
|
60
|
+
1.16.2
|
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# CSV++
|
2
2
|
|
3
|
+

|
4
|
+

|
5
|
+
|
3
6
|
CSV++ takes a `<DELIMITER>` separated input file and a JSON format specification
|
4
7
|
and turns it into Ruby Objects. See `test/sample_inputs/simple.txt` and
|
5
8
|
`test/sample_formats/simple.json` for example.
|
@@ -24,6 +27,15 @@ Or install it yourself as:
|
|
24
27
|
$ gem install csvpp
|
25
28
|
```
|
26
29
|
|
30
|
+
### For development of CSV++
|
31
|
+
|
32
|
+
First make sure that you have bundler and the Ruby version installed that is
|
33
|
+
specified in `.ruby-version`. Then run:
|
34
|
+
|
35
|
+
```
|
36
|
+
$ bundle
|
37
|
+
```
|
38
|
+
|
27
39
|
## Usage
|
28
40
|
|
29
41
|
```ruby
|
@@ -35,39 +47,26 @@ CSVPP.parse(
|
|
35
47
|
|
36
48
|
### CLI
|
37
49
|
|
38
|
-
CSV++ comes with a CLI
|
39
|
-
|
50
|
+
CSV++ comes with a CLI that speaks with the [Formats API](http://formats.iapps.swissdrg.local/api/formats).
|
51
|
+
|
52
|
+
To print a list of known formats run:
|
40
53
|
|
41
54
|
```
|
42
|
-
$ csvpp
|
43
|
-
|
44
|
-
{
|
45
|
-
"vars": [
|
46
|
-
{
|
47
|
-
"line_number": 1,
|
48
|
-
"v1": 34,
|
49
|
-
"v2": "foobar",
|
50
|
-
"v3": 1.1,
|
51
|
-
"v4": false
|
52
|
-
},
|
53
|
-
{
|
54
|
-
"line_number": 2,
|
55
|
-
"v1": 99,
|
56
|
-
"v2": "hi there",
|
57
|
-
"v3": 2.2,
|
58
|
-
"v4": true
|
59
|
-
},
|
60
|
-
{
|
61
|
-
"line_number": 3,
|
62
|
-
"v1": null,
|
63
|
-
"v2": "Missing",
|
64
|
-
"v3": null,
|
65
|
-
"v4": true
|
66
|
-
}
|
67
|
-
]
|
68
|
-
}
|
55
|
+
$ csvpp formats
|
69
56
|
```
|
70
57
|
|
58
|
+
The `parse` command can be used to parse an input file with a given format,
|
59
|
+
either fetched from the API or from a local JSON specification file.
|
60
|
+
|
61
|
+
For example, to import some REKOLE cost data to a local sqlite DB for querying,
|
62
|
+
run:
|
63
|
+
|
64
|
+
```
|
65
|
+
$ csvpp parse --format fk_2017 --output costs.db test/sample_inputs/fk_2017.txt
|
66
|
+
```
|
67
|
+
|
68
|
+
Run `csvpp help parse` for more details.
|
69
|
+
|
71
70
|
## Development
|
72
71
|
|
73
72
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
data/bin/profile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'csvpp'
|
6
|
+
|
7
|
+
require 'ruby-prof'
|
8
|
+
|
9
|
+
f = CSVPP::Format.load(ARGV[0])
|
10
|
+
result = RubyProf.profile do
|
11
|
+
CSVPP::Parser.parse(
|
12
|
+
input: ARGV[1],
|
13
|
+
format: f,
|
14
|
+
convert_type: false
|
15
|
+
)
|
16
|
+
end
|
17
|
+
|
18
|
+
printer = RubyProf::FlatPrinter.new(result)
|
19
|
+
printer.print(STDOUT)
|
data/csvpp.gemspec
CHANGED
@@ -30,7 +30,11 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
32
|
|
33
|
-
spec.add_dependency 'oj', '~> 3.
|
33
|
+
spec.add_dependency 'oj', '~> 3.6.2'
|
34
|
+
spec.add_dependency 'gli', '~> 2.17.1'
|
35
|
+
spec.add_dependency 'httparty', '~> 0.16.2'
|
36
|
+
spec.add_dependency 'sqlite3', '~> 1.3', '>= 1.3.13'
|
37
|
+
spec.add_dependency 'sequel', '~> 5.9'
|
34
38
|
|
35
39
|
spec.add_development_dependency "bundler"
|
36
40
|
spec.add_development_dependency "rake"
|
@@ -38,4 +42,6 @@ Gem::Specification.new do |spec|
|
|
38
42
|
spec.add_development_dependency "pry"
|
39
43
|
spec.add_development_dependency "pry-byebug"
|
40
44
|
spec.add_development_dependency "pry-doc"
|
45
|
+
spec.add_development_dependency "ruby-prof"
|
46
|
+
spec.add_development_dependency "simplecov"
|
41
47
|
end
|
data/documentation/Technisches_Begleitblatt_2017_d.pdf
ADDED
Binary file
|
data/exe/csvpp
CHANGED
@@ -1,29 +1,4 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
$options = {}
|
7
|
-
option_parser = OptionParser.new do |opts|
|
8
|
-
opts.on('-f', '--format FORMAT') do |format|
|
9
|
-
$options[:format] = format.strip
|
10
|
-
end
|
11
|
-
|
12
|
-
opts.on_tail('-h', '--help', 'Show this message') do
|
13
|
-
puts opts
|
14
|
-
exit
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
HELP = option_parser.help
|
19
|
-
option_parser.parse!
|
20
|
-
|
21
|
-
format = $options[:format]
|
22
|
-
|
23
|
-
json = CSVPP.json(
|
24
|
-
input: ARGF.read,
|
25
|
-
format: File.read(format)
|
26
|
-
)
|
27
|
-
|
28
|
-
# Forgive me Matz for I have sinned
|
29
|
-
system %{echo '#{json}' | python -m json.tool}
|
3
|
+
require_relative '../lib/csvpp/cli'
|
4
|
+
exit CSVPP::CLI.run(ARGV)
|
data/lib/csvpp.rb
CHANGED
@@ -1,49 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'oj'
|
4
|
+
Oj.default_options = { mode: :compat }
|
2
5
|
|
3
6
|
require_relative './csvpp/version'
|
7
|
+
require_relative './csvpp/os'
|
4
8
|
require_relative './csvpp/conversions'
|
5
9
|
require_relative './csvpp/format'
|
6
10
|
require_relative './csvpp/parser'
|
11
|
+
require_relative './csvpp/sqlite_importer'
|
12
|
+
require_relative './csvpp/formats_client'
|
7
13
|
|
8
14
|
module CSVPP
|
9
|
-
|
10
15
|
DEFAULT_COL_SEP = '|'
|
11
16
|
|
12
17
|
# @param input [String] path to input file
|
13
|
-
# @param format [String] path to format file
|
18
|
+
# @param format [String, Format] path to format file, or an already loaded Format
|
14
19
|
# @param col_sep [String]
|
15
20
|
#
|
16
21
|
# @return [Array<Object>]
|
17
|
-
def self.parse(input:,
|
22
|
+
def self.parse(input:,
|
23
|
+
format:,
|
24
|
+
col_sep: DEFAULT_COL_SEP,
|
25
|
+
convert_type: true,
|
26
|
+
&block)
|
27
|
+
|
18
28
|
Parser.parse(
|
19
29
|
input: input,
|
20
30
|
format: Format.load(format),
|
21
31
|
col_sep: col_sep,
|
32
|
+
convert_type: convert_type,
|
22
33
|
&block
|
23
34
|
)
|
24
35
|
end
|
25
36
|
|
26
37
|
# @param input [String] input string
|
27
|
-
# @param format [String] format string
|
38
|
+
# @param format [String, Format] format JSON string, or an already loaded Format
|
28
39
|
# @param col_sep [String]
|
29
40
|
#
|
30
41
|
# @return [Array<Object>]
|
31
|
-
def self.parse_str(input:,
|
42
|
+
def self.parse_str(input:,
|
43
|
+
format:,
|
44
|
+
col_sep: DEFAULT_COL_SEP,
|
45
|
+
convert_type: true,
|
46
|
+
&block)
|
47
|
+
|
32
48
|
Parser.parse_str(
|
33
49
|
input: input,
|
34
50
|
format: Format.load_from_str(format),
|
35
51
|
col_sep: col_sep,
|
52
|
+
convert_type: convert_type,
|
36
53
|
&block
|
37
54
|
)
|
38
55
|
end
|
39
56
|
|
40
57
|
# @param input [String] input string
|
41
|
-
# @param format [String] format string
|
58
|
+
# @param format [String, Format] format JSON string, or an already loaded Format
|
42
59
|
# @param col_sep [String]
|
43
60
|
#
|
44
61
|
# @return [String]
|
45
|
-
def self.json(input:,
|
46
|
-
|
62
|
+
def self.json(input:,
|
63
|
+
format:,
|
64
|
+
convert_type: true,
|
65
|
+
col_sep: DEFAULT_COL_SEP)
|
66
|
+
h = {
|
67
|
+
'vars' => parse_str(
|
68
|
+
input: input,
|
69
|
+
format: format,
|
70
|
+
convert_type: convert_type,
|
71
|
+
col_sep: col_sep
|
72
|
+
)
|
73
|
+
}
|
74
|
+
|
47
75
|
Oj.dump(h)
|
48
76
|
end
|
49
77
|
end
|
data/lib/csvpp/cli.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require_relative '../csvpp'
|
5
|
+
require_relative '../csvpp/core_extensions'
|
6
|
+
|
7
|
+
module CSVPP
|
8
|
+
# CSV++ command line interface.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# exit CSVPP::CLI.run(ARGV)
|
12
|
+
#
|
13
|
+
class CLI
|
14
|
+
using CoreExtensions
|
15
|
+
extend GLI::App
|
16
|
+
|
17
|
+
CLIENT = FormatsClient.new
|
18
|
+
|
19
|
+
program_desc 'CSV++ Command Line Interface'
|
20
|
+
|
21
|
+
desc 'Parse input files'
|
22
|
+
command :parse do |c|
|
23
|
+
c.switch %i[convert-types], default_value: true, negatable: true
|
24
|
+
c.switch %i[open], default_value: true, negatable: true,
|
25
|
+
desc: 'Whether to open the output with the default application'
|
26
|
+
|
27
|
+
c.flag %i[f format], required: true,
|
28
|
+
desc: 'Format identifier or local file path to a JSON format'
|
29
|
+
|
30
|
+
c.flag %i[o output], required: true, desc: 'Output file'
|
31
|
+
c.flag %i[s separator], default_value: '|'
|
32
|
+
c.flag %i[open-cmd], default_value: OS.open_cmd
|
33
|
+
|
34
|
+
c.action do |global_options, options, args|
|
35
|
+
format = if File.exist?(options[:format])
|
36
|
+
Format.load(options[:format])
|
37
|
+
else
|
38
|
+
CLIENT.format(options[:format])
|
39
|
+
end
|
40
|
+
|
41
|
+
convert_type = options[:'convert-types']
|
42
|
+
output = options[:output].strip
|
43
|
+
|
44
|
+
case output
|
45
|
+
when /\.db$/
|
46
|
+
importer = SqliteImporter.new(
|
47
|
+
format: format,
|
48
|
+
db_path: output
|
49
|
+
)
|
50
|
+
importer.import(ARGF.read)
|
51
|
+
when /\.json$/
|
52
|
+
File.open(output, 'w') do |file|
|
53
|
+
file.puts CSVPP.json(
|
54
|
+
input: ARGF.read,
|
55
|
+
format: format,
|
56
|
+
convert_type: convert_type
|
57
|
+
)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
OS.open(output, open_cmd: options[:'open-cmd']) if options[:open]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
desc 'List formats'
|
66
|
+
command :formats do |c|
|
67
|
+
c.switch %i[web], default: false,
|
68
|
+
desc: 'Whether to view the formats in the web app'
|
69
|
+
|
70
|
+
c.action do |global_options, options, args|
|
71
|
+
if options[:web]
|
72
|
+
OS.open(CLIENT.base_uri)
|
73
|
+
next
|
74
|
+
end
|
75
|
+
|
76
|
+
puts CLIENT.formats.map(&:to_s)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
data/lib/csvpp/conversions.rb
CHANGED
@@ -4,19 +4,70 @@ module CSVPP
|
|
4
4
|
module Conversions
|
5
5
|
module_function
|
6
6
|
|
7
|
+
ARRAY_TYPE_RGX = /(?<array_type>\w+),\s*(?<array_delimiter>\W)/
|
8
|
+
|
7
9
|
# @param obj [Object] object to parse
|
8
10
|
# @param to [String] a type, e.g. "int"
|
9
|
-
# @missings [Array] list of values that are treated as missings,
|
10
|
-
#
|
11
|
+
# @param missings [Array] list of values that are treated as missings,
|
12
|
+
# e.g. ['NA', '-', -999]
|
13
|
+
# @param options [Hash] options passed on to parsing methods for specific types
|
11
14
|
# @return parsed value, read from `obj`, interpreted as type given by `to`
|
12
15
|
def convert(obj, to:, missings: [], **options)
|
13
16
|
return nil if missing?(obj, missings)
|
14
17
|
|
18
|
+
if to.start_with?('array')
|
19
|
+
to, rest = to.split('<')
|
20
|
+
rest = rest.tr('>', '')
|
21
|
+
match = rest.match(ARRAY_TYPE_RGX)
|
22
|
+
options = options.merge(
|
23
|
+
type: match[:array_type],
|
24
|
+
delimiter: match[:array_delimiter]
|
25
|
+
)
|
26
|
+
end
|
27
|
+
|
15
28
|
send("parse_#{to}", obj, **options)
|
16
29
|
end
|
17
30
|
|
31
|
+
def parse_array(str, type:, delimiter:, **options)
|
32
|
+
str.split(delimiter).map { |entry| send("parse_#{type}", entry) }
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse_chop(str, delimiter: ':', **options)
|
36
|
+
code, laterality, date = str.split(delimiter)
|
37
|
+
code = parse_string(code)
|
38
|
+
laterality = parse_string(laterality) if laterality
|
39
|
+
laterality = nil if laterality&.empty?
|
40
|
+
date = parse_date(date) if date
|
41
|
+
|
42
|
+
{
|
43
|
+
code: code,
|
44
|
+
laterality: laterality,
|
45
|
+
date: date
|
46
|
+
}
|
47
|
+
end
|
48
|
+
|
49
|
+
# See page 3 in documentation/Technisches_Begleitblatt_2017_d.pdf for more
|
50
|
+
# info on the medi data type.
|
51
|
+
def parse_medi(str, delimiter: ':', **options)
|
52
|
+
atc_code, annex, application, dose, unit = str.split(delimiter)
|
53
|
+
atc_code = parse_string(atc_code)
|
54
|
+
annex = parse_string(annex) if annex
|
55
|
+
annex = nil if annex&.empty?
|
56
|
+
application = parse_string(application)
|
57
|
+
dose = parse_decimal(dose)
|
58
|
+
unit = parse_string(unit)
|
59
|
+
|
60
|
+
{
|
61
|
+
atc_code: atc_code,
|
62
|
+
annex: annex,
|
63
|
+
application: application,
|
64
|
+
dose: dose,
|
65
|
+
unit: unit
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
18
69
|
def parse_string(str, **options)
|
19
|
-
str.to_s
|
70
|
+
str.to_s.strip
|
20
71
|
end
|
21
72
|
|
22
73
|
def parse_int(str, **options)
|
@@ -40,7 +91,7 @@ module CSVPP
|
|
40
91
|
Float(clean_decimal(str)) rescue nil
|
41
92
|
end
|
42
93
|
|
43
|
-
def parse_decimal(str)
|
94
|
+
def parse_decimal(str, **options)
|
44
95
|
return nil if str.to_s.empty?
|
45
96
|
|
46
97
|
cleaned = clean_decimal(str).to_s
|
@@ -50,7 +101,6 @@ module CSVPP
|
|
50
101
|
else
|
51
102
|
BigDecimal(cleaned)
|
52
103
|
end
|
53
|
-
|
54
104
|
end
|
55
105
|
|
56
106
|
def parse_date(str, **options)
|
@@ -61,7 +111,11 @@ module CSVPP
|
|
61
111
|
# @param false_values [Array]: list of values that are interpreted as `false`
|
62
112
|
# @return true or false, or
|
63
113
|
# nil if `str` doesn't match any value interpreted as `true` or `false`
|
64
|
-
def parse_boolean(str,
|
114
|
+
def parse_boolean(str,
|
115
|
+
true_values: [],
|
116
|
+
false_values: [],
|
117
|
+
**options)
|
118
|
+
|
65
119
|
cleaned = str.to_s.strip.downcase
|
66
120
|
|
67
121
|
trues = if true_values.empty?
|
data/lib/csvpp/core_extensions.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CSVPP
|
4
|
+
# Add core extensions here as refinements. Only load them when necessary, so
|
5
|
+
# that we don't monkeypatch applications that load CSV++.
|
6
|
+
#
|
7
|
+
# @example Loading the core extensions to the current lexical scope
|
8
|
+
#
|
9
|
+
# module MyScope
|
10
|
+
# using CSVPP::CoreExtensions
|
11
|
+
#
|
12
|
+
# # Extensions exist here
|
13
|
+
#
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# # Extensions no longer exist here
|
17
|
+
#
|
18
|
+
module CoreExtensions
|
19
|
+
refine String do
|
20
|
+
def colorize(color_code)
|
21
|
+
"\e[#{color_code}m#{self}\e[0m"
|
22
|
+
end
|
23
|
+
|
24
|
+
def green
|
25
|
+
colorize 32
|
26
|
+
end
|
27
|
+
|
28
|
+
def blue
|
29
|
+
colorize 34
|
30
|
+
end
|
31
|
+
|
32
|
+
def pink
|
33
|
+
colorize 35
|
34
|
+
end
|
35
|
+
|
36
|
+
def yellow
|
37
|
+
colorize 33
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/csvpp/format.rb
CHANGED
@@ -1,44 +1,27 @@
|
|
1
1
|
module CSVPP
|
2
2
|
class Format
|
3
|
-
attr_reader :name, :skip
|
3
|
+
attr_reader :name, :description, :skip, :col_sep
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# @param name [String] unique name of the format
|
13
|
-
def find(name)
|
14
|
-
store.fetch(name)
|
15
|
-
end
|
16
|
-
|
17
|
-
# @param path [String] path to format file
|
18
|
-
# @return [Format]
|
19
|
-
def load(path)
|
20
|
-
load_from_str File.read(path)
|
21
|
-
end
|
22
|
-
|
23
|
-
# @param json [String]
|
24
|
-
# @return [Format]
|
25
|
-
def load_from_str(json)
|
26
|
-
new Oj.load(json)
|
27
|
-
end
|
28
|
-
|
29
|
-
def all
|
30
|
-
store.values
|
31
|
-
end
|
5
|
+
# @param path [String] path to format file
|
6
|
+
# @return [Format]
|
7
|
+
def self.load(path)
|
8
|
+
return path if path.is_a? Format
|
9
|
+
load_from_str File.read(path)
|
10
|
+
end
|
32
11
|
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
# @param json [String]
|
13
|
+
# @return [Format]
|
14
|
+
def self.load_from_str(json)
|
15
|
+
return json if json.is_a? Format
|
16
|
+
new Oj.load(json)
|
36
17
|
end
|
37
18
|
|
38
19
|
# @param format [Hash]
|
39
20
|
def initialize(format)
|
40
21
|
@name = format['name']
|
22
|
+
@description = format['description']
|
41
23
|
@multiline = format['multiline'].to_s.strip.downcase == 'true'
|
24
|
+
@col_sep = format['column_separator']
|
42
25
|
@skip = format['skip'].to_i
|
43
26
|
@vars = format.fetch('vars')
|
44
27
|
|
@@ -51,6 +34,11 @@ module CSVPP
|
|
51
34
|
|
52
35
|
@multiline_start = format.fetch('start')
|
53
36
|
end
|
37
|
+
|
38
|
+
# Cache for actual indices because formats provide 1-based human readable
|
39
|
+
# positions. Only matters when parsing files with 30k+ lines. See
|
40
|
+
# #index(var).
|
41
|
+
@indices = {}
|
54
42
|
end
|
55
43
|
|
56
44
|
def var_names
|
@@ -62,7 +50,7 @@ module CSVPP
|
|
62
50
|
end
|
63
51
|
|
64
52
|
def index(var)
|
65
|
-
position(var) - 1
|
53
|
+
@indices[var] ||= position(var) - 1
|
66
54
|
end
|
67
55
|
|
68
56
|
def position(var)
|
@@ -87,7 +75,6 @@ module CSVPP
|
|
87
75
|
array_from(var, 'true_values')
|
88
76
|
end
|
89
77
|
|
90
|
-
|
91
78
|
# Returns the values that are defined as `false` in the format's json
|
92
79
|
# definition for the given variable.
|
93
80
|
# @return [Array] all values that should be interpreted as `false` for this variable
|
@@ -108,6 +95,10 @@ module CSVPP
|
|
108
95
|
@multiline
|
109
96
|
end
|
110
97
|
|
98
|
+
def to_s
|
99
|
+
"#{name.ljust(30)}\t| #{description}"
|
100
|
+
end
|
101
|
+
|
111
102
|
private
|
112
103
|
|
113
104
|
# Returns the value or values specified for the given attribute of the given
|
data/lib/csvpp/formats_client.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'httparty'
|
4
|
+
|
5
|
+
module CSVPP
|
6
|
+
class FormatsClient
|
7
|
+
include HTTParty
|
8
|
+
|
9
|
+
DEFAULT_HOST = 'http://formats.iapps.swissdrg.local'
|
10
|
+
|
11
|
+
def initialize(host: DEFAULT_HOST)
|
12
|
+
self.class.base_uri ENV['FORMATS_HOST'] || host
|
13
|
+
end
|
14
|
+
|
15
|
+
# @return [String] e.g. "http://formats.iapps.swissdrg.local"
|
16
|
+
def base_uri
|
17
|
+
self.class.base_uri
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Array<Format>]
|
21
|
+
def formats
|
22
|
+
self.class.get('/api/formats').map { |hash| Format.new(hash) }
|
23
|
+
end
|
24
|
+
|
25
|
+
# @param name [String]
|
26
|
+
# @return [Format]
|
27
|
+
def format(name)
|
28
|
+
response = self.class.get("/api/formats/#{name}")
|
29
|
+
|
30
|
+
if (error = response['error'])
|
31
|
+
raise ArgumentError, %{#{error} "#{name}"}
|
32
|
+
end
|
33
|
+
|
34
|
+
Format.new(response)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/csvpp/os.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CSVPP
|
4
|
+
# Provides utility functions for determining OS and OS-specific system calls.
|
5
|
+
module OS
|
6
|
+
module_function
|
7
|
+
|
8
|
+
# http://stackoverflow.com/a/171011/1314848.
|
9
|
+
def windows?
|
10
|
+
!!(/cygwin|mswin|mingw|bccwin|wince|emx/ =~ ruby_platform)
|
11
|
+
end
|
12
|
+
|
13
|
+
def unix?
|
14
|
+
!windows?
|
15
|
+
end
|
16
|
+
|
17
|
+
def mac?
|
18
|
+
!!(/darwin/ =~ ruby_platform)
|
19
|
+
end
|
20
|
+
|
21
|
+
def linux?
|
22
|
+
unix? && !mac?
|
23
|
+
end
|
24
|
+
|
25
|
+
def open(str, open_cmd: self.open_cmd)
|
26
|
+
system "#{open_cmd} #{str}"
|
27
|
+
end
|
28
|
+
|
29
|
+
def open_cmd
|
30
|
+
if mac?
|
31
|
+
'open'
|
32
|
+
elsif linux?
|
33
|
+
'xdg-open'
|
34
|
+
elsif windows?
|
35
|
+
'START ""'
|
36
|
+
else
|
37
|
+
raise 'Unsupported OS'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def pager
|
42
|
+
return nil if windows?
|
43
|
+
ENV['PAGER'] || 'less'
|
44
|
+
end
|
45
|
+
|
46
|
+
def ruby_platform
|
47
|
+
RUBY_PLATFORM
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/csvpp/parser.rb
CHANGED
@@ -18,7 +18,7 @@ module CSVPP
|
|
18
18
|
new(
|
19
19
|
format: format,
|
20
20
|
col_sep: col_sep,
|
21
|
-
convert_type: convert_type
|
21
|
+
convert_type: convert_type
|
22
22
|
).parse(input, &block)
|
23
23
|
end
|
24
24
|
|
@@ -28,28 +28,24 @@ module CSVPP
|
|
28
28
|
#
|
29
29
|
# @return [Array<Object>]
|
30
30
|
def self.parse_str(input:,
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
format:,
|
32
|
+
col_sep: DEFAULT_COL_SEP,
|
33
|
+
convert_type: true,
|
34
|
+
&block)
|
35
35
|
|
36
36
|
new(
|
37
37
|
format: format,
|
38
38
|
col_sep: col_sep,
|
39
|
-
convert_type: convert_type
|
39
|
+
convert_type: convert_type
|
40
40
|
).parse_str(input, &block)
|
41
41
|
end
|
42
42
|
|
43
43
|
def initialize(format:, col_sep: DEFAULT_COL_SEP, convert_type: true)
|
44
44
|
@format = format
|
45
|
-
@col_sep = col_sep
|
45
|
+
@col_sep = format.col_sep || col_sep
|
46
46
|
@convert_type = convert_type
|
47
47
|
end
|
48
48
|
|
49
|
-
def convert_type?
|
50
|
-
!!@convert_type
|
51
|
-
end
|
52
|
-
|
53
49
|
def parse(path, &block)
|
54
50
|
parse_io(File.open(path), &block)
|
55
51
|
end
|
@@ -66,25 +62,26 @@ module CSVPP
|
|
66
62
|
|
67
63
|
def set_value!(hash, var, value)
|
68
64
|
hash[var] = value
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
65
|
+
return unless @convert_type
|
66
|
+
|
67
|
+
type = format.type(var)
|
68
|
+
return if type.nil?
|
69
|
+
|
70
|
+
hash[var] = convert(
|
71
|
+
value,
|
72
|
+
to: type,
|
73
|
+
missings: format.missings(var),
|
74
|
+
true_values: format.true_values(var),
|
75
|
+
false_values: format.false_values(var)
|
76
|
+
)
|
80
77
|
end
|
81
78
|
|
82
|
-
def add_result!(results, hash
|
83
|
-
if block_given? && (obj =
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
79
|
+
def add_result!(results, hash)
|
80
|
+
results << if block_given? && (obj = yield(hash))
|
81
|
+
obj
|
82
|
+
else
|
83
|
+
hash
|
84
|
+
end
|
88
85
|
end
|
89
86
|
|
90
87
|
def parse_io(io, &block)
|
@@ -98,10 +95,11 @@ module CSVPP
|
|
98
95
|
|
99
96
|
hash = {}
|
100
97
|
format.var_names.each do |var|
|
101
|
-
hash[
|
98
|
+
hash['line_number'] = line_number
|
102
99
|
|
103
100
|
index = format.index(var)
|
104
|
-
value = columns[index]
|
101
|
+
value = columns[index]
|
102
|
+
|
105
103
|
set_value!(hash, var, value)
|
106
104
|
end
|
107
105
|
|
@@ -127,14 +125,14 @@ module CSVPP
|
|
127
125
|
|
128
126
|
# ...and start building a new one.
|
129
127
|
hash = {}
|
130
|
-
hash[
|
128
|
+
hash['line_number'] = line_number
|
131
129
|
end
|
132
130
|
|
133
131
|
next if hash.nil?
|
134
132
|
|
135
133
|
format.vars_for_line(line_id).each do |var|
|
136
134
|
index = format.index(var)
|
137
|
-
value = columns[index]
|
135
|
+
value = columns[index]
|
138
136
|
set_value!(hash, var, value)
|
139
137
|
end
|
140
138
|
end
|
@@ -157,6 +155,5 @@ module CSVPP
|
|
157
155
|
yield(line, index) unless index < offset
|
158
156
|
end
|
159
157
|
end
|
160
|
-
|
161
158
|
end
|
162
159
|
end
|
data/lib/csvpp/sqlite_importer.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sequel'
|
4
|
+
|
5
|
+
module CSVPP
|
6
|
+
# Imports data into an Sqlite database.
|
7
|
+
class SqliteImporter
|
8
|
+
attr_reader :format, :data, :db
|
9
|
+
|
10
|
+
# @param format [Format]
|
11
|
+
# @param db_path [String]
|
12
|
+
def initialize(format:, db_path:)
|
13
|
+
@format = format
|
14
|
+
@db = Sequel.sqlite(db_path)
|
15
|
+
|
16
|
+
@db.drop_table? :data
|
17
|
+
@db.create_table :data do
|
18
|
+
Int :line_number
|
19
|
+
|
20
|
+
format.var_names.each do |var|
|
21
|
+
type = format.type(var)
|
22
|
+
|
23
|
+
if type.start_with?('array')
|
24
|
+
type = 'String'
|
25
|
+
else
|
26
|
+
type.capitalize
|
27
|
+
end
|
28
|
+
|
29
|
+
send(type, var)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
@data = @db[:data]
|
34
|
+
end
|
35
|
+
|
36
|
+
# @param input [String] input string
|
37
|
+
def import(input)
|
38
|
+
Parser.parse_str(
|
39
|
+
input: input,
|
40
|
+
format: format,
|
41
|
+
convert_type: false
|
42
|
+
) do |attr|
|
43
|
+
data.insert(attr)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/csvpp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- SwissDRG AG
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -16,14 +16,76 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 3.
|
19
|
+
version: 3.6.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 3.
|
26
|
+
version: 3.6.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: gli
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.17.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 2.17.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: httparty
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.16.2
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.16.2
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sqlite3
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 1.3.13
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "~>"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '1.3'
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 1.3.13
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: sequel
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '5.9'
|
82
|
+
type: :runtime
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '5.9'
|
27
89
|
- !ruby/object:Gem::Dependency
|
28
90
|
name: bundler
|
29
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +170,34 @@ dependencies:
|
|
108
170
|
- - ">="
|
109
171
|
- !ruby/object:Gem::Version
|
110
172
|
version: '0'
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
name: ruby-prof
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ">="
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0'
|
180
|
+
type: :development
|
181
|
+
prerelease: false
|
182
|
+
version_requirements: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
187
|
+
- !ruby/object:Gem::Dependency
|
188
|
+
name: simplecov
|
189
|
+
requirement: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
194
|
+
type: :development
|
195
|
+
prerelease: false
|
196
|
+
version_requirements: !ruby/object:Gem::Requirement
|
197
|
+
requirements:
|
198
|
+
- - ">="
|
199
|
+
- !ruby/object:Gem::Version
|
200
|
+
version: '0'
|
111
201
|
description: CSV++
|
112
202
|
email:
|
113
203
|
- rathesan.iyadurai@swissdrg.org
|
@@ -118,6 +208,7 @@ extra_rdoc_files: []
|
|
118
208
|
files:
|
119
209
|
- ".gitignore"
|
120
210
|
- ".gitlab-ci.yml"
|
211
|
+
- ".rubocop.yml"
|
121
212
|
- ".ruby-version"
|
122
213
|
- ".travis.yml"
|
123
214
|
- Gemfile
|
@@ -126,13 +217,20 @@ files:
|
|
126
217
|
- README.md
|
127
218
|
- Rakefile
|
128
219
|
- bin/console
|
220
|
+
- bin/profile
|
129
221
|
- bin/setup
|
130
222
|
- csvpp.gemspec
|
223
|
+
- documentation/Technisches_Begleitblatt_2017_d.pdf
|
131
224
|
- exe/csvpp
|
132
225
|
- lib/csvpp.rb
|
226
|
+
- lib/csvpp/cli.rb
|
133
227
|
- lib/csvpp/conversions.rb
|
228
|
+
- lib/csvpp/core_extensions.rb
|
134
229
|
- lib/csvpp/format.rb
|
230
|
+
- lib/csvpp/formats_client.rb
|
231
|
+
- lib/csvpp/os.rb
|
135
232
|
- lib/csvpp/parser.rb
|
233
|
+
- lib/csvpp/sqlite_importer.rb
|
136
234
|
- lib/csvpp/version.rb
|
137
235
|
homepage: https://www.swissdrg.org
|
138
236
|
licenses:
|
@@ -154,7 +252,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
252
|
version: '0'
|
155
253
|
requirements: []
|
156
254
|
rubyforge_project:
|
157
|
-
rubygems_version: 2.6
|
255
|
+
rubygems_version: 2.7.6
|
158
256
|
signing_key:
|
159
257
|
specification_version: 4
|
160
258
|
summary: CSV++
|