csvpp 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 96198044193b9b7a62d2e35810217bfc5b603c7b
4
+ data.tar.gz: c5f025bd90f45294986eb3892c5db6d81f2b5498
5
+ SHA512:
6
+ metadata.gz: 3b35fd7ed568851367faf5b16c329271fa4d7d5935161037ceb1e8fba739d84515c8fec63c0382394156914e3d6eb2d4116b3c75ed0a88986e6554487b3b16db
7
+ data.tar.gz: 211dc3241614ba542156221574054264528a2baed9f99c8875c3e01d041d0a04065723cb5bc5e4cf04f539822131c96ed92789fdf3d4c72c152cbab55d7a0830
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ .idea
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,13 @@
1
+ image: "ruby:2.4"
2
+
3
+ cache:
4
+ paths:
5
+ - vendor/
6
+
7
+ before_script:
8
+ - gem install bundler --no-ri --no-rdoc
9
+ - bundle install -j $(nproc) --path vendor
10
+
11
+ tests:
12
+ script:
13
+ - bundle exec rake
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.4.0
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0.pre.3
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in csvpp.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,51 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ csvpp (0.3.0)
5
+ oj (~> 3.3.9)
6
+ sinatra (~> 2.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ byebug (9.1.0)
12
+ coderay (1.1.2)
13
+ method_source (0.9.0)
14
+ minitest (5.10.3)
15
+ mustermann (1.0.1)
16
+ oj (3.3.10)
17
+ pry (0.11.3)
18
+ coderay (~> 1.1.0)
19
+ method_source (~> 0.9.0)
20
+ pry-byebug (3.5.0)
21
+ byebug (~> 9.1)
22
+ pry (~> 0.10)
23
+ pry-doc (0.11.1)
24
+ pry (~> 0.9)
25
+ yard (~> 0.9)
26
+ rack (2.0.3)
27
+ rack-protection (2.0.0)
28
+ rack
29
+ rake (12.3.0)
30
+ sinatra (2.0.0)
31
+ mustermann (~> 1.0)
32
+ rack (~> 2.0)
33
+ rack-protection (= 2.0.0)
34
+ tilt (~> 2.0)
35
+ tilt (2.0.8)
36
+ yard (0.9.11)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ bundler
43
+ csvpp!
44
+ minitest
45
+ pry
46
+ pry-byebug
47
+ pry-doc
48
+ rake
49
+
50
+ BUNDLED WITH
51
+ 1.14.6
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Rathesan Iyadurai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # CSV++
2
+
3
+ CSV++ takes a `<DELIMITER>` separated input file and a JSON format specification
4
+ and turns it into Ruby Objects. See `test/sample_inputs/simple.txt` and
5
+ `test/sample_formats/simple.json` for an example.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'csvpp'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install csvpp
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ CSVPP.parse(
27
+ input: 'test/sample_inputs/simple.txt',
28
+ format: 'test/sample_formats/simple.json'
29
+ ) # => [{"v1"=>34, "line_number"=>1, "v2"=>"foobar"}, {"v1"=>99, "line_number"=>2, "v2"=>"hi there"}]
30
+ ```
31
+
32
+ ## Development
33
+
34
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run
35
+ `rake test` to run the tests. You can also run `bin/console` for an interactive
36
+ prompt that will allow you to experiment.
37
+
38
+ To install this gem onto your local machine, run `bundle exec rake install`. To
39
+ release a new version, update the version number in `version.rb`, and then run
40
+ `bundle exec rake release`, which will create a git tag for the version, push
41
+ git commits and tags, and push the `.gem` file to
42
+ [rubygems.org](https://rubygems.org).
43
+
44
+ ## License
45
+
46
+ The gem is available as open source under the terms of the [MIT
47
+ License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList["test/**/*_test.rb"]
8
+ end
9
+
10
+ task :default => :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "csvpp"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "pry"
14
+ Pry.start(CSVPP)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/config.ru ADDED
@@ -0,0 +1,4 @@
1
+ require 'csvpp'
2
+ require 'csvpp/api'
3
+
4
+ run CSVPP::API
data/csvpp.gemspec ADDED
@@ -0,0 +1,42 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "csvpp/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "csvpp"
8
+ spec.version = CSVPP::VERSION
9
+ spec.authors = ["SwissDRG AG"]
10
+ spec.email = ["rathesan.iyadurai@swissdrg.org"]
11
+
12
+ spec.summary = %q{CSV++}
13
+ spec.description = %q{CSV++}
14
+ spec.homepage = "https://www.swissdrg.org"
15
+ spec.license = "MIT"
16
+
17
+ # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
+ # to allow pushing to a single host or delete this section to allow pushing to any host.
19
+ # if spec.respond_to?(:metadata)
20
+ # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
21
+ # else
22
+ # raise "RubyGems 2.0 or newer is required to protect against " \
23
+ # "public gem pushes."
24
+ # end
25
+
26
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
27
+ f.match(%r{^(test|spec|features)/})
28
+ end
29
+ spec.bindir = "exe"
30
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
31
+ spec.require_paths = ["lib"]
32
+
33
+ spec.add_dependency 'sinatra', '~> 2.0'
34
+ spec.add_dependency 'oj', '~> 3.3.9'
35
+
36
+ spec.add_development_dependency "bundler"
37
+ spec.add_development_dependency "rake"
38
+ spec.add_development_dependency "minitest"
39
+ spec.add_development_dependency "pry"
40
+ spec.add_development_dependency "pry-byebug"
41
+ spec.add_development_dependency "pry-doc"
42
+ end
data/exe/csvpp ADDED
@@ -0,0 +1,29 @@
#!/usr/bin/env ruby

# Command line front end for CSV++.
#
# Reads the input from the files named on the command line (or from STDIN when
# none are given), hands it to CSVPP.json together with the contents of the
# format file passed via -f/--format, and prints the resulting JSON indented.

require 'json'
require 'optparse'
require_relative '../lib/csvpp'

options = {}
option_parser = OptionParser.new do |opts|
  opts.on('-f', '--format FORMAT') do |format|
    options[:format] = format.strip
  end

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
end

HELP = option_parser.help
option_parser.parse!

# Without a format file there is nothing to parse against: print the usage
# text to STDERR and exit non-zero instead of crashing inside File.read(nil).
format = options[:format]
abort HELP if format.nil? || format.empty?

json = CSVPP.json(
  input: ARGF.read,
  format: File.read(format)
)

# Pretty-print in-process. The previous implementation interpolated the JSON
# into a shell command (`echo '...' | python -m json.tool`), which broke on any
# single quote in the data, allowed shell injection and required Python.
# Four spaces of indentation keep the output shaped like json.tool's.
puts JSON.pretty_generate(JSON.parse(json), indent: '    ')
data/lib/csvpp/api.rb ADDED
@@ -0,0 +1,25 @@
require 'sinatra/base'

module CSVPP
  # Minimal HTTP front end for CSV++.
  #
  # POST /parse expects a JSON object with the keys "input" and "format" and
  # responds with whatever CSVPP.json returns for them. Every response is
  # served as JSON and may be read from any origin.
  class API < Sinatra::Base
    # Keys that must be present in the body of POST /parse.
    REQUIRED_KEYS = %w[input format].freeze

    before do
      headers 'Access-Control-Allow-Origin' => '*'

      content_type :json
    end

    post '/parse' do
      payload = parse_payload(request.body.read)

      CSVPP.json(input: payload.fetch('input'), format: payload.fetch('format'))
    end

    # Answers CORS preflight requests for any path.
    options "*" do
      response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS"
      response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept"
      200
    end

    private

    # Decodes and validates the request body of POST /parse.
    #
    # Halts the request with HTTP 400 (instead of letting the error surface as
    # a 500) when the body is not valid JSON, is not a JSON object, or lacks
    # one of the REQUIRED_KEYS.
    #
    # @param raw [String] raw request body
    # @return [Hash] the decoded body
    def parse_payload(raw)
      payload = begin
        # NOTE(review): strict mode only builds plain JSON types. Oj's default
        # mode can instantiate arbitrary Ruby objects, which must not be done
        # with an untrusted request body.
        Oj.load(raw, mode: :strict)
      rescue Oj::ParseError
        reject_request('request body is not valid JSON')
      end
      reject_request('request body must be a JSON object') unless payload.is_a?(Hash)

      missing = REQUIRED_KEYS.reject { |key| payload.key?(key) }
      reject_request("missing required key(s): #{missing.join(', ')}") unless missing.empty?

      payload
    end

    # Stops processing and responds with HTTP 400 and a JSON error document.
    #
    # @param message [String] human readable reason
    def reject_request(message)
      halt 400, Oj.dump({ 'error' => message }, mode: :compat)
    end
  end
end
@@ -0,0 +1,102 @@
require 'bigdecimal'
require 'date'

module CSVPP
  # Stateless helpers that turn raw cell values into typed Ruby values.
  #
  # Every `parse_<type>` method takes the raw value plus optional keyword
  # options and ignores the options it does not know, so that `convert` can
  # forward one and the same option set to any of them.
  module Conversions
    module_function

    # Converts `obj` to the type named by `to`.
    #
    # @param obj [Object] object to parse
    # @param to [String] a type, e.g. "int"; dispatched to `parse_<to>`
    # @param missings [Array] list of values that are treated as missings,
    #   e.g. ['NA', '-', -999]
    # @param options [Hash] options passed on to the parsing method of the type
    # @return parsed value, read from `obj`, interpreted as type given by `to`;
    #   nil if `obj` is one of `missings`
    # @raise [NoMethodError] if there is no `parse_<to>` method
    def convert(obj, to:, missings: [], **options)
      return nil if missing?(obj, missings)

      send("parse_#{to}", obj, **options)
    end

    # @return [String] `str` converted with `to_s`
    def parse_string(str, **_options)
      str.to_s
    end

    # Parses an integer. Strings are cleaned first (see `clean_integer`);
    # other objects are handed to `Integer()` unchanged.
    #
    # @return [Integer, nil] nil if `str` is empty or cannot be read as integer
    def parse_int(str, **_options)
      return nil if str.to_s.empty?

      Integer(str.is_a?(String) ? clean_integer(str) : str)
    rescue ArgumentError, TypeError, RangeError
      nil
    end

    # Parses a float after cleaning the value with `clean_decimal`.
    #
    # @return [Float, nil] nil if `str` is empty or cannot be read as float
    def parse_float(str, **_options)
      return nil if str.to_s.empty?

      Float(clean_decimal(str))
    rescue ArgumentError, TypeError, RangeError
      nil
    end

    # Parses a decimal after cleaning the value with `clean_decimal`.
    #
    # Accepts (and ignores) extra options like the other parsers, so that
    # `convert` can forward options to it without raising ArgumentError.
    #
    # @return [BigDecimal, nil] nil if nothing is left after cleaning
    def parse_decimal(str, **_options)
      return nil if str.to_s.empty?

      cleaned = clean_decimal(str).to_s
      return nil if cleaned.empty?

      BigDecimal(cleaned)
    end

    # @return [Date] `str.to_s` parsed with `Date.parse`
    # @raise [ArgumentError] if the value is not a valid date
    def parse_date(str, **_options)
      Date.parse(str.to_s)
    end

    # Comparison is done on the stripped, downcased string form of `str`.
    #
    # @param true_values [Array] list of values that are interpreted as `true`;
    #   defaults to '1', 't' and 'true' when empty
    # @param false_values [Array] list of values that are interpreted as
    #   `false`; defaults to '0', 'f' and 'false' when empty
    # @return true or false, or
    #   nil if `str` doesn't match any value interpreted as `true` or `false`
    def parse_boolean(str, true_values: [], false_values: [], **_options)
      cleaned = str.to_s.strip.downcase

      trues = if true_values.empty?
                ['1', 't', 'true']
              else
                true_values.map(&:to_s).map(&:downcase)
              end
      return true if trues.include?(cleaned)

      falses = if false_values.empty?
                 ['0', 'f', 'false']
               else
                 false_values.map(&:to_s).map(&:downcase)
               end
      return false if falses.include?(cleaned)

      nil
    end

    # @return [Boolean] whether the string form of `obj` equals the string
    #   form of one of `missings`
    def missing?(obj, missings)
      missings.map(&:to_s).include?(obj.to_s)
    end

    # Normalizes the textual form of a whole number so that `Integer()` can
    # read it as a base 10 value.
    #
    # @param str [String]
    # @return [String]
    def clean_integer(str)
      val = str.strip
               .gsub(/['`\s]?/, '')      # remove thousand separators
               .sub(/\.\d*/, '')         # remove decimal point and everything thereafter
               .sub(/[\sa-zA-Z]*$/, '')  # remove trailing words like "mg"
               .sub(/^-0*(.+)$/, '-\1')  # remove 0 after negative sign: -003 => -3
      # Leading zeros are removed because Integer() would read them as an
      # octal prefix.
      val =~ /^0+$/ ? '0' : val.gsub(/^0*/, '')
    end

    # Normalizes the textual form of a decimal number. Non-strings are
    # returned unchanged.
    #
    # @param str [Object]
    # @return [String, Object]
    def clean_decimal(str)
      return str unless str.is_a?(String)

      val = str.strip
               .gsub(/['`\s]?/, '')      # remove thousand separators
               .sub(/[\sa-zA-Z]*$/, '')  # remove trailing words like "mg"
               .sub(/^-0*(.+)$/, '-\1')  # remove 0 after negative sign: -003 => -3
      if val =~ /^0+$/ # remove leading zeros
        '0'
      else
        val.gsub(/^0*/, '')
      end
    end
  end
end
@@ -0,0 +1,126 @@
module CSVPP
  # Describes the variables of an input file, as read from a format
  # specification (a Hash, usually loaded from JSON).
  #
  # The specification must contain a "vars" Hash that maps each variable name
  # to its attributes ("position", "type" and optionally "missings",
  # "true_values", "false_values" and "line"). Multiline formats additionally
  # need a "start" entry.
  class Format
    attr_reader :name, :skip

    class << self
      # Registers a format under a name.
      #
      # @param name [String] unique name of the format
      # @param format [Format]
      def add(name, format)
        store[name] = format
      end

      # @param name [String] unique name of the format
      # @return [Format]
      # @raise [KeyError] if no format was added under `name`
      def find(name)
        store.fetch(name)
      end

      # @param path [String] path to format file
      # @return [Format]
      def load(path)
        load_from_str File.read(path)
      end

      # @param json [String]
      # @return [Format]
      def load_from_str(json)
        new Oj.load(json)
      end

      # @return [Array<Format>] all formats registered with `add`
      def all
        store.values
      end

      # @return [Hash] registered formats by name
      def store
        @store ||= {}
      end
    end

    # @param format [Hash] format specification with string keys
    # @raise [KeyError] if "vars" is missing, or if "start" is missing in a
    #   multiline format
    def initialize(format)
      @name = format['name']
      @multiline = format['multiline'].to_s.strip.downcase == 'true'
      @skip = format['skip'].to_i
      @vars = format.fetch('vars')

      # Always assigned, so that single line formats fail with a KeyError in
      # `vars_for_line` rather than with a NoMethodError on nil.
      @vars_grouped_by_line = {}
      @multiline_start = nil
      return unless multiline?

      @vars_grouped_by_line = vars.each_with_object({}) do |(var, meta), grouped|
        (grouped[meta['line']] ||= []) << var
      end
      @multiline_start = format.fetch('start')
    end

    # @return [Array<String>] names of all variables
    def var_names
      vars.keys
    end

    # @return [Integer] number of variables
    def length
      vars.size
    end

    # @return [Integer] the variable's "position" minus one
    def index(var)
      position(var) - 1
    end

    # @return the "position" defined for the variable
    # @raise [KeyError] if the variable is unknown
    def position(var)
      vars.fetch(var)['position']
    end

    # @return the "type" defined for the variable, e.g. "boolean"
    # @raise [KeyError] if the variable is unknown
    def type(var)
      vars.fetch(var)['type']
    end

    # @param var [String] name of the variable for which the missings are required
    # @return [Array] an array of missing values (can be empty if no missings were defined)
    def missings(var)
      array_from(var, 'missings')
    end

    # Returns the values that are defined as `true` in the format's json
    # definition for the given variable.
    # @return [Array] all values that should be interpreted as `true` for this
    #   variable; always empty for variables that are not of type "boolean"
    def true_values(var)
      return [] unless type(var) == "boolean"

      array_from(var, 'true_values')
    end

    # Returns the values that are defined as `false` in the format's json
    # definition for the given variable.
    # @return [Array] all values that should be interpreted as `false` for this
    #   variable; always empty for variables that are not of type "boolean"
    def false_values(var)
      return [] unless type(var) == "boolean"

      array_from(var, 'false_values')
    end

    # @return [Array<String>] names of the variables whose "line" is `line_id`
    # @raise [KeyError] if no variable is defined for `line_id`
    def vars_for_line(line_id)
      vars_grouped_by_line.fetch(line_id)
    end

    # @return [Boolean] whether `line_id` equals the format's "start" entry
    def multiline_start?(line_id)
      multiline_start == line_id
    end

    # @return [Boolean] whether the format's "multiline" entry reads "true"
    def multiline?
      @multiline
    end

    private

    # Returns the value or values specified for the given attribute of the given
    # variable in the format's json. An empty array if no such attribute was
    # defined for the given variable.
    # @return [Array] value(s) defined for given attribute for given variable
    def array_from(var, attribute)
      value = vars.fetch(var)[attribute]
      return [] if value.nil?
      return value if value.is_a?(Array)

      [value]
    end

    attr_reader :vars, :vars_grouped_by_line, :multiline_start
  end
end