csvpp 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 96198044193b9b7a62d2e35810217bfc5b603c7b
4
+ data.tar.gz: c5f025bd90f45294986eb3892c5db6d81f2b5498
5
+ SHA512:
6
+ metadata.gz: 3b35fd7ed568851367faf5b16c329271fa4d7d5935161037ceb1e8fba739d84515c8fec63c0382394156914e3d6eb2d4116b3c75ed0a88986e6554487b3b16db
7
+ data.tar.gz: 211dc3241614ba542156221574054264528a2baed9f99c8875c3e01d041d0a04065723cb5bc5e4cf04f539822131c96ed92789fdf3d4c72c152cbab55d7a0830
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ .idea
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,13 @@
1
+ image: "ruby:2.4"
2
+
3
+ cache:
4
+ paths:
5
+ - vendor/
6
+
7
+ before_script:
8
+ - gem install bundler --no-ri --no-rdoc
9
+ - bundle install -j $(nproc) --path vendor
10
+
11
+ tests:
12
+ script:
13
+ - bundle exec rake
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.4.0
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0.pre.3
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in csvpp.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,51 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ csvpp (0.3.0)
5
+ oj (~> 3.3.9)
6
+ sinatra (~> 2.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ byebug (9.1.0)
12
+ coderay (1.1.2)
13
+ method_source (0.9.0)
14
+ minitest (5.10.3)
15
+ mustermann (1.0.1)
16
+ oj (3.3.10)
17
+ pry (0.11.3)
18
+ coderay (~> 1.1.0)
19
+ method_source (~> 0.9.0)
20
+ pry-byebug (3.5.0)
21
+ byebug (~> 9.1)
22
+ pry (~> 0.10)
23
+ pry-doc (0.11.1)
24
+ pry (~> 0.9)
25
+ yard (~> 0.9)
26
+ rack (2.0.3)
27
+ rack-protection (2.0.0)
28
+ rack
29
+ rake (12.3.0)
30
+ sinatra (2.0.0)
31
+ mustermann (~> 1.0)
32
+ rack (~> 2.0)
33
+ rack-protection (= 2.0.0)
34
+ tilt (~> 2.0)
35
+ tilt (2.0.8)
36
+ yard (0.9.11)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ bundler
43
+ csvpp!
44
+ minitest
45
+ pry
46
+ pry-byebug
47
+ pry-doc
48
+ rake
49
+
50
+ BUNDLED WITH
51
+ 1.14.6
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Rathesan Iyadurai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # CSV++
2
+
3
+ CSV++ takes a `<DELIMITER>` separated input file and a JSON format specification
4
+ and turns it into Ruby Objects. See `test/sample_inputs/simple.txt` and
5
+ `test/sample_formats/simple.json` for an example.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'csvpp'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install csvpp
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ CSVPP.parse(
27
+ input: 'test/sample_inputs/simple.txt',
28
+ format: 'test/sample_formats/simple.json'
29
+ ) # => [{"v1"=>34, "line_number"=>1, "v2"=>"foobar"}, {"v1"=>99, "line_number"=>2, "v2"=>"hi there"}]
30
+ ```
31
+
32
+ ## Development
33
+
34
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run
35
+ `rake test` to run the tests. You can also run `bin/console` for an interactive
36
+ prompt that will allow you to experiment.
37
+
38
+ To install this gem onto your local machine, run `bundle exec rake install`. To
39
+ release a new version, update the version number in `version.rb`, and then run
40
+ `bundle exec rake release`, which will create a git tag for the version, push
41
+ git commits and tags, and push the `.gem` file to
42
+ [rubygems.org](https://rubygems.org).
43
+
44
+ ## License
45
+
46
+ The gem is available as open source under the terms of the [MIT
47
+ License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# `rake test` runs every file matching test/**/*_test.rb, with both the
# test/ and lib/ directories added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# A bare `rake` invocation runs the test task.
task default: :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Interactive console for experimenting with the gem: loads the bundle and
# csvpp, then opens a Pry session with the CSVPP module as its context.

require "bundler/setup"
require "csvpp"
require "pry"

Pry.start(CSVPP)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/config.ru ADDED
@@ -0,0 +1,4 @@
1
+ require 'csvpp'
2
+ require 'csvpp/api'
3
+
4
+ run CSVPP::API
data/csvpp.gemspec ADDED
@@ -0,0 +1,42 @@
1
+
2
# Make lib/ loadable so the version constant can be read while building.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "csvpp/version"

Gem::Specification.new do |spec|
  spec.name          = "csvpp"
  spec.version       = CSVPP::VERSION
  spec.authors       = ["SwissDRG AG"]
  spec.email         = ["rathesan.iyadurai@swissdrg.org"]

  spec.summary       = %q{CSV++}
  spec.description   = %q{CSV++}
  spec.homepage      = "https://www.swissdrg.org"
  spec.license       = "MIT"

  # No `allowed_push_host` metadata is set, so pushes are not restricted to
  # a single gem server.

  # Package every git-tracked file except tests, specs and features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Everything under exe/ is installed as an executable.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency 'sinatra', '~> 2.0'
  spec.add_dependency 'oj', '~> 3.3.9'

  # Development-only dependencies (unversioned).
  ["bundler", "rake", "minitest", "pry", "pry-byebug", "pry-doc"].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
data/exe/csvpp ADDED
@@ -0,0 +1,29 @@
1
#!/usr/bin/env ruby

# Command line front end: reads delimited input from the files given as
# arguments (or from STDIN), parses it according to the format file passed
# with -f/--format and prints the result as pretty-printed JSON.

require 'json'
require 'optparse'
require_relative '../lib/csvpp'

options = {}
option_parser = OptionParser.new do |opts|
  opts.on('-f', '--format FORMAT') do |format|
    options[:format] = format.strip
  end

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
end

HELP = option_parser.help
option_parser.parse!

format_path = options[:format]

# Without a format file there is nothing to parse against; show the usage
# text instead of failing later inside File.read(nil).
if format_path.nil? || format_path.empty?
  warn HELP
  exit 1
end

json = CSVPP.json(
  input: ARGF.read,
  format: File.read(format_path)
)

# Pretty-print in-process. The previous implementation interpolated the JSON
# into a shell command (`echo '...' | python -m json.tool`), which broke on
# (and allowed command injection through) single quotes in the data and
# required a python executable. Four-space indentation mirrors json.tool.
puts JSON.pretty_generate(JSON.parse(json), indent: '    ')
data/lib/csvpp/api.rb ADDED
@@ -0,0 +1,25 @@
1
require 'sinatra/base'

module CSVPP
  # Small JSON-over-HTTP front end for CSVPP.
  #
  # POST /parse expects a JSON object with the keys "input" and "format" and
  # responds with whatever CSVPP.json returns for them. Every response is
  # served as JSON and allows cross-origin requests from any origin.
  class API < Sinatra::Base
    before do
      headers 'Access-Control-Allow-Origin' => '*'

      content_type :json
    end

    post '/parse' do
      # The body is untrusted client input: parse it in Oj's strict mode so
      # that only plain JSON types are produced, and answer malformed
      # requests with 400 instead of an unhandled exception.
      payload = begin
        Oj.load(request.body.read, mode: :strict)
      rescue Oj::ParseError
        halt 400, '{"error":"request body is not valid JSON"}'
      end

      unless payload.is_a?(Hash) && payload.key?('input') && payload.key?('format')
        halt 400, '{"error":"request body must be a JSON object with the keys input and format"}'
      end

      CSVPP.json(input: payload['input'], format: payload['format'])
    end

    # Answers OPTIONS requests (e.g. CORS preflights) for any path.
    options "*" do
      response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS"
      response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept"
      200
    end
  end
end
@@ -0,0 +1,102 @@
1
require 'bigdecimal'
require 'date'

module CSVPP
  # Stateless helpers that turn raw cell values (usually strings) into typed
  # Ruby values. All methods are module functions, so they can be called as
  # `CSVPP::Conversions.convert(...)`.
  module Conversions
    # Characters removed as thousand separators: apostrophe, backtick and
    # any whitespace.
    THOUSAND_SEPARATORS = /['`\s]/

    # Values interpreted as true/false by `parse_boolean` when the caller
    # supplies no explicit lists.
    DEFAULT_TRUE_VALUES = %w[1 t true].freeze
    DEFAULT_FALSE_VALUES = %w[0 f false].freeze

    module_function

    # Converts `obj` to the type named by `to` by dispatching to the matching
    # `parse_<to>` method.
    #
    # @param obj [Object] object to parse
    # @param to [String] a type, e.g. "int"
    # @param missings [Array] list of values that are treated as missings, e.g. ['NA', '-', -999]
    # @param options [Hash] options passed on to parsing methods for specific types
    # @return parsed value, read from `obj`, interpreted as type given by `to`;
    #   nil if `obj` is one of the missings
    def convert(obj, to:, missings: [], **options)
      return nil if missing?(obj, missings)

      send("parse_#{to}", obj, **options)
    end

    # @return [String] `str.to_s`; options are accepted and ignored
    def parse_string(str, **options)
      str.to_s
    end

    # Parses an integer. Strings are cleaned first: thousand separators, the
    # fractional part, trailing words like "mg" and leading zeros are removed.
    #
    # @return [Integer, nil] nil for empty or unparsable input
    def parse_int(str, **options)
      return nil if str.to_s.empty?

      cleaned = if str.is_a?(String)
                  # Cut the decimal point and everything after it, then share
                  # the remaining clean-up with the float/decimal parsers.
                  without_fraction = str.strip
                                        .gsub(THOUSAND_SEPARATORS, '')
                                        .sub(/\.\d*/, '')
                  clean_decimal(without_fraction)
                else
                  str
                end

      begin
        Integer(cleaned)
      rescue ArgumentError, TypeError, FloatDomainError
        nil
      end
    end

    # Parses a float after cleaning strings with `clean_decimal`.
    #
    # @return [Float, nil] nil for empty or unparsable input
    def parse_float(str, **options)
      return nil if str.to_s.empty?

      begin
        Float(clean_decimal(str))
      rescue ArgumentError, TypeError
        nil
      end
    end

    # Parses a BigDecimal after cleaning strings with `clean_decimal`.
    # Accepts (and ignores) options so that `convert`, which always forwards
    # its extra options, can dispatch to it like to every other parser.
    #
    # @return [BigDecimal, nil] nil if the input is empty before or after
    #   cleaning. Other unparsable text is handed to `BigDecimal()` as is and
    #   is not rescued here.
    def parse_decimal(str, **options)
      return nil if str.to_s.empty?

      cleaned = clean_decimal(str).to_s
      return nil if cleaned.empty?

      BigDecimal(cleaned)
    end

    # @return [Date] result of `Date.parse(str.to_s)`; errors raised by
    #   `Date.parse` are not rescued here
    def parse_date(str, **options)
      Date.parse(str.to_s)
    end

    # Comparison is done on the stripped, downcased string form of `str`.
    #
    # @param true_values [Array] list of values that are interpreted as `true`
    #   (defaults to '1', 't', 'true' when empty)
    # @param false_values [Array] list of values that are interpreted as `false`
    #   (defaults to '0', 'f', 'false' when empty)
    # @return true or false, or
    #   nil if `str` doesn't match any value interpreted as `true` or `false`
    def parse_boolean(str, true_values: [], false_values: [])
      cleaned = str.to_s.strip.downcase

      trues = true_values.empty? ? DEFAULT_TRUE_VALUES : true_values.map { |v| v.to_s.downcase }
      return true if trues.include?(cleaned)

      falses = false_values.empty? ? DEFAULT_FALSE_VALUES : false_values.map { |v| v.to_s.downcase }
      return false if falses.include?(cleaned)

      nil
    end

    # @return [Boolean] whether the string form of `obj` equals the string
    #   form of any entry in `missings`
    def missing?(obj, missings)
      target = obj.to_s
      missings.any? { |missing| missing.to_s == target }
    end

    # Normalizes a numeric string: strips it, removes thousand separators and
    # trailing words, and drops leading zeros (a string of only zeros becomes
    # '0'). Non-strings are returned unchanged.
    #
    # All whitespace is removed by the separator step, so the remaining
    # patterns can safely anchor on the whole string.
    def clean_decimal(str)
      return str unless str.is_a?(String)

      val = str.strip
               .gsub(THOUSAND_SEPARATORS, '') # remove thousand separators
               .sub(/[\sa-zA-Z]*\z/, '')      # remove trailing words like "mg"
               .sub(/\A-0*(.+)\z/, '-\1')     # remove 0 after negative sign: -003 => -3

      val.match?(/\A0+\z/) ? '0' : val.sub(/\A0*/, '') # remove leading zeros
    end
  end
end
@@ -0,0 +1,126 @@
1
module CSVPP
  # Describes the layout of an input file: which variables it contains, where
  # they are positioned, their types and (for multiline formats) which
  # variables belong to which kind of line.
  #
  # The class also keeps a simple name => Format registry (`add`, `find`,
  # `all`).
  class Format
    attr_reader :name, :skip

    class << self
      # Registers `format` under `name`, replacing any previous entry.
      #
      # @param name [String] unique name of the format
      # @param format [Format]
      def add(name, format)
        store[name] = format
      end

      # @param name [String] unique name of the format
      # @return [Format]
      # @raise [KeyError] if no format was added under `name`
      def find(name)
        store.fetch(name)
      end

      # @param path [String] path to format file
      # @return [Format]
      def load(path)
        load_from_str(File.read(path))
      end

      # Format definitions may come from untrusted sources (e.g. HTTP
      # clients), so the JSON is parsed in Oj's strict mode, which only
      # yields plain JSON types.
      #
      # @param json [String]
      # @return [Format]
      def load_from_str(json)
        new(Oj.load(json, mode: :strict))
      end

      # @return [Array<Format>] every registered format
      def all
        store.values
      end

      # @return [Hash] the registry backing `add`/`find`/`all`
      def store
        @store ||= {}
      end
    end

    # @param format [Hash] parsed format definition. 'vars' is required;
    #   'start' is required as well when 'multiline' is true.
    def initialize(format)
      @name = format['name']
      @multiline = format['multiline'].to_s.strip.downcase == 'true'
      @skip = format['skip'].to_i
      @vars = format.fetch('vars')

      # Defined for every format so the multiline helpers behave predictably
      # on single-line formats as well.
      @vars_grouped_by_line = {}
      @multiline_start = nil
      return unless multiline?

      # line id => names of the variables found on that kind of line
      @vars_grouped_by_line = vars.keys.group_by { |var| vars.fetch(var)['line'] }
      @multiline_start = format.fetch('start')
    end

    # @return [Array<String>] names of all variables, in definition order
    def var_names
      vars.keys
    end

    # @return [Integer] number of variables
    def length
      var_names.size
    end

    # @return [Integer] zero-based index of the variable (`position - 1`)
    def index(var)
      position(var) - 1
    end

    # @return the 'position' defined for the variable
    def position(var)
      vars.fetch(var)['position']
    end

    # @return the 'type' defined for the variable, e.g. "int"
    def type(var)
      vars.fetch(var)['type']
    end

    # @param var [String] name of the variable for which the missings are required
    # @return [Array] an array of missing values (can be empty if no missings were defined)
    def missings(var)
      array_from(var, 'missings')
    end

    # Returns the values that are defined as `true` in the format's json
    # definition for the given variable.
    # @return [Array] all values that should be interpreted as true for this
    #   variable; always empty for non-boolean variables
    def true_values(var)
      return [] unless type(var) == "boolean"
      array_from(var, 'true_values')
    end

    # Returns the values that are defined as `false` in the format's json
    # definition for the given variable.
    # @return [Array] all values that should be interpreted as `false` for
    #   this variable; always empty for non-boolean variables
    def false_values(var)
      return [] unless type(var) == "boolean"
      array_from(var, 'false_values')
    end

    # @return [Array<String>] names of the variables defined for `line_id`
    # @raise [KeyError] if no variables are defined for `line_id` (always the
    #   case for non-multiline formats)
    def vars_for_line(line_id)
      vars_grouped_by_line.fetch(line_id)
    end

    # @return [Boolean] whether `line_id` is the 'start' line of a multiline
    #   format; always false for non-multiline formats
    def multiline_start?(line_id)
      multiline? && multiline_start == line_id
    end

    # @return [Boolean] whether the format was declared with multiline "true"
    def multiline?
      @multiline
    end

    private

    # Returns the value or values specified for the given attribute of the given
    # variable in the format's json. An empty array if no such attribute was
    # defined for the given variable.
    # @return [Array] value(s) defined for given attribute for given variable
    def array_from(var, attribute)
      value = vars.fetch(var)[attribute]
      case value
      when nil then []
      when Array then value
      else [value]
      end
    end

    attr_reader :vars, :vars_grouped_by_line, :multiline_start
  end
end