dgaff_ml 0.0.4 → 0.0.5

checksums.yaml CHANGED
@@ -1,15 +1,15 @@
  ---
  !binary "U0hBMQ==":
  metadata.gz: !binary |-
- ZmVmMWMxYWU2ZjM1NWMzOTRjNWVkNDY1OGE1MmJkYjU0MmMyM2Y0NQ==
+ YjcyMDhhYThjMmM2OWNmZWM2Y2Y4Mzk0ZWE1YzBjOGYxNzA4NTkzZg==
  data.tar.gz: !binary |-
- Y2NmMTQ4NjVjYzNkMzE2ZTgwNmE5Y2NiYTNhOGQxZDAwODA2Mjg3ZQ==
+ Yzg1YzYwY2Y4N2Y1MjMzZGUxZDhjZTBmNzBiNWE4ZTgzZjc4ODkwZA==
  SHA512:
  metadata.gz: !binary |-
- MTk0Y2ZhNTA3OTJlY2U2ZDI5ZjRhMjllYjg4YjBkMjUxZWQ0ZjAyZDIxZDZh
- NzIwNjM3NDgyM2ZiOTBkZDZlNzkzY2JlNGQwNWFhMzUwNDBkMWJjNzIxYzkw
- NWMwMGVmZDc2MTVhMjk2NjE5MTI3OWJlMzZlNDEyOTFkZGE4ZjA=
+ YWRhNWEyMDY5ODc2M2RkNTE3ZGRjODNjNTdmZThlOGRkNjNjODM1NDIyMmEx
+ NjJkYzBiOGNkMWJmMDk0ZGEzOWNlZTVhZjA3ZTI2OGJhZjQzYzMxMTEwOGRl
+ NjBmMjU5ZWQ1MGVmNDRjOGFjZGU3YTRkNTQwOGY0ZmIzMTRiNjI=
  data.tar.gz: !binary |-
- YjNmZjk0ZGFlNDc5YjRhN2ZkYTA0MmJmNjBmZmUwYWZmOWU3MWI0NjdkOTg5
- YmFhNzgzNzVlZjEzNmEwNWNmNzVhM2M5MzlkNjAzZTI4MmYwZDgzZjJjYjhj
- MmQxMTFlY2IwZDkxMzEwY2QxYTIyMTQ0NGFjNWM4YjljODFiZjU=
+ ZDcyYTdhNzg0YzYxMjZhZWQyYzFlZTI3YWY1OTdjYmU5ZThlNDUwOWY1Nzg4
+ NDJmYTlkNmJlZDI3YjExOWZkY2Q0NmM4ODEzZDNhMDYwODI1Y2I4MzM0MjRm
+ MmZmMDVkNWVlNzM4MjZlYTlhYjZlMWFkNTE0ZDc5OTQ1OGE1YWM=
data/dgaff_ml/.gitignore ADDED
@@ -0,0 +1,14 @@
+ /.bundle/
+ /.yardoc
+ /Gemfile.lock
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
+ *.bundle
+ *.so
+ *.o
+ *.a
+ mkmf.log
data/dgaff_ml/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in dgaff_ml.gemspec
+ gemspec
data/dgaff_ml/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2017 Devin Gaffney
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/dgaff_ml/README.md ADDED
@@ -0,0 +1,31 @@
+ # DgaffMl
+
+ TODO: Write a gem description
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+ ```ruby
+ gem 'dgaff_ml'
+ ```
+
+ And then execute:
+
+ $ bundle
+
+ Or install it yourself as:
+
+ $ gem install dgaff_ml
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
+ ## Contributing
+
+ 1. Fork it ( https://github.com/[my-github-username]/dgaff_ml/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/dgaff_ml/Rakefile ADDED
@@ -0,0 +1,2 @@
+ require "bundler/gem_tasks"
+
data/dgaff_ml/dgaff_ml.gemspec ADDED
@@ -0,0 +1,23 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'dgaff_ml/version'
+
+ Gem::Specification.new do |spec|
+ spec.name = "dgaff_ml"
+ spec.version = DgaffMl::VERSION
+ spec.authors = ["Devin Gaffney"]
+ spec.email = ["itsme@devingaffney.com"]
+ spec.summary = %q{TODO: Write a short summary. Required.}
+ spec.description = %q{TODO: Write a longer description. Optional.}
+ spec.homepage = ""
+ spec.license = "MIT"
+
+ spec.files = `git ls-files -z`.split("\x0")
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+ spec.require_paths = ["lib"]
+
+ spec.add_development_dependency "bundler", "~> 1.7"
+ spec.add_development_dependency "rake", "~> 10.0"
+ end
data/dgaff_ml/lib/dgaff_ml/version.rb ADDED
@@ -0,0 +1,3 @@
+ module DgaffMl
+ VERSION = "0.0.1"
+ end
data/dgaff_ml/lib/dgaff_ml.rb ADDED
@@ -0,0 +1,5 @@
+ require "dgaff_ml/version"
+
+ module DgaffMl
+ # Your code goes here...
+ end
data/lib/dgaff_ml/client.rb CHANGED
@@ -7,17 +7,37 @@ class DGaffML
  def initialize(user_id)
  @user = DGaffML::Request.login(user_id)
  end
-
+
+ def datasets
+ DGaffML::Request.datasets(@user["id"]).collect{|d| DGaffML::Dataset.new(self, d)}
+ end
+
+ def dataset(dataset_id)
+ DGaffML::Dataset.new(self, DGaffML::Request.dataset(@user["id"], dataset_id))
+ end
+
+ def export_model(dataset_id)
+ DGaffML::Model.new(self, DGaffML::Request.export_model(@user["id"], dataset_id))
+ end
+
  def models
- DGaffML::Request.datasets(@user["id"])
+ DGaffML::Request.models(@user["id"]).collect{|m| DGaffML::Model.new(self, m)}
  end
 
- def model(dataset_id)
- DGaffML::Model.new(self, DGaffML::Request.dataset(@user["id"], dataset_id))
+ def model(model_id)
+ DGaffML::Model.new(self, DGaffML::Request.model(@user["id"], model_id))
  end
-
+
  def predict(dataset_id, obs)
  DGaffML::Request.predict(@user["id"], dataset_id, obs)
  end
+
+ def apply_to_new_dataset(model_id, filepath, prediction_column)
+ DGaffML::Dataset.new(self, DGaffML::Request.apply_to_new_dataset(@user["id"], model_id, filepath, prediction_column))
+ end
+
+ def new_dataset(filepath, prediction_column)
+ DGaffML::Dataset.new(self,DGaffML::Request.new_dataset(@user["id"], filepath, prediction_column))
+ end
  end
  end
data/lib/dgaff_ml/dataset.rb ADDED
@@ -0,0 +1,118 @@
+ class DGaffML
+ class Dataset
+ attr_accessor :dataset
+ def initialize(client, dataset_response)
+ @client = client
+ @dataset = dataset_response
+ @dataset_id = @dataset["id"]
+ @user_id = @dataset["user_id"]
+ end
+
+ def predict(obs)
+ predictions = @client.predict(@dataset_id, translate_obs(obs))
+ if self.dataset["conversion_pipeline"].keys.include?("label")
+ return predictions.collect{|x| self.dataset["conversion_pipeline"]["label"][x]}
+ else
+ return predictions
+ end
+ end
+
+ def translate_obs(obs)
+ dataset_keys = (self.dataset["conversion_pipeline"].keys-["label", "internal_headers"]).sort_by(&:to_i)
+ dataset_classes = dataset_keys.collect{|k| self.dataset["col_classes"][k.to_i]}
+ translated_rows = []
+ obs.each do |row|
+ translated_row = []
+ row.each_with_index do |el, i|
+ translated_row << cast_val(el, dataset_classes[i])
+ end
+ translated_rows << translated_row
+ end
+ self.convert(translated_rows, dataset_keys, dataset_classes)
+ end
+
+ def convert(rows, dataset_keys, dataset_classes)
+ transposed = rows.transpose
+ detexted = []
+ labels = []
+ transposed.each_with_index do |col, i|
+ if dataset_classes[i] == "Phrase" || dataset_classes[i] == "Text"
+ self.dataset["conversion_pipeline"][dataset_keys[i]]["unique_terms"].each do |term|
+ counted = []
+ col.each do |row|
+ row = [row.to_s] if row.nil?
+ counted << row.count(term)
+ end
+ detexted << counted
+ end
+ elsif dataset_classes[i] == "Categorical"
+ counted = []
+ col.each do |val|
+ counted << self.dataset["conversion_pipeline"][dataset_keys[i]]["unique_terms"].index(val.to_s)
+ end
+ detexted << counted
+ else
+ conversion_pipeline = self.dataset["conversion_pipeline"][dataset_keys[i]]
+ replaced = col.collect{|r| r||conversion_pipeline["average"]}
+ dist = conversion_pipeline["max"]-conversion_pipeline["min"]
+ detexted << replaced
+ detexted << replaced.collect{|r| (r-conversion_pipeline["min"]).to_f/dist} if dist > 0
+ detexted << replaced.collect{|r| (r-conversion_pipeline["average"]).to_f/conversion_pipeline["stdev"]} if conversion_pipeline["stdev"] > 0
+ detexted << replaced.collect{|r| r.abs}
+ end
+ end
+ return detexted.transpose
+ end
+
+ def clean_str(string)
+ string.
+ gsub(/[^A-Za-z0-9(),!?\'\`]/, " ").
+ gsub(" ", " ").
+ gsub("\'s", " \'s").
+ gsub("", "").
+ gsub("\'ve", " \'ve").
+ gsub("n\'t", " n\'t").
+ gsub("\'re", " \'re").
+ gsub("\'d", " \'d").
+ gsub("\'ll", " \'ll").
+ gsub(",", " , ").
+ gsub("!", " ! ").
+ gsub("\(", " \\( ").
+ gsub("\)", " \\) ").
+ gsub(" \\\( \\\( \\\( ", " \(\(\( ").
+ gsub(" \\\) \\\) \\\) ", " \)\)\) ").
+ gsub("\?", " \? ").
+ gsub(/\s{2,}/, " ").
+ gsub(Regexp.new("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"), "<URL/>").
+ gsub("www", " ").
+ gsub("com", " ").
+ gsub("org", " ").
+ strip.
+ downcase
+ end
+
+ def cast_val(value, directive)
+ if directive == "Integer"
+ return value.to_i
+ elsif directive == "Float"
+ return value.to_f
+ elsif directive == "Time"
+ if value.length == 10 and value.scan(/\d/).count == 10
+ return Time.at(value).to_i
+ elsif value.length == 13 and value.scan(/\d/).count == 13
+ return Time.at(value).to_i
+ else
+ return Chronic.parse(value).to_i
+ end
+ elsif directive == "Text" or directive == "Phrase"
+ return clean_str(value).split(" ").collect{|word| Stemmer::stem_word(word)}
+ elsif directive == "Categorical"
+ return value
+ end
+ end
+
+ def export_model
+ @client.export_model(@dataset_id)
+ end
+ end
+ end
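The new `Dataset` class reproduces the server's feature encoding on the client before calling the predict endpoint: `cast_val` coerces each cell according to its recorded column class, and `convert` rebuilds the feature layout (per-term counts for `Phrase`/`Text` columns, category indexes for `Categorical` ones, and raw, min-max scaled, z-scored, and absolute copies of numeric columns). A sketch of calling it, with made-up rows for a dataset whose columns were classed as `Text`, `Integer`, and `Categorical`:

```ruby
# Continuation of the client sketch above; the dataset id and rows are placeholders.
client  = DGaffML::Client.new("my-user-id")             # assumed constructor, as before
dataset = client.dataset(17)                            # 17 stands in for a real dataset id
rows = [
  ["great product, would buy again", 3, "US"],          # Text, Integer, Categorical columns
  ["arrived broken",                 1, "DE"]
]
labels = dataset.predict(rows)                          # decoded via the "label" pipeline when present
```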
data/lib/dgaff_ml/model.rb CHANGED
@@ -1,114 +1,15 @@
  class DGaffML
  class Model
- attr_accessor :model
+ attr_accessor :dataset
  def initialize(client, model_response)
  @client = client
  @model = model_response
- @dataset_id = @model["id"]
+ @model_id = @model["id"]
  @user_id = @model["user_id"]
  end
 
- def predict(obs)
- predictions = @client.predict(@dataset_id, translate_obs(obs))
- if self.model["conversion_pipeline"].keys.include?("label")
- return predictions.collect{|x| self.model["conversion_pipeline"]["label"][x]}
- else
- return predictions
- end
- end
-
- def translate_obs(obs)
- model_keys = (self.model["conversion_pipeline"].keys-["label", "internal_headers"]).sort_by(&:to_i)
- model_classes = model_keys.collect{|k| self.model["col_classes"][k.to_i]}
- translated_rows = []
- obs.each do |row|
- translated_row = []
- row.each_with_index do |el, i|
- translated_row << cast_val(el, model_classes[i])
- end
- translated_rows << translated_row
- end
- self.convert(translated_rows, model_keys, model_classes)
- end
-
- def convert(rows, model_keys, model_classes)
- transposed = rows.transpose
- detexted = []
- labels = []
- transposed.each_with_index do |col, i|
- if model_classes[i] == "Phrase" || model_classes[i] == "Text"
- self.model["conversion_pipeline"][model_keys[i]]["unique_terms"].each do |term|
- counted = []
- col.each do |row|
- row = [row.to_s] if row.nil?
- counted << row.count(term)
- end
- detexted << counted
- end
- elsif model_classes[i] == "Categorical"
- counted = []
- col.each do |val|
- counted << self.model["conversion_pipeline"][model_keys[i]]["unique_terms"].index(val.to_s)
- end
- detexted << counted
- else
- conversion_pipeline = self.model["conversion_pipeline"][model_keys[i]]
- replaced = col.collect{|r| r||conversion_pipeline["average"]}
- dist = conversion_pipeline["max"]-conversion_pipeline["min"]
- detexted << replaced
- detexted << replaced.collect{|r| (r-conversion_pipeline["min"]).to_f/dist} if dist > 0
- detexted << replaced.collect{|r| (r-conversion_pipeline["average"]).to_f/conversion_pipeline["stdev"]} if conversion_pipeline["stdev"] > 0
- detexted << replaced.collect{|r| r.abs}
- end
- end
- return detexted.transpose
- end
-
- def clean_str(string)
- string.
- gsub(/[^A-Za-z0-9(),!?\'\`]/, " ").
- gsub(" ", " ").
- gsub("\'s", " \'s").
- gsub("", "").
- gsub("\'ve", " \'ve").
- gsub("n\'t", " n\'t").
- gsub("\'re", " \'re").
- gsub("\'d", " \'d").
- gsub("\'ll", " \'ll").
- gsub(",", " , ").
- gsub("!", " ! ").
- gsub("\(", " \\( ").
- gsub("\)", " \\) ").
- gsub(" \\\( \\\( \\\( ", " \(\(\( ").
- gsub(" \\\) \\\) \\\) ", " \)\)\) ").
- gsub("\?", " \? ").
- gsub(/\s{2,}/, " ").
- gsub(Regexp.new("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"), "<URL/>").
- gsub("www", " ").
- gsub("com", " ").
- gsub("org", " ").
- strip.
- downcase
- end
-
- def cast_val(value, directive)
- if directive == "Integer"
- return value.to_i
- elsif directive == "Float"
- return value.to_f
- elsif directive == "Time"
- if value.length == 10 and value.scan(/\d/).count == 10
- return Time.at(value).to_i
- elsif value.length == 13 and value.scan(/\d/).count == 13
- return Time.at(value).to_i
- else
- return Chronic.parse(value).to_i
- end
- elsif directive == "Text" or directive == "Phrase"
- return clean_str(value).split(" ").collect{|word| Stemmer::stem_word(word)}
- elsif directive == "Categorical"
- return value
- end
+ def apply_to_new_dataset(filepath, prediction_column)
+ @client.apply_to_new_dataset(@model_id, filepath, prediction_column)
  end
  end
  end
data/lib/dgaff_ml/request.rb CHANGED
@@ -3,11 +3,15 @@ class DGaffML
  def self.hostname
  "http://machinelearning.devingaffney.com"
  end
-
+
  def self.login(user_id)
  JSON.parse(RestClient.get(hostname+"/api/#{user_id}").body)
  end
 
+ def self.new_dataset(user_id, filepath, prediction_column)
+ JSON.parse(RestClient.post(hostname+"/api/#{user_id}/new_dataset", {filesize: File.open(filepath).size/1024.0/1024, filename: filepath.split("/").last, csv_data: CSV.read(filepath).to_json, prediction_column: prediction_column}).body)
+ end
+
  def self.dataset(user_id, dataset_id)
  JSON.parse(RestClient.get(hostname+"/api/#{user_id}/dataset/#{dataset_id}").body)
  end
@@ -16,8 +20,25 @@ class DGaffML
  JSON.parse(RestClient.get(hostname+"/api/#{user_id}/datasets").body)
  end
 
+ def self.export_model(user_id, dataset_id)
+ JSON.parse(RestClient.get(hostname+"/api/#{user_id}/dataset/#{dataset_id}/export_model").body)
+ end
+
+ def self.model(user_id, model_id)
+ JSON.parse(RestClient.get(hostname+"/api/#{user_id}/model/#{model_id}").body)
+ end
+
+ def self.models(user_id)
+ JSON.parse(RestClient.get(hostname+"/api/#{user_id}/models").body)
+ end
+
+ def self.apply_to_new_dataset(user_id, model_id, filepath, prediction_column)
+ JSON.parse(RestClient.post(hostname+"/api/#{user_id}/model/#{model_id}/apply_to_new_dataset", {filesize: File.open(filepath).size/1024.0/1024, filename: filepath.split("/").last, csv_data: CSV.read(filepath).to_json, prediction_column: prediction_column}).body)
+ end
+
  def self.predict(user_id, dataset_id, obs)
  JSON.parse(RestClient.post(hostname+"/api/#{user_id}/predict/#{dataset_id}", {data: obs.to_json}).body)
  end
+
  end
  end
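On the wire, the new `new_dataset` and `apply_to_new_dataset` helpers post the same four form fields: the file size converted to megabytes, the bare filename, the CSV rows re-serialized as JSON, and the prediction column. A sketch of the payload they assemble, with the path and column name as placeholders:

```ruby
require 'csv'
require 'json'

filepath = "train.csv"                                  # placeholder path
payload = {
  filesize: File.open(filepath).size / 1024.0 / 1024,   # size in megabytes, as computed above
  filename: filepath.split("/").last,                   # filename without any directory prefix
  csv_data: CSV.read(filepath).to_json,                 # parsed rows, JSON-encoded for the API
  prediction_column: "label"                            # placeholder column name
}
# DGaffML::Request.new_dataset would then POST this to "/api/#{user_id}/new_dataset".
```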
data/lib/dgaff_ml/version.rb CHANGED
@@ -1,3 +1,3 @@
  module DgaffMl
- VERSION = "0.0.4"
+ VERSION = "0.0.5"
  end
data/lib/dgaff_ml.rb CHANGED
@@ -1,5 +1,7 @@
+ require 'csv'
  require "dgaff_ml/version"
  require File.expand_path('../dgaff_ml/client', __FILE__)
+ require File.expand_path('../dgaff_ml/dataset', __FILE__)
  require File.expand_path('../dgaff_ml/model', __FILE__)
  require File.expand_path('../dgaff_ml/request', __FILE__)
 
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: dgaff_ml
  version: !ruby/object:Gem::Version
- version: 0.0.4
+ version: 0.0.5
  platform: ruby
  authors:
  - Devin Gaffney
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-10-27 00:00:00.000000000 Z
+ date: 2017-11-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bundler
@@ -93,9 +93,18 @@ files:
  - README.md
  - Rakefile
  - dgaff_ml.gemspec
+ - dgaff_ml/.gitignore
+ - dgaff_ml/Gemfile
+ - dgaff_ml/LICENSE.txt
+ - dgaff_ml/README.md
+ - dgaff_ml/Rakefile
+ - dgaff_ml/dgaff_ml.gemspec
+ - dgaff_ml/lib/dgaff_ml.rb
+ - dgaff_ml/lib/dgaff_ml/version.rb
  - echo.py
  - lib/dgaff_ml.rb
  - lib/dgaff_ml/client.rb
+ - lib/dgaff_ml/dataset.rb
  - lib/dgaff_ml/model.rb
  - lib/dgaff_ml/request.rb
  - lib/dgaff_ml/version.rb