dgaff_ml 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZmVmMWMxYWU2ZjM1NWMzOTRjNWVkNDY1OGE1MmJkYjU0MmMyM2Y0NQ==
4
+ YjcyMDhhYThjMmM2OWNmZWM2Y2Y4Mzk0ZWE1YzBjOGYxNzA4NTkzZg==
5
5
  data.tar.gz: !binary |-
6
- Y2NmMTQ4NjVjYzNkMzE2ZTgwNmE5Y2NiYTNhOGQxZDAwODA2Mjg3ZQ==
6
+ Yzg1YzYwY2Y4N2Y1MjMzZGUxZDhjZTBmNzBiNWE4ZTgzZjc4ODkwZA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MTk0Y2ZhNTA3OTJlY2U2ZDI5ZjRhMjllYjg4YjBkMjUxZWQ0ZjAyZDIxZDZh
10
- NzIwNjM3NDgyM2ZiOTBkZDZlNzkzY2JlNGQwNWFhMzUwNDBkMWJjNzIxYzkw
11
- NWMwMGVmZDc2MTVhMjk2NjE5MTI3OWJlMzZlNDEyOTFkZGE4ZjA=
9
+ YWRhNWEyMDY5ODc2M2RkNTE3ZGRjODNjNTdmZThlOGRkNjNjODM1NDIyMmEx
10
+ NjJkYzBiOGNkMWJmMDk0ZGEzOWNlZTVhZjA3ZTI2OGJhZjQzYzMxMTEwOGRl
11
+ NjBmMjU5ZWQ1MGVmNDRjOGFjZGU3YTRkNTQwOGY0ZmIzMTRiNjI=
12
12
  data.tar.gz: !binary |-
13
- YjNmZjk0ZGFlNDc5YjRhN2ZkYTA0MmJmNjBmZmUwYWZmOWU3MWI0NjdkOTg5
14
- YmFhNzgzNzVlZjEzNmEwNWNmNzVhM2M5MzlkNjAzZTI4MmYwZDgzZjJjYjhj
15
- MmQxMTFlY2IwZDkxMzEwY2QxYTIyMTQ0NGFjNWM4YjljODFiZjU=
13
+ ZDcyYTdhNzg0YzYxMjZhZWQyYzFlZTI3YWY1OTdjYmU5ZThlNDUwOWY1Nzg4
14
+ NDJmYTlkNmJlZDI3YjExOWZkY2Q0NmM4ODEzZDNhMDYwODI1Y2I4MzM0MjRm
15
+ MmZmMDVkNWVlNzM4MjZlYTlhYjZlMWFkNTE0ZDc5OTQ1OGE1YWM=
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/dgaff_ml/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in dgaff_ml.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2017 Devin Gaffney
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,31 @@
1
+ # DgaffMl
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'dgaff_ml'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install dgaff_ml
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/dgaff_ml/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/dgaff_ml/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'dgaff_ml/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "dgaff_ml"
8
+ spec.version = DgaffMl::VERSION
9
+ spec.authors = ["Devin Gaffney"]
10
+ spec.email = ["itsme@devingaffney.com"]
11
+ spec.summary = %q{TODO: Write a short summary. Required.}
12
+ spec.description = %q{TODO: Write a longer description. Optional.}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ end
@@ -0,0 +1,3 @@
1
+ module DgaffMl
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,5 @@
1
+ require "dgaff_ml/version"
2
+
3
+ module DgaffMl
4
+ # Your code goes here...
5
+ end
@@ -7,17 +7,37 @@ class DGaffML
7
7
  def initialize(user_id)
8
8
  @user = DGaffML::Request.login(user_id)
9
9
  end
10
-
10
+
11
+ def datasets
12
+ DGaffML::Request.datasets(@user["id"]).collect{|d| DGaffML::Dataset.new(self, d)}
13
+ end
14
+
15
+ def dataset(dataset_id)
16
+ DGaffML::Dataset.new(self, DGaffML::Request.dataset(@user["id"], dataset_id))
17
+ end
18
+
19
+ def export_model(dataset_id)
20
+ DGaffML::Model.new(self, DGaffML::Request.export_model(@user["id"], dataset_id))
21
+ end
22
+
11
23
  def models
12
- DGaffML::Request.datasets(@user["id"])
24
+ DGaffML::Request.models(@user["id"]).collect{|m| DGaffML::Model.new(self, m)}
13
25
  end
14
26
 
15
- def model(dataset_id)
16
- DGaffML::Model.new(self, DGaffML::Request.dataset(@user["id"], dataset_id))
27
+ def model(model_id)
28
+ DGaffML::Model.new(self, DGaffML::Request.model(@user["id"], model_id))
17
29
  end
18
-
30
+
19
31
  def predict(dataset_id, obs)
20
32
  DGaffML::Request.predict(@user["id"], dataset_id, obs)
21
33
  end
34
+
35
+ def apply_to_new_dataset(model_id, filepath, prediction_column)
36
+ DGaffML::Dataset.new(self, DGaffML::Request.apply_to_new_dataset(@user["id"], model_id, filepath, prediction_column))
37
+ end
38
+
39
+ def new_dataset(filepath, prediction_column)
40
+ DGaffML::Dataset.new(self,DGaffML::Request.new_dataset(@user["id"], filepath, prediction_column))
41
+ end
22
42
  end
23
43
  end
@@ -0,0 +1,118 @@
1
+ class DGaffML
2
+ class Dataset
3
+ attr_accessor :dataset
4
+ def initialize(client, dataset_response)
5
+ @client = client
6
+ @dataset = dataset_response
7
+ @dataset_id = @dataset["id"]
8
+ @user_id = @dataset["user_id"]
9
+ end
10
+
11
+ def predict(obs)
12
+ predictions = @client.predict(@dataset_id, translate_obs(obs))
13
+ if self.dataset["conversion_pipeline"].keys.include?("label")
14
+ return predictions.collect{|x| self.dataset["conversion_pipeline"]["label"][x]}
15
+ else
16
+ return predictions
17
+ end
18
+ end
19
+
20
+ def translate_obs(obs)
21
+ dataset_keys = (self.dataset["conversion_pipeline"].keys-["label", "internal_headers"]).sort_by(&:to_i)
22
+ dataset_classes = dataset_keys.collect{|k| self.dataset["col_classes"][k.to_i]}
23
+ translated_rows = []
24
+ obs.each do |row|
25
+ translated_row = []
26
+ row.each_with_index do |el, i|
27
+ translated_row << cast_val(el, dataset_classes[i])
28
+ end
29
+ translated_rows << translated_row
30
+ end
31
+ self.convert(translated_rows, dataset_keys, dataset_classes)
32
+ end
33
+
34
+ def convert(rows, dataset_keys, dataset_classes)
35
+ transposed = rows.transpose
36
+ detexted = []
37
+ labels = []
38
+ transposed.each_with_index do |col, i|
39
+ if dataset_classes[i] == "Phrase" || dataset_classes[i] == "Text"
40
+ self.dataset["conversion_pipeline"][dataset_keys[i]]["unique_terms"].each do |term|
41
+ counted = []
42
+ col.each do |row|
43
+ row = [row.to_s] if row.nil?
44
+ counted << row.count(term)
45
+ end
46
+ detexted << counted
47
+ end
48
+ elsif dataset_classes[i] == "Categorical"
49
+ counted = []
50
+ col.each do |val|
51
+ counted << self.dataset["conversion_pipeline"][dataset_keys[i]]["unique_terms"].index(val.to_s)
52
+ end
53
+ detexted << counted
54
+ else
55
+ conversion_pipeline = self.dataset["conversion_pipeline"][dataset_keys[i]]
56
+ replaced = col.collect{|r| r||conversion_pipeline["average"]}
57
+ dist = conversion_pipeline["max"]-conversion_pipeline["min"]
58
+ detexted << replaced
59
+ detexted << replaced.collect{|r| (r-conversion_pipeline["min"]).to_f/dist} if dist > 0
60
+ detexted << replaced.collect{|r| (r-conversion_pipeline["average"]).to_f/conversion_pipeline["stdev"]} if conversion_pipeline["stdev"] > 0
61
+ detexted << replaced.collect{|r| r.abs}
62
+ end
63
+ end
64
+ return detexted.transpose
65
+ end
66
+
67
+ def clean_str(string)
68
+ string.
69
+ gsub(/[^A-Za-z0-9(),!?\'\`]/, " ").
70
+ gsub(" ", " ").
71
+ gsub("\'s", " \'s").
72
+ gsub("", "").
73
+ gsub("\'ve", " \'ve").
74
+ gsub("n\'t", " n\'t").
75
+ gsub("\'re", " \'re").
76
+ gsub("\'d", " \'d").
77
+ gsub("\'ll", " \'ll").
78
+ gsub(",", " , ").
79
+ gsub("!", " ! ").
80
+ gsub("\(", " \\( ").
81
+ gsub("\)", " \\) ").
82
+ gsub(" \\\( \\\( \\\( ", " \(\(\( ").
83
+ gsub(" \\\) \\\) \\\) ", " \)\)\) ").
84
+ gsub("\?", " \? ").
85
+ gsub(/\s{2,}/, " ").
86
+ gsub(Regexp.new("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"), "<URL/>").
87
+ gsub("www", " ").
88
+ gsub("com", " ").
89
+ gsub("org", " ").
90
+ strip.
91
+ downcase
92
+ end
93
+
94
+ def cast_val(value, directive)
95
+ if directive == "Integer"
96
+ return value.to_i
97
+ elsif directive == "Float"
98
+ return value.to_f
99
+ elsif directive == "Time"
100
+ if value.length == 10 and value.scan(/\d/).count == 10
101
+ return Time.at(value).to_i
102
+ elsif value.length == 13 and value.scan(/\d/).count == 13
103
+ return Time.at(value).to_i
104
+ else
105
+ return Chronic.parse(value).to_i
106
+ end
107
+ elsif directive == "Text" or directive == "Phrase"
108
+ return clean_str(value).split(" ").collect{|word| Stemmer::stem_word(word)}
109
+ elsif directive == "Categorical"
110
+ return value
111
+ end
112
+ end
113
+
114
+ def export_model
115
+ @client.export_model(@dataset_id)
116
+ end
117
+ end
118
+ end
@@ -1,114 +1,15 @@
1
1
  class DGaffML
2
2
  class Model
3
- attr_accessor :model
3
+ attr_accessor :dataset
4
4
  def initialize(client, model_response)
5
5
  @client = client
6
6
  @model = model_response
7
- @dataset_id = @model["id"]
7
+ @model_id = @model["id"]
8
8
  @user_id = @model["user_id"]
9
9
  end
10
10
 
11
- def predict(obs)
12
- predictions = @client.predict(@dataset_id, translate_obs(obs))
13
- if self.model["conversion_pipeline"].keys.include?("label")
14
- return predictions.collect{|x| self.model["conversion_pipeline"]["label"][x]}
15
- else
16
- return predictions
17
- end
18
- end
19
-
20
- def translate_obs(obs)
21
- model_keys = (self.model["conversion_pipeline"].keys-["label", "internal_headers"]).sort_by(&:to_i)
22
- model_classes = model_keys.collect{|k| self.model["col_classes"][k.to_i]}
23
- translated_rows = []
24
- obs.each do |row|
25
- translated_row = []
26
- row.each_with_index do |el, i|
27
- translated_row << cast_val(el, model_classes[i])
28
- end
29
- translated_rows << translated_row
30
- end
31
- self.convert(translated_rows, model_keys, model_classes)
32
- end
33
-
34
- def convert(rows, model_keys, model_classes)
35
- transposed = rows.transpose
36
- detexted = []
37
- labels = []
38
- transposed.each_with_index do |col, i|
39
- if model_classes[i] == "Phrase" || model_classes[i] == "Text"
40
- self.model["conversion_pipeline"][model_keys[i]]["unique_terms"].each do |term|
41
- counted = []
42
- col.each do |row|
43
- row = [row.to_s] if row.nil?
44
- counted << row.count(term)
45
- end
46
- detexted << counted
47
- end
48
- elsif model_classes[i] == "Categorical"
49
- counted = []
50
- col.each do |val|
51
- counted << self.model["conversion_pipeline"][model_keys[i]]["unique_terms"].index(val.to_s)
52
- end
53
- detexted << counted
54
- else
55
- conversion_pipeline = self.model["conversion_pipeline"][model_keys[i]]
56
- replaced = col.collect{|r| r||conversion_pipeline["average"]}
57
- dist = conversion_pipeline["max"]-conversion_pipeline["min"]
58
- detexted << replaced
59
- detexted << replaced.collect{|r| (r-conversion_pipeline["min"]).to_f/dist} if dist > 0
60
- detexted << replaced.collect{|r| (r-conversion_pipeline["average"]).to_f/conversion_pipeline["stdev"]} if conversion_pipeline["stdev"] > 0
61
- detexted << replaced.collect{|r| r.abs}
62
- end
63
- end
64
- return detexted.transpose
65
- end
66
-
67
- def clean_str(string)
68
- string.
69
- gsub(/[^A-Za-z0-9(),!?\'\`]/, " ").
70
- gsub(" ", " ").
71
- gsub("\'s", " \'s").
72
- gsub("", "").
73
- gsub("\'ve", " \'ve").
74
- gsub("n\'t", " n\'t").
75
- gsub("\'re", " \'re").
76
- gsub("\'d", " \'d").
77
- gsub("\'ll", " \'ll").
78
- gsub(",", " , ").
79
- gsub("!", " ! ").
80
- gsub("\(", " \\( ").
81
- gsub("\)", " \\) ").
82
- gsub(" \\\( \\\( \\\( ", " \(\(\( ").
83
- gsub(" \\\) \\\) \\\) ", " \)\)\) ").
84
- gsub("\?", " \? ").
85
- gsub(/\s{2,}/, " ").
86
- gsub(Regexp.new("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"), "<URL/>").
87
- gsub("www", " ").
88
- gsub("com", " ").
89
- gsub("org", " ").
90
- strip.
91
- downcase
92
- end
93
-
94
- def cast_val(value, directive)
95
- if directive == "Integer"
96
- return value.to_i
97
- elsif directive == "Float"
98
- return value.to_f
99
- elsif directive == "Time"
100
- if value.length == 10 and value.scan(/\d/).count == 10
101
- return Time.at(value).to_i
102
- elsif value.length == 13 and value.scan(/\d/).count == 13
103
- return Time.at(value).to_i
104
- else
105
- return Chronic.parse(value).to_i
106
- end
107
- elsif directive == "Text" or directive == "Phrase"
108
- return clean_str(value).split(" ").collect{|word| Stemmer::stem_word(word)}
109
- elsif directive == "Categorical"
110
- return value
111
- end
11
+ def apply_to_new_dataset(filepath, prediction_column)
12
+ @client.apply_to_new_dataset(@model_id, filepath, prediction_column)
112
13
  end
113
14
  end
114
15
  end
@@ -3,11 +3,15 @@ class DGaffML
3
3
  def self.hostname
4
4
  "http://machinelearning.devingaffney.com"
5
5
  end
6
-
6
+
7
7
  def self.login(user_id)
8
8
  JSON.parse(RestClient.get(hostname+"/api/#{user_id}").body)
9
9
  end
10
10
 
11
+ def self.new_dataset(user_id, filepath, prediction_column)
12
+ JSON.parse(RestClient.post(hostname+"/api/#{user_id}/new_dataset", {filesize: File.open(filepath).size/1024.0/1024, filename: filepath.split("/").last, csv_data: CSV.read(filepath).to_json, prediction_column: prediction_column}).body)
13
+ end
14
+
11
15
  def self.dataset(user_id, dataset_id)
12
16
  JSON.parse(RestClient.get(hostname+"/api/#{user_id}/dataset/#{dataset_id}").body)
13
17
  end
@@ -16,8 +20,25 @@ class DGaffML
16
20
  JSON.parse(RestClient.get(hostname+"/api/#{user_id}/datasets").body)
17
21
  end
18
22
 
23
+ def self.export_model(user_id, dataset_id)
24
+ JSON.parse(RestClient.get(hostname+"/api/#{user_id}/dataset/#{dataset_id}/export_model").body)
25
+ end
26
+
27
+ def self.model(user_id, model_id)
28
+ JSON.parse(RestClient.get(hostname+"/api/#{user_id}/model/#{model_id}").body)
29
+ end
30
+
31
+ def self.models(user_id)
32
+ JSON.parse(RestClient.get(hostname+"/api/#{user_id}/models").body)
33
+ end
34
+
35
+ def self.apply_to_new_dataset(user_id, model_id, filepath, prediction_column)
36
+ JSON.parse(RestClient.post(hostname+"/api/#{user_id}/model/#{model_id}/apply_to_new_dataset", {filesize: File.open(filepath).size/1024.0/1024, filename: filepath.split("/").last, csv_data: CSV.read(filepath).to_json, prediction_column: prediction_column}).body)
37
+ end
38
+
19
39
  def self.predict(user_id, dataset_id, obs)
20
40
  JSON.parse(RestClient.post(hostname+"/api/#{user_id}/predict/#{dataset_id}", {data: obs.to_json}).body)
21
41
  end
42
+
22
43
  end
23
44
  end
@@ -1,3 +1,3 @@
1
1
  module DgaffMl
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/dgaff_ml.rb CHANGED
@@ -1,5 +1,7 @@
1
+ require 'csv'
1
2
  require "dgaff_ml/version"
2
3
  require File.expand_path('../dgaff_ml/client', __FILE__)
4
+ require File.expand_path('../dgaff_ml/dataset', __FILE__)
3
5
  require File.expand_path('../dgaff_ml/model', __FILE__)
4
6
  require File.expand_path('../dgaff_ml/request', __FILE__)
5
7
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dgaff_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Devin Gaffney
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-27 00:00:00.000000000 Z
11
+ date: 2017-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -93,9 +93,18 @@ files:
93
93
  - README.md
94
94
  - Rakefile
95
95
  - dgaff_ml.gemspec
96
+ - dgaff_ml/.gitignore
97
+ - dgaff_ml/Gemfile
98
+ - dgaff_ml/LICENSE.txt
99
+ - dgaff_ml/README.md
100
+ - dgaff_ml/Rakefile
101
+ - dgaff_ml/dgaff_ml.gemspec
102
+ - dgaff_ml/lib/dgaff_ml.rb
103
+ - dgaff_ml/lib/dgaff_ml/version.rb
96
104
  - echo.py
97
105
  - lib/dgaff_ml.rb
98
106
  - lib/dgaff_ml/client.rb
107
+ - lib/dgaff_ml/dataset.rb
99
108
  - lib/dgaff_ml/model.rb
100
109
  - lib/dgaff_ml/request.rb
101
110
  - lib/dgaff_ml/version.rb