yannitor 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 86f4b0ff2fca0aaa5270a85df9fcf5a802154eed
4
- data.tar.gz: 55d3f801ad79c800177ca453e08be692eedaf249
2
+ SHA256:
3
+ metadata.gz: 3aa7e3ec52c46173c2b250128ae91232b56f5f77128b96076e03a4bb754d7ebc
4
+ data.tar.gz: 96ba16f09a7d85186d97228758f4a2ff91f996f45204f05f82b6fb1c1b741c4f
5
5
  SHA512:
6
- metadata.gz: b636f749e8a5bae71eb3cbf953925522279ea8272ee6a1b1b63620617743d878d7202f5182d5486debe33034055f6c544fdbb15e47bbf471be8455eac4ccf998
7
- data.tar.gz: 7ac6c13650a2463901647d809773060206e4c6c684f6a731eece17f95ce34acb34d52bfeaecb15e73a81052aee685bf4cc81c11eca8e42e519acab2e5e87b23f
6
+ metadata.gz: f786eaffca03c775ab53f0931ce45ca1c66896132e9a3d052547d4a71711d32ef044378370b077b4bdfcbeea79f939ee002ff18f8fe821367113de0aa694f7d8
7
+ data.tar.gz: f87a486ac25649dab0f2e6ffb10e3030061d6e6d96c12e938fa4e2efb0e9f43b2b12720dcb29ccf5befc26cb78d28b5bbe135e60a430eeb84dac970e7f552208
data/lib/yannitor/cleaner.rb CHANGED
@@ -1,59 +1,64 @@
1
- require 'active_record'
2
- # require 'active_record/version'
3
- # require 'active_support/core_ext/module'
1
+ # frozen_string_literal: true
4
2
 
5
- # begin
6
- # require 'rails/engine'
7
- # end
3
+ require 'active_record'
8
4
 
9
5
  module Yannitor
10
6
  module Broom
11
- attr_accessor :features
7
+ attr_accessor :yannitor_features
12
8
 
13
9
  def yannitor_is_cleaning(feats = {})
14
- self.features = feats
10
+ self.yannitor_features = feats
15
11
  end
16
12
 
17
- def to_one_hot target_column, type = 'text'
18
- sorted_value_array = self.pluck("distinct(#{target_column})")
19
-
20
- _table_name = self.table_name
21
- values_select = %Q(SELECT value FROM (values ('#{ sorted_value_array.join("'), ('") }')) s(value))
22
-
23
- self.select(%Q(
24
- #{_table_name}.id,
25
- ARRAY_AGG(CASE WHEN sorted_value_table.value::#{type} = #{_table_name}.#{target_column}::#{type} THEN 1 ELSE 0 END) AS o#{target_column}
26
- )).joins(%Q(
13
+ def to_one_hot(target_column, type = 'text')
14
+ sorted_value_array = pluck("distinct(#{target_column})").join("'), ('")
15
+
16
+ table_name = self.table_name
17
+ values_select = %(
18
+ SELECT value FROM (values ('#{sorted_value_array}')) s(value)
19
+ )
20
+
21
+ self.select(%(
22
+ #{table_name}.id,
23
+ ARRAY_AGG(CASE
24
+ WHEN sorted_value_table.value::#{type} = #{table_name}.#{target_column}::#{type}
25
+ THEN 1
26
+ ELSE 0
27
+ END
28
+ ) AS o#{target_column}
29
+ )).joins(%(
27
30
  LEFT JOIN (#{values_select}) AS sorted_value_table ON 1=1
28
- )).group("#{_table_name}.id")
31
+ )).group("#{table_name}.id")
29
32
  end
30
33
 
31
34
  def vectorize
32
- _table_name = self.table_name
33
-
34
- select('*, ' + features[:linear].map do |feature|
35
- min = all.minimum(feature)
36
- max = all.maximum(feature)
37
- "CAST((#{_table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
38
- end.join(', ')).all.map do |obj|
35
+ select('*, ' + linear_feature_select).build_linear_features
36
+ end
39
37
 
40
- obj.class.features[:linear].map do |feature|
38
+ def build_linear_features
39
+ all.map do |obj|
40
+ obj.class.yannitor_features[:linear].map do |feature|
41
41
  obj.send("n#{feature}").to_f
42
42
  end
43
43
  end
44
44
  end
45
45
 
46
- def nelect(feature)
47
- _table_name = self.table_name
46
+ def linear_feature_select
47
+ yannitor_features[:linear].map do |feature|
48
+ min = all.minimum(feature)
49
+ max = all.maximum(feature)
50
+ "CAST((#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
51
+ end.join(', ')
52
+ end
48
53
 
54
+ def nelect(feature)
49
55
  min = all.minimum(feature)
50
56
  max = all.maximum(feature)
51
-
52
- self.select("*, (#{_table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)::float as n#{feature}")
57
+
58
+ select("*, (#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)::float as n#{feature}")
53
59
  end
54
60
 
55
61
  def normalize(feature)
56
- print "Normalizing #{feature}"
57
62
  min = all.minimum(feature)
58
63
  max = all.maximum(feature)
59
64
  data = all.nelect(feature).map do |e|
@@ -63,14 +68,13 @@ module Yannitor
63
68
  [data, min, max]
64
69
  end
65
70
 
66
- def to_file
67
- CSV.open("data.csv", "wb", {col_sep: ' '}) do |csv|
71
+ def to_file(file_name = 'data.csv', separator = ' ')
72
+ CSV.open(file_name, 'wb', col_sep: separator) do |csv|
68
73
  all.vectorize.each { |v| csv << v }
69
74
  end
70
75
 
71
76
  nil
72
77
  end
73
-
74
78
  end
75
79
  end
76
80
 
data/lib/yannitor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Yannitor
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/yannitor.rb CHANGED
@@ -1,6 +1,7 @@
1
- require "yannitor/version"
2
- require "yannitor/cleaner"
1
+ # frozen_string_literal: true
2
+
3
+ require 'yannitor/version'
4
+ require 'yannitor/cleaner'
3
5
 
4
6
  module Yannitor
5
- # Your code goes here...
6
7
  end
data/yannitor.gemspec CHANGED
@@ -7,21 +7,12 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "yannitor"
8
8
  spec.version = Yannitor::VERSION
9
9
  spec.authors = ["Danielius Visockas"]
10
- spec.email = ["danielius@wisemonks.com"]
10
+ spec.email = ["danieliusvisockas@gmail.com"]
11
11
 
12
- spec.summary = %q{Gem for preprocessing Active record collections}
12
+ spec.summary = %q{Helps you build one-hot or min-max encoded vectors from ActiveRecord collections}
13
13
  spec.description = %q{I'll clean your data}
14
14
  spec.homepage = "https://github.com"
15
15
 
16
- # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
17
- # to allow pushing to a single host or delete this section to allow pushing to any host.
18
- # if spec.respond_to?(:metadata)
19
- # spec.metadata['allowed_push_host'] = "'http://mygemserver.com'"
20
- # else
21
- # raise "RubyGems 2.0 or newer is required to protect against " \
22
- # "public gem pushes."
23
- # end
24
-
25
16
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
26
17
  f.match(%r{^(test|spec|features)/})
27
18
  end
@@ -29,6 +20,7 @@ Gem::Specification.new do |spec|
29
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
21
  spec.require_paths = ["lib"]
31
22
 
23
+ spec.add_runtime_dependency "active_record", ["> 3.2.0"]
32
24
  spec.add_development_dependency "bundler", "~> 1.14"
33
25
  spec.add_development_dependency "rake", "~> 10.0"
34
26
  spec.add_development_dependency "rspec", "~> 3.0"
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yannitor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danielius Visockas
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-11 00:00:00.000000000 Z
11
+ date: 2019-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: active_record
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">"
18
+ - !ruby/object:Gem::Version
19
+ version: 3.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: 3.2.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -54,7 +68,7 @@ dependencies:
54
68
  version: '3.0'
55
69
  description: I'll clean your data
56
70
  email:
57
- - danielius@wisemonks.com
71
+ - danieliusvisockas@gmail.com
58
72
  executables: []
59
73
  extensions: []
60
74
  extra_rdoc_files: []
@@ -91,8 +105,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
105
  version: '0'
92
106
  requirements: []
93
107
  rubyforge_project:
94
- rubygems_version: 2.6.14
108
+ rubygems_version: 2.7.8
95
109
  signing_key:
96
110
  specification_version: 4
97
- summary: Gem for preprocessing Active record collections
111
+ summary: Helps you build one-hot or min-max encoded vectors from ActiveRecord collections
98
112
  test_files: []