yannitor 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/yannitor/cleaner.rb +39 -35
- data/lib/yannitor/version.rb +1 -1
- data/lib/yannitor.rb +4 -3
- data/yannitor.gemspec +3 -11
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3aa7e3ec52c46173c2b250128ae91232b56f5f77128b96076e03a4bb754d7ebc
|
4
|
+
data.tar.gz: 96ba16f09a7d85186d97228758f4a2ff91f996f45204f05f82b6fb1c1b741c4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f786eaffca03c775ab53f0931ce45ca1c66896132e9a3d052547d4a71711d32ef044378370b077b4bdfcbeea79f939ee002ff18f8fe821367113de0aa694f7d8
|
7
|
+
data.tar.gz: f87a486ac25649dab0f2e6ffb10e3030061d6e6d96c12e938fa4e2efb0e9f43b2b12720dcb29ccf5befc26cb78d28b5bbe135e60a430eeb84dac970e7f552208
|
data/lib/yannitor/cleaner.rb
CHANGED
@@ -1,59 +1,64 @@
|
|
1
|
-
|
2
|
-
# require 'active_record/version'
|
3
|
-
# require 'active_support/core_ext/module'
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
5
|
-
|
6
|
-
# require 'rails/engine'
|
7
|
-
# end
|
3
|
+
require 'active_record'
|
8
4
|
|
9
5
|
module Yannitor
|
10
6
|
module Broom
|
11
|
-
attr_accessor :
|
7
|
+
attr_accessor :yannitor_features
|
12
8
|
|
13
9
|
def yannitor_is_cleaning(feats = {})
|
14
|
-
self.
|
10
|
+
self.yannitor_features = feats
|
15
11
|
end
|
16
12
|
|
17
|
-
def to_one_hot
|
18
|
-
sorted_value_array =
|
19
|
-
|
20
|
-
|
21
|
-
values_select = %
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
13
|
+
def to_one_hot(target_column, type = 'text')
|
14
|
+
sorted_value_array = pluck("distinct(#{target_column})").join("'), ('")
|
15
|
+
|
16
|
+
table_name = self.table_name
|
17
|
+
values_select = %(
|
18
|
+
SELECT value FROM (values ('#{sorted_value_array}')) s(value)
|
19
|
+
)
|
20
|
+
|
21
|
+
self.select(%(
|
22
|
+
#{table_name}.id,
|
23
|
+
ARRAY_AGG(CASE
|
24
|
+
WHEN sorted_value_table.value::#{type} = #{table_name}.#{target_column}::#{type}
|
25
|
+
THEN 1
|
26
|
+
ELSE 0
|
27
|
+
END
|
28
|
+
) AS o#{target_column}
|
29
|
+
)).joins(%(
|
27
30
|
LEFT JOIN (#{values_select}) AS sorted_value_table ON 1=1
|
28
|
-
)).group("#{
|
31
|
+
)).group("#{table_name}.id")
|
29
32
|
end
|
30
33
|
|
31
34
|
def vectorize
|
32
|
-
|
33
|
-
|
34
|
-
select('*, ' + features[:linear].map do |feature|
|
35
|
-
min = all.minimum(feature)
|
36
|
-
max = all.maximum(feature)
|
37
|
-
"CAST((#{_table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
|
38
|
-
end.join(', ')).all.map do |obj|
|
35
|
+
select('*, ' + linear_feature_select).build_linear_features
|
36
|
+
end
|
39
37
|
|
40
|
-
|
38
|
+
def build_linear_features
|
39
|
+
all.map do |obj|
|
40
|
+
obj.class.yannitor_features[:linear].map do |feature|
|
41
41
|
obj.send("n#{feature}").to_f
|
42
42
|
end
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
def
|
47
|
-
|
46
|
+
def linear_feature_select
|
47
|
+
yannitor_features[:linear].map do |feature|
|
48
|
+
min = all.minimum(feature)
|
49
|
+
max = all.maximum(feature)
|
50
|
+
"CAST((#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
|
51
|
+
end.join(', ')
|
52
|
+
end
|
48
53
|
|
54
|
+
def nelect(feature)
|
49
55
|
min = all.minimum(feature)
|
50
56
|
max = all.maximum(feature)
|
51
|
-
|
52
|
-
|
57
|
+
|
58
|
+
select("*, (#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)::float as n#{feature}")
|
53
59
|
end
|
54
60
|
|
55
61
|
def normalize(feature)
|
56
|
-
print "Normalizing #{feature}"
|
57
62
|
min = all.minimum(feature)
|
58
63
|
max = all.maximum(feature)
|
59
64
|
data = all.nelect(feature).map do |e|
|
@@ -63,14 +68,13 @@ module Yannitor
|
|
63
68
|
[data, min, max]
|
64
69
|
end
|
65
70
|
|
66
|
-
def to_file
|
67
|
-
CSV.open(
|
71
|
+
def to_file(file_name = 'data.csv', separator = ' ')
|
72
|
+
CSV.open(file_name, 'wb', col_sep: separator) do |csv|
|
68
73
|
all.vectorize.each { |v| csv << v }
|
69
74
|
end
|
70
75
|
|
71
76
|
nil
|
72
77
|
end
|
73
|
-
|
74
78
|
end
|
75
79
|
end
|
76
80
|
|
data/lib/yannitor/version.rb
CHANGED
data/lib/yannitor.rb
CHANGED
data/yannitor.gemspec
CHANGED
@@ -7,21 +7,12 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "yannitor"
|
8
8
|
spec.version = Yannitor::VERSION
|
9
9
|
spec.authors = ["Danielius Visockas"]
|
10
|
-
spec.email = ["
|
10
|
+
spec.email = ["danieliusvisockas@gmail.com"]
|
11
11
|
|
12
|
-
spec.summary = %q{
|
12
|
+
spec.summary = %q{Helps you build one-hot or min-max encoded vectors from ActiveRecord collections}
|
13
13
|
spec.description = %q{I'll clean your data}
|
14
14
|
spec.homepage = "https://github.com"
|
15
15
|
|
16
|
-
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
17
|
-
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
18
|
-
# if spec.respond_to?(:metadata)
|
19
|
-
# spec.metadata['allowed_push_host'] = "'http://mygemserver.com'"
|
20
|
-
# else
|
21
|
-
# raise "RubyGems 2.0 or newer is required to protect against " \
|
22
|
-
# "public gem pushes."
|
23
|
-
# end
|
24
|
-
|
25
16
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
26
17
|
f.match(%r{^(test|spec|features)/})
|
27
18
|
end
|
@@ -29,6 +20,7 @@ Gem::Specification.new do |spec|
|
|
29
20
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
21
|
spec.require_paths = ["lib"]
|
31
22
|
|
23
|
+
spec.add_runtime_dependency "active_record", ["> 3.2.0"]
|
32
24
|
spec.add_development_dependency "bundler", "~> 1.14"
|
33
25
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
26
|
spec.add_development_dependency "rspec", "~> 3.0"
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yannitor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danielius Visockas
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: active_record
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.2.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.2.0
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -54,7 +68,7 @@ dependencies:
|
|
54
68
|
version: '3.0'
|
55
69
|
description: I'll clean your data
|
56
70
|
email:
|
57
|
-
-
|
71
|
+
- danieliusvisockas@gmail.com
|
58
72
|
executables: []
|
59
73
|
extensions: []
|
60
74
|
extra_rdoc_files: []
|
@@ -91,8 +105,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
105
|
version: '0'
|
92
106
|
requirements: []
|
93
107
|
rubyforge_project:
|
94
|
-
rubygems_version: 2.
|
108
|
+
rubygems_version: 2.7.8
|
95
109
|
signing_key:
|
96
110
|
specification_version: 4
|
97
|
-
summary:
|
111
|
+
summary: Helps you build one-hot or min-max encoded vectors from ActiveRecord collections
|
98
112
|
test_files: []
|