yannitor 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/yannitor/cleaner.rb +15 -17
- data/lib/yannitor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 33f5cfc511f104f0db28db5560043cb36c7908d3c4d52a34d81bca2ee796f9da
|
4
|
+
data.tar.gz: 0a8fbfd8ee32f9349639aa34ca4ab430cdbe2f85f0d9b026e4f3cf2f3b79218a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9776ff46e3ca3080f5449a9369c37898666f492aa1eb20ac474ee54099ed7a5d257e7385b6f5de579719010c8f69adbbd944de077195ed2ff6192c845b15aa57
|
7
|
+
data.tar.gz: cd485253ee2b1ae4a3446c82011f24c174af12b989cf9c720a062b9f2caa0a60839ace0a0e841375ce8b85783b3d938bb52d71a6638368639124d77c6689f85b
|
data/lib/yannitor/cleaner.rb
CHANGED
@@ -6,18 +6,11 @@ module Yannitor
|
|
6
6
|
module Broom
|
7
7
|
attr_accessor :yannitor_features
|
8
8
|
|
9
|
-
def yannitor_is_cleaning(
|
10
|
-
self.yannitor_features =
|
9
|
+
def yannitor_is_cleaning(features = {})
|
10
|
+
self.yannitor_features = features
|
11
11
|
end
|
12
12
|
|
13
13
|
def to_one_hot(target_column, type = 'text')
|
14
|
-
sorted_value_array = pluck("distinct(#{target_column})").join("'), ('")
|
15
|
-
|
16
|
-
table_name = self.table_name
|
17
|
-
values_select = %(
|
18
|
-
SELECT value FROM (values ('#{sorted_value_array}')) s(value)
|
19
|
-
)
|
20
|
-
|
21
14
|
self.select(%(
|
22
15
|
#{table_name}.id,
|
23
16
|
ARRAY_AGG(CASE
|
@@ -27,10 +20,15 @@ module Yannitor
|
|
27
20
|
END
|
28
21
|
) AS o#{target_column}
|
29
22
|
)).joins(%(
|
30
|
-
LEFT JOIN (#{
|
23
|
+
LEFT JOIN (#{values_for_select(target_column)}) AS sorted_value_table ON 1=1
|
31
24
|
)).group("#{table_name}.id")
|
32
25
|
end
|
33
26
|
|
27
|
+
def values_for_select(target_column)
|
28
|
+
sorted_values = pluck("distinct(#{target_column})").join("'), ('")
|
29
|
+
"SELECT value FROM (values ('#{sorted_values}')) s(value)"
|
30
|
+
end
|
31
|
+
|
34
32
|
def vectorize
|
35
33
|
select('*, ' + linear_feature_select).build_linear_features
|
36
34
|
end
|
@@ -45,17 +43,19 @@ module Yannitor
|
|
45
43
|
|
46
44
|
def linear_feature_select
|
47
45
|
yannitor_features[:linear].map do |feature|
|
48
|
-
|
49
|
-
max = all.maximum(feature)
|
50
|
-
"CAST((#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
|
46
|
+
"CAST(#{min_max(feature)} AS float) as n#{feature}"
|
51
47
|
end.join(', ')
|
52
48
|
end
|
53
49
|
|
54
|
-
def
|
50
|
+
def min_val(feature)
|
55
51
|
min = all.minimum(feature)
|
56
52
|
max = all.maximum(feature)
|
57
53
|
|
58
|
-
|
54
|
+
"(#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)"
|
55
|
+
end
|
56
|
+
|
57
|
+
def nelect(feature)
|
58
|
+
select("*, #{min_max(feature)}::float as n#{feature}")
|
59
59
|
end
|
60
60
|
|
61
61
|
def normalize(feature)
|
@@ -72,8 +72,6 @@ module Yannitor
|
|
72
72
|
CSV.open(file_name, 'wb', col_sep: separator) do |csv|
|
73
73
|
all.vectorize.each { |v| csv << v }
|
74
74
|
end
|
75
|
-
|
76
|
-
nil
|
77
75
|
end
|
78
76
|
end
|
79
77
|
end
|
data/lib/yannitor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yannitor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danielius Visockas
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|