yannitor 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/yannitor/cleaner.rb +15 -17
- data/lib/yannitor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 33f5cfc511f104f0db28db5560043cb36c7908d3c4d52a34d81bca2ee796f9da
|
4
|
+
data.tar.gz: 0a8fbfd8ee32f9349639aa34ca4ab430cdbe2f85f0d9b026e4f3cf2f3b79218a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9776ff46e3ca3080f5449a9369c37898666f492aa1eb20ac474ee54099ed7a5d257e7385b6f5de579719010c8f69adbbd944de077195ed2ff6192c845b15aa57
|
7
|
+
data.tar.gz: cd485253ee2b1ae4a3446c82011f24c174af12b989cf9c720a062b9f2caa0a60839ace0a0e841375ce8b85783b3d938bb52d71a6638368639124d77c6689f85b
|
data/lib/yannitor/cleaner.rb
CHANGED
@@ -6,18 +6,11 @@ module Yannitor
|
|
6
6
|
module Broom
|
7
7
|
attr_accessor :yannitor_features
|
8
8
|
|
9
|
-
def yannitor_is_cleaning(
|
10
|
-
self.yannitor_features =
|
9
|
+
def yannitor_is_cleaning(features = {})
|
10
|
+
self.yannitor_features = features
|
11
11
|
end
|
12
12
|
|
13
13
|
def to_one_hot(target_column, type = 'text')
|
14
|
-
sorted_value_array = pluck("distinct(#{target_column})").join("'), ('")
|
15
|
-
|
16
|
-
table_name = self.table_name
|
17
|
-
values_select = %(
|
18
|
-
SELECT value FROM (values ('#{sorted_value_array}')) s(value)
|
19
|
-
)
|
20
|
-
|
21
14
|
self.select(%(
|
22
15
|
#{table_name}.id,
|
23
16
|
ARRAY_AGG(CASE
|
@@ -27,10 +20,15 @@ module Yannitor
|
|
27
20
|
END
|
28
21
|
) AS o#{target_column}
|
29
22
|
)).joins(%(
|
30
|
-
LEFT JOIN (#{
|
23
|
+
LEFT JOIN (#{values_for_select(target_column)}) AS sorted_value_table ON 1=1
|
31
24
|
)).group("#{table_name}.id")
|
32
25
|
end
|
33
26
|
|
27
|
+
def values_for_select(target_column)
|
28
|
+
sorted_values = pluck("distinct(#{target_column})").join("'), ('")
|
29
|
+
"SELECT value FROM (values ('#{sorted_values}')) s(value)"
|
30
|
+
end
|
31
|
+
|
34
32
|
def vectorize
|
35
33
|
select('*, ' + linear_feature_select).build_linear_features
|
36
34
|
end
|
@@ -45,17 +43,19 @@ module Yannitor
|
|
45
43
|
|
46
44
|
def linear_feature_select
|
47
45
|
yannitor_features[:linear].map do |feature|
|
48
|
-
|
49
|
-
max = all.maximum(feature)
|
50
|
-
"CAST((#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
|
46
|
+
"CAST(#{min_max(feature)} AS float) as n#{feature}"
|
51
47
|
end.join(', ')
|
52
48
|
end
|
53
49
|
|
54
|
-
def
|
50
|
+
def min_val(feature)
|
55
51
|
min = all.minimum(feature)
|
56
52
|
max = all.maximum(feature)
|
57
53
|
|
58
|
-
|
54
|
+
"(#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)"
|
55
|
+
end
|
56
|
+
|
57
|
+
def nelect(feature)
|
58
|
+
select("*, #{min_max(feature)}::float as n#{feature}")
|
59
59
|
end
|
60
60
|
|
61
61
|
def normalize(feature)
|
@@ -72,8 +72,6 @@ module Yannitor
|
|
72
72
|
CSV.open(file_name, 'wb', col_sep: separator) do |csv|
|
73
73
|
all.vectorize.each { |v| csv << v }
|
74
74
|
end
|
75
|
-
|
76
|
-
nil
|
77
75
|
end
|
78
76
|
end
|
79
77
|
end
|
data/lib/yannitor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yannitor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danielius Visockas
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|