everyday-cli-utils 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/everyday-cli-utils/safe/kmeans.rb +44 -46
- data/lib/everyday-cli-utils/version.rb +1 -1
- data/lib/everyday-cli-utils.rb +32 -1
- data/spec/everyday-cli-utils/format_spec.rb +2 -1
- data/spec/everyday-cli-utils/kmeans_spec.rb +2 -1
- data/spec/everyday-cli-utils/maputil_spec.rb +2 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5027d77ef68c7d0aff222110d71d71100b7f0aab
|
4
|
+
data.tar.gz: a66077fff89ce842c5805719343d311cc87c9173
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 536936fbe11f8c7a8af96a0ad9f283db2812221f37663ff1f8e69939fe8b72369439ede6c4d98ebaaa5ac4a34f3631e9f5749ef6a3397bfbe72d8294b797f115
|
7
|
+
data.tar.gz: 00f8f63bdc70ebdfaedc5295ab8d9bfb8d7afbb66821f5cf76a670b5d21e67d0d641023efe4dedc4238cfca734284edbd06c67f67386cb3688fe74e44c557df6
|
data/lib/everyday-cli-utils/safe/kmeans.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative 'maputil'
|
2
2
|
|
3
3
|
module EverydayCliUtils
|
4
|
-
module Kmeans
|
4
|
+
module KmeansUtil
|
5
5
|
def self.normal(x, avg, std)
|
6
6
|
exp = -(((x - avg) / std) ** 2.0) / 2.0
|
7
7
|
((Math.exp(exp) / (std * Math.sqrt(2.0 * Math::PI))))
|
@@ -22,16 +22,15 @@ module EverydayCliUtils
|
|
22
22
|
|
23
23
|
def self.f_test_uv(clusters, cnt, cnt2, means)
|
24
24
|
uv = 0.0
|
25
|
-
(0...means.count).each { |i|
|
26
|
-
unless clusters[i].empty?
|
27
|
-
(0...clusters[i].count).each { |j|
|
28
|
-
uv += (clusters[i][j] - means[i]) * (clusters[i][j] - means[i])
|
29
|
-
}
|
30
|
-
end
|
31
|
-
}
|
25
|
+
(0...means.count).each { |i| uv = f_test_uvi(clusters, i, means, uv) }
|
32
26
|
uv / (cnt - cnt2)
|
33
27
|
end
|
34
28
|
|
29
|
+
def self.f_test_uvi(clusters, i, means, uv)
|
30
|
+
(0...clusters[i].count).each { |j| uv += (clusters[i][j] - means[i]) * (clusters[i][j] - means[i]) } unless clusters[i].empty?
|
31
|
+
uv
|
32
|
+
end
|
33
|
+
|
35
34
|
def self.f_test2(clusters, means, cnt)
|
36
35
|
uv = 0.0
|
37
36
|
cnt2 = clusters.count { |i| !i.empty? }
|
@@ -41,13 +40,36 @@ module EverydayCliUtils
|
|
41
40
|
|
42
41
|
def self.f_test2_calc(clusters, i, means, uv)
|
43
42
|
tmp = 0.0
|
44
|
-
(0...clusters[i].count).each { |j|
|
45
|
-
tmp += (clusters[i][j] - means[i]) ** 2.0
|
46
|
-
}
|
43
|
+
(0...clusters[i].count).each { |j| tmp += (clusters[i][j] - means[i]) ** 2.0 }
|
47
44
|
tmp /= clusters[i].count
|
48
45
|
Math.sqrt(tmp)
|
49
46
|
end
|
50
47
|
|
48
|
+
def self.get_clusters(collection, means)
|
49
|
+
clusters = Array.new(means.count) { Array.new }
|
50
|
+
collection.each { |item|
|
51
|
+
cluster = false
|
52
|
+
distance = false
|
53
|
+
(0...means.count).each { |i|
|
54
|
+
diff = (means[i] - item).abs
|
55
|
+
if distance == false || diff < distance
|
56
|
+
cluster = i
|
57
|
+
distance = diff
|
58
|
+
end
|
59
|
+
}
|
60
|
+
clusters[cluster] << item
|
61
|
+
}
|
62
|
+
clusters
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.find_outliers(avg, cs, i, sensitivity)
|
66
|
+
csi = cs[i]
|
67
|
+
std = EverydayCliUtils::MapUtil.std_dev(csi)
|
68
|
+
cnt = csi.count
|
69
|
+
csi.select { |c| (normal(c, avg, std) * cnt) < sensitivity }
|
70
|
+
end
|
71
|
+
end
|
72
|
+
module Kmeans
|
51
73
|
def self.nmeans_setup_1(collection)
|
52
74
|
su = EverydayCliUtils::MapUtil.sum(collection)
|
53
75
|
cnt = collection.count
|
@@ -57,12 +79,12 @@ module EverydayCliUtils
|
|
57
79
|
end
|
58
80
|
|
59
81
|
def self.nmeans_setup_2(collection, avg, cnt, ks1)
|
60
|
-
cso = get_clusters(collection, ks1)
|
61
|
-
ft1 = f_test2(cso, ks1, cnt)
|
82
|
+
cso = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks1)
|
83
|
+
ft1 = EverydayCliUtils::KmeansUtil.f_test2(cso, ks1, cnt)
|
62
84
|
ks = kmeans(collection, 2)
|
63
|
-
cs = get_clusters(collection, ks)
|
64
|
-
ft = f_test(cs, ks, cnt, avg)
|
65
|
-
ft2 = f_test2(cs, ks, cnt)
|
85
|
+
cs = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
|
86
|
+
ft = EverydayCliUtils::KmeansUtil.f_test(cs, ks, cnt, avg)
|
87
|
+
ft2 = EverydayCliUtils::KmeansUtil.f_test2(cs, ks, cnt)
|
66
88
|
return ft, ft1, ft2, ks
|
67
89
|
end
|
68
90
|
|
@@ -71,9 +93,9 @@ module EverydayCliUtils
|
|
71
93
|
fto = ft
|
72
94
|
fto2 = ft2
|
73
95
|
ks = kmeans(collection, k)
|
74
|
-
cs = get_clusters(collection, ks)
|
75
|
-
ft = f_test(cs, ks, cnt, avg)
|
76
|
-
ft2 = f_test2(cs, ks, cnt)
|
96
|
+
cs = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
|
97
|
+
ft = EverydayCliUtils::KmeansUtil.f_test(cs, ks, cnt, avg)
|
98
|
+
ft2 = EverydayCliUtils::KmeansUtil.f_test2(cs, ks, cnt)
|
77
99
|
return ft, ft2, fto, fto2, ks, kso
|
78
100
|
end
|
79
101
|
|
@@ -87,30 +109,13 @@ module EverydayCliUtils
|
|
87
109
|
|
88
110
|
def self.run_kmean(collection, ks)
|
89
111
|
kso = ks
|
90
|
-
clusters = get_clusters(collection, kso)
|
112
|
+
clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, kso)
|
91
113
|
ks = []
|
92
114
|
clusters.each_with_index { |val, key| ks[key] = (val.count <= 0) ? kso[key] : (val.sum / val.count) }
|
93
115
|
ks.sort
|
94
116
|
return kso, ks
|
95
117
|
end
|
96
118
|
|
97
|
-
def self.get_clusters(collection, means)
|
98
|
-
clusters = Array.new(means.count) { Array.new }
|
99
|
-
collection.each { |item|
|
100
|
-
cluster = false
|
101
|
-
distance = false
|
102
|
-
(0...means.count).each { |i|
|
103
|
-
diff = (means[i] - item).abs
|
104
|
-
if distance == false || diff < distance
|
105
|
-
cluster = i
|
106
|
-
distance = diff
|
107
|
-
end
|
108
|
-
}
|
109
|
-
clusters[cluster] << item
|
110
|
-
}
|
111
|
-
clusters
|
112
|
-
end
|
113
|
-
|
114
119
|
def self.kmeans(collection, k)
|
115
120
|
mi = collection.min
|
116
121
|
ma = collection.max
|
@@ -132,20 +137,13 @@ module EverydayCliUtils
|
|
132
137
|
run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
|
133
138
|
end
|
134
139
|
|
135
|
-
def self.find_outliers(avg, cs, i, sensitivity)
|
136
|
-
csi = cs[i]
|
137
|
-
std = EverydayCliUtils::MapUtil.std_dev(csi)
|
138
|
-
cnt = csi.count
|
139
|
-
csi.select { |c| (normal(c, avg, std) * cnt) < sensitivity }
|
140
|
-
end
|
141
|
-
|
142
140
|
def self.outliers(collection, sensitivity = 0.5, k = nil)
|
143
141
|
ks = k.nil? ? nmeans(collection) : kmeans(collection, k)
|
144
|
-
cs = get_clusters(collection, ks)
|
142
|
+
cs = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
|
145
143
|
|
146
144
|
outliers = []
|
147
145
|
|
148
|
-
ks.each_with_index { |avg, i| outliers += find_outliers(avg, cs, i, sensitivity) }
|
146
|
+
ks.each_with_index { |avg, i| outliers += EverydayCliUtils::KmeansUtil.find_outliers(avg, cs, i, sensitivity) }
|
149
147
|
outliers
|
150
148
|
end
|
151
149
|
end
|
data/lib/everyday-cli-utils.rb
CHANGED
@@ -1,5 +1,36 @@
|
|
1
1
|
require_relative 'everyday-cli-utils/version'
|
2
2
|
|
3
3
|
module EverydayCliUtils
|
4
|
-
|
4
|
+
AVAILABLE_MODULES = [:ask, :format, :format_safe, :histogram, :histogram_safe, :kmeans, :kmeans_safe, :maputil, :maputil_safe, :mycurses, :option]
|
5
|
+
|
6
|
+
def self.import(*names)
|
7
|
+
names.each { |name|
|
8
|
+
case (name)
|
9
|
+
when :ask
|
10
|
+
require_relative 'everyday-cli-utils/ask'
|
11
|
+
when :format
|
12
|
+
require_relative 'everyday-cli-utils/format'
|
13
|
+
when :format_safe
|
14
|
+
require_relative 'everyday-cli-utils/safe/format'
|
15
|
+
when :histogram
|
16
|
+
require_relative 'everyday-cli-utils/histogram'
|
17
|
+
when :histogram_safe
|
18
|
+
require_relative 'everyday-cli-utils/safe/histogram'
|
19
|
+
when :kmeans
|
20
|
+
require_relative 'everyday-cli-utils/kmeans'
|
21
|
+
when :kmeans_safe
|
22
|
+
require_relative 'everyday-cli-utils/safe/kmeans'
|
23
|
+
when :maputil
|
24
|
+
require_relative 'everyday-cli-utils/maputil'
|
25
|
+
when :maputil_safe
|
26
|
+
require_relative 'everyday-cli-utils/safe/maputil'
|
27
|
+
when :mycurses
|
28
|
+
require_relative 'everyday-cli-utils/mycurses'
|
29
|
+
when :option
|
30
|
+
require_relative 'everyday-cli-utils/option'
|
31
|
+
else
|
32
|
+
raise "#{name.to_s} not found!"
|
33
|
+
end
|
34
|
+
}
|
35
|
+
end
|
5
36
|
end
|