everyday-cli-utils 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/everyday-cli-utils/safe/kmeans.rb +44 -46
- data/lib/everyday-cli-utils/version.rb +1 -1
- data/lib/everyday-cli-utils.rb +32 -1
- data/spec/everyday-cli-utils/format_spec.rb +2 -1
- data/spec/everyday-cli-utils/kmeans_spec.rb +2 -1
- data/spec/everyday-cli-utils/maputil_spec.rb +2 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5027d77ef68c7d0aff222110d71d71100b7f0aab
|
4
|
+
data.tar.gz: a66077fff89ce842c5805719343d311cc87c9173
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 536936fbe11f8c7a8af96a0ad9f283db2812221f37663ff1f8e69939fe8b72369439ede6c4d98ebaaa5ac4a34f3631e9f5749ef6a3397bfbe72d8294b797f115
|
7
|
+
data.tar.gz: 00f8f63bdc70ebdfaedc5295ab8d9bfb8d7afbb66821f5cf76a670b5d21e67d0d641023efe4dedc4238cfca734284edbd06c67f67386cb3688fe74e44c557df6
|
data/lib/everyday-cli-utils/safe/kmeans.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative 'maputil'
|
2
2
|
|
3
3
|
module EverydayCliUtils
|
4
|
-
module Kmeans
|
4
|
+
module KmeansUtil
|
5
5
|
def self.normal(x, avg, std)
|
6
6
|
exp = -(((x - avg) / std) ** 2.0) / 2.0
|
7
7
|
((Math.exp(exp) / (std * Math.sqrt(2.0 * Math::PI))))
|
@@ -22,16 +22,15 @@ module EverydayCliUtils
|
|
22
22
|
|
23
23
|
def self.f_test_uv(clusters, cnt, cnt2, means)
|
24
24
|
uv = 0.0
|
25
|
-
(0...means.count).each { |i|
|
26
|
-
unless clusters[i].empty?
|
27
|
-
(0...clusters[i].count).each { |j|
|
28
|
-
uv += (clusters[i][j] - means[i]) * (clusters[i][j] - means[i])
|
29
|
-
}
|
30
|
-
end
|
31
|
-
}
|
25
|
+
(0...means.count).each { |i| uv = f_test_uvi(clusters, i, means, uv) }
|
32
26
|
uv / (cnt - cnt2)
|
33
27
|
end
|
34
28
|
|
29
|
+
def self.f_test_uvi(clusters, i, means, uv)
|
30
|
+
(0...clusters[i].count).each { |j| uv += (clusters[i][j] - means[i]) * (clusters[i][j] - means[i]) } unless clusters[i].empty?
|
31
|
+
uv
|
32
|
+
end
|
33
|
+
|
35
34
|
def self.f_test2(clusters, means, cnt)
|
36
35
|
uv = 0.0
|
37
36
|
cnt2 = clusters.count { |i| !i.empty? }
|
@@ -41,13 +40,36 @@ module EverydayCliUtils
|
|
41
40
|
|
42
41
|
def self.f_test2_calc(clusters, i, means, uv)
|
43
42
|
tmp = 0.0
|
44
|
-
(0...clusters[i].count).each { |j|
|
45
|
-
tmp += (clusters[i][j] - means[i]) ** 2.0
|
46
|
-
}
|
43
|
+
(0...clusters[i].count).each { |j| tmp += (clusters[i][j] - means[i]) ** 2.0 }
|
47
44
|
tmp /= clusters[i].count
|
48
45
|
Math.sqrt(tmp)
|
49
46
|
end
|
50
47
|
|
48
|
+
def self.get_clusters(collection, means)
|
49
|
+
clusters = Array.new(means.count) { Array.new }
|
50
|
+
collection.each { |item|
|
51
|
+
cluster = false
|
52
|
+
distance = false
|
53
|
+
(0...means.count).each { |i|
|
54
|
+
diff = (means[i] - item).abs
|
55
|
+
if distance == false || diff < distance
|
56
|
+
cluster = i
|
57
|
+
distance = diff
|
58
|
+
end
|
59
|
+
}
|
60
|
+
clusters[cluster] << item
|
61
|
+
}
|
62
|
+
clusters
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.find_outliers(avg, cs, i, sensitivity)
|
66
|
+
csi = cs[i]
|
67
|
+
std = EverydayCliUtils::MapUtil.std_dev(csi)
|
68
|
+
cnt = csi.count
|
69
|
+
csi.select { |c| (normal(c, avg, std) * cnt) < sensitivity }
|
70
|
+
end
|
71
|
+
end
|
72
|
+
module Kmeans
|
51
73
|
def self.nmeans_setup_1(collection)
|
52
74
|
su = EverydayCliUtils::MapUtil.sum(collection)
|
53
75
|
cnt = collection.count
|
@@ -57,12 +79,12 @@ module EverydayCliUtils
|
|
57
79
|
end
|
58
80
|
|
59
81
|
def self.nmeans_setup_2(collection, avg, cnt, ks1)
|
60
|
-
cso = get_clusters(collection, ks1)
|
61
|
-
ft1 = f_test2(cso, ks1, cnt)
|
82
|
+
cso = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks1)
|
83
|
+
ft1 = EverydayCliUtils::KmeansUtil.f_test2(cso, ks1, cnt)
|
62
84
|
ks = kmeans(collection, 2)
|
63
|
-
cs = get_clusters(collection, ks)
|
64
|
-
ft = f_test(cs, ks, cnt, avg)
|
65
|
-
ft2 = f_test2(cs, ks, cnt)
|
85
|
+
cs = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
|
86
|
+
ft = EverydayCliUtils::KmeansUtil.f_test(cs, ks, cnt, avg)
|
87
|
+
ft2 = EverydayCliUtils::KmeansUtil.f_test2(cs, ks, cnt)
|
66
88
|
return ft, ft1, ft2, ks
|
67
89
|
end
|
68
90
|
|
@@ -71,9 +93,9 @@ module EverydayCliUtils
|
|
71
93
|
fto = ft
|
72
94
|
fto2 = ft2
|
73
95
|
ks = kmeans(collection, k)
|
74
|
-
cs = get_clusters(collection, ks)
|
75
|
-
ft = f_test(cs, ks, cnt, avg)
|
76
|
-
ft2 = f_test2(cs, ks, cnt)
|
96
|
+
cs = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
|
97
|
+
ft = EverydayCliUtils::KmeansUtil.f_test(cs, ks, cnt, avg)
|
98
|
+
ft2 = EverydayCliUtils::KmeansUtil.f_test2(cs, ks, cnt)
|
77
99
|
return ft, ft2, fto, fto2, ks, kso
|
78
100
|
end
|
79
101
|
|
@@ -87,30 +109,13 @@ module EverydayCliUtils
|
|
87
109
|
|
88
110
|
def self.run_kmean(collection, ks)
|
89
111
|
kso = ks
|
90
|
-
clusters = get_clusters(collection, kso)
|
112
|
+
clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, kso)
|
91
113
|
ks = []
|
92
114
|
clusters.each_with_index { |val, key| ks[key] = (val.count <= 0) ? kso[key] : (val.sum / val.count) }
|
93
115
|
ks.sort
|
94
116
|
return kso, ks
|
95
117
|
end
|
96
118
|
|
97
|
-
def self.get_clusters(collection, means)
|
98
|
-
clusters = Array.new(means.count) { Array.new }
|
99
|
-
collection.each { |item|
|
100
|
-
cluster = false
|
101
|
-
distance = false
|
102
|
-
(0...means.count).each { |i|
|
103
|
-
diff = (means[i] - item).abs
|
104
|
-
if distance == false || diff < distance
|
105
|
-
cluster = i
|
106
|
-
distance = diff
|
107
|
-
end
|
108
|
-
}
|
109
|
-
clusters[cluster] << item
|
110
|
-
}
|
111
|
-
clusters
|
112
|
-
end
|
113
|
-
|
114
119
|
def self.kmeans(collection, k)
|
115
120
|
mi = collection.min
|
116
121
|
ma = collection.max
|
@@ -132,20 +137,13 @@ module EverydayCliUtils
|
|
132
137
|
run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
|
133
138
|
end
|
134
139
|
|
135
|
-
def self.find_outliers(avg, cs, i, sensitivity)
|
136
|
-
csi = cs[i]
|
137
|
-
std = EverydayCliUtils::MapUtil.std_dev(csi)
|
138
|
-
cnt = csi.count
|
139
|
-
csi.select { |c| (normal(c, avg, std) * cnt) < sensitivity }
|
140
|
-
end
|
141
|
-
|
142
140
|
def self.outliers(collection, sensitivity = 0.5, k = nil)
|
143
141
|
ks = k.nil? ? nmeans(collection) : kmeans(collection, k)
|
144
|
-
cs = get_clusters(collection, ks)
|
142
|
+
cs = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
|
145
143
|
|
146
144
|
outliers = []
|
147
145
|
|
148
|
-
ks.each_with_index { |avg, i| outliers += find_outliers(avg, cs, i, sensitivity) }
|
146
|
+
ks.each_with_index { |avg, i| outliers += EverydayCliUtils::KmeansUtil.find_outliers(avg, cs, i, sensitivity) }
|
149
147
|
outliers
|
150
148
|
end
|
151
149
|
end
|
data/lib/everyday-cli-utils.rb
CHANGED
@@ -1,5 +1,36 @@
|
|
1
1
|
require_relative 'everyday-cli-utils/version'
|
2
2
|
|
3
3
|
module EverydayCliUtils
|
4
|
-
|
4
|
+
AVAILABLE_MODULES = [:ask, :format, :format_safe, :histogram, :histogram_safe, :kmeans, :kmeans_safe, :maputil, :maputil_safe, :mycurses, :option]
|
5
|
+
|
6
|
+
def self.import(*names)
|
7
|
+
names.each { |name|
|
8
|
+
case (name)
|
9
|
+
when :ask
|
10
|
+
require_relative 'everyday-cli-utils/ask'
|
11
|
+
when :format
|
12
|
+
require_relative 'everyday-cli-utils/format'
|
13
|
+
when :format_safe
|
14
|
+
require_relative 'everyday-cli-utils/safe/format'
|
15
|
+
when :histogram
|
16
|
+
require_relative 'everyday-cli-utils/histogram'
|
17
|
+
when :histogram_safe
|
18
|
+
require_relative 'everyday-cli-utils/safe/histogram'
|
19
|
+
when :kmeans
|
20
|
+
require_relative 'everyday-cli-utils/kmeans'
|
21
|
+
when :kmeans_safe
|
22
|
+
require_relative 'everyday-cli-utils/safe/kmeans'
|
23
|
+
when :maputil
|
24
|
+
require_relative 'everyday-cli-utils/maputil'
|
25
|
+
when :maputil_safe
|
26
|
+
require_relative 'everyday-cli-utils/safe/maputil'
|
27
|
+
when :mycurses
|
28
|
+
require_relative 'everyday-cli-utils/mycurses'
|
29
|
+
when :option
|
30
|
+
require_relative 'everyday-cli-utils/option'
|
31
|
+
else
|
32
|
+
raise "#{name.to_s} not found!"
|
33
|
+
end
|
34
|
+
}
|
35
|
+
end
|
5
36
|
end
|