everyday-cli-utils 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/everyday-cli-utils/curses_utils.rb +34 -32
- data/lib/everyday-cli-utils/format.rb +1 -107
- data/lib/everyday-cli-utils/histogram.rb +2 -38
- data/lib/everyday-cli-utils/kmeans.rb +5 -140
- data/lib/everyday-cli-utils/maputil.rb +13 -14
- data/lib/everyday-cli-utils/safe/format.rb +107 -0
- data/lib/everyday-cli-utils/safe/histogram.rb +39 -0
- data/lib/everyday-cli-utils/safe/kmeans.rb +152 -0
- data/lib/everyday-cli-utils/safe/maputil.rb +50 -0
- data/lib/everyday-cli-utils/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec3b8b4933935c4c8f32696a300d9e03b10aeb0a
|
4
|
+
data.tar.gz: 6af1473a8f38fabec5097d9666a6972cd728834f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a56b940fd967714110f57b70e47b1178ce069685cf03fba4d55cc9859e23d0f284fab303c5d0eb6d6f22926e2ede774f04fc317d21a45277743b8dfa40628290
|
7
|
+
data.tar.gz: b52ad9d742d1cc507cde63bbb7ae6bd270a268f999ccdeaa022d7d5505fe0a8a052580d3e7a87502039f97c525857d48ed0c73a90e9baf58045c8156fb76c4a2
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# EverydayCliUtils
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/everyday-cli-utils.png)](http://badge.fury.io/rb/everyday-cli-utils)
|
3
4
|
[![Build Status](https://travis-ci.org/henderea/everyday-cli-utils.png?branch=master)](https://travis-ci.org/henderea/everyday-cli-utils)
|
5
|
+
[![Dependency Status](https://gemnasium.com/henderea/everyday-cli-utils.png)](https://gemnasium.com/henderea/everyday-cli-utils)
|
4
6
|
[![Code Climate](https://codeclimate.com/github/henderea/everyday-cli-utils.png)](https://codeclimate.com/github/henderea/everyday-cli-utils)
|
5
7
|
[![Coverage Status](https://coveralls.io/repos/henderea/everyday-cli-utils/badge.png?branch=master)](https://coveralls.io/r/henderea/everyday-cli-utils?branch=master)
|
6
8
|
|
data/lib/everyday-cli-utils/curses_utils.rb
CHANGED
@@ -1,38 +1,40 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
1
|
+
module EverydayCliUtils
|
2
|
+
module CursesUtils
|
3
|
+
COLOR_TO_CURSES = {
|
4
|
+
:black => Curses::COLOR_BLACK,
|
5
|
+
:red => Curses::COLOR_RED,
|
6
|
+
:green => Curses::COLOR_GREEN,
|
7
|
+
:yellow => Curses::COLOR_YELLOW,
|
8
|
+
:blue => Curses::COLOR_BLUE,
|
9
|
+
:purple => Curses::COLOR_MAGENTA,
|
10
|
+
:cyan => Curses::COLOR_CYAN,
|
11
|
+
:white => Curses::COLOR_WHITE,
|
12
|
+
:none => -1,
|
13
|
+
}
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
15
|
+
def find_color(bgcolor, fgcolor)
|
16
|
+
@colors.find_index { |v| v[0] == (fgcolor || :none) && v[1] == (bgcolor || :none) }
|
17
|
+
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
def add_color(bgcolor, fgcolor)
|
20
|
+
Curses::init_pair(@colors.count + 1, COLOR_TO_CURSES[fgcolor || :none], COLOR_TO_CURSES[bgcolor || :none])
|
21
|
+
ind = @colors.count + 1
|
22
|
+
@colors << [fgcolor || :none, bgcolor || :none]
|
23
|
+
ind
|
24
|
+
end
|
24
25
|
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
27
|
+
private
|
28
|
+
def handle_color(fgcolor, bgcolor)
|
29
|
+
return 0 if (fgcolor.nil? || fgcolor == :none) && (bgcolor.nil? || bgcolor == :none)
|
30
|
+
ind = find_color(bgcolor, fgcolor)
|
31
|
+
ind = ind.nil? ? add_color(bgcolor, fgcolor) : ind + 1
|
32
|
+
Curses::color_pair(ind)
|
33
|
+
end
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
35
|
+
def get_format(str)
|
36
|
+
bold, underline, fgcolor, bgcolor = Format::parse_format(str)
|
37
|
+
(bold ? Curses::A_BOLD : 0) | (underline ? Curses::A_UNDERLINE : 0) | handle_color(fgcolor, bgcolor)
|
38
|
+
end
|
37
39
|
end
|
38
|
-
end
|
40
|
+
end
|
data/lib/everyday-cli-utils/format.rb
CHANGED
@@ -1,110 +1,4 @@
|
|
1
|
-
|
2
|
-
module Format
|
3
|
-
def self.build_format_hash(first_chr)
|
4
|
-
{
|
5
|
-
:black => "#{first_chr}0",
|
6
|
-
:red => "#{first_chr}1",
|
7
|
-
:green => "#{first_chr}2",
|
8
|
-
:yellow => "#{first_chr}3",
|
9
|
-
:blue => "#{first_chr}4",
|
10
|
-
:purple => "#{first_chr}5",
|
11
|
-
:cyan => "#{first_chr}6",
|
12
|
-
:white => "#{first_chr}7",
|
13
|
-
:none => nil,
|
14
|
-
}
|
15
|
-
end
|
16
|
-
|
17
|
-
FORMAT_TO_CODE = {
|
18
|
-
:bold => '1',
|
19
|
-
:underline => '4',
|
20
|
-
}
|
21
|
-
FG_COLOR_TO_CODE = build_format_hash('3')
|
22
|
-
BG_COLOR_TO_CODE = build_format_hash('4')
|
23
|
-
|
24
|
-
def self::format(text, format_code)
|
25
|
-
(format_code.nil? || format_code == '') ? text : "\e[#{format_code}m#{text}\e[0m"
|
26
|
-
end
|
27
|
-
|
28
|
-
def self::build_string(bold, underline, fgcolor, bgcolor)
|
29
|
-
str = ''
|
30
|
-
hit = false
|
31
|
-
hit, str = handle_bold(bold, hit, str)
|
32
|
-
hit, str = handle_underline(hit, str, underline)
|
33
|
-
hit, str = handle_fg_color(fgcolor, hit, str)
|
34
|
-
handle_bg_color(bgcolor, hit, str)
|
35
|
-
end
|
36
|
-
|
37
|
-
def self.handle_bold(bold, hit, str)
|
38
|
-
if bold
|
39
|
-
hit = true
|
40
|
-
str = FORMAT_TO_CODE[:bold]
|
41
|
-
end
|
42
|
-
return hit, str
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.handle_underline(hit, str, underline)
|
46
|
-
if underline
|
47
|
-
str += ';' if hit
|
48
|
-
hit = true
|
49
|
-
str += FORMAT_TO_CODE[:underline]
|
50
|
-
end
|
51
|
-
return hit, str
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.handle_fg_color(fgcolor, hit, str)
|
55
|
-
unless fgcolor.nil? || FG_COLOR_TO_CODE[fgcolor].nil?
|
56
|
-
str += ';' if hit
|
57
|
-
hit = true
|
58
|
-
str += FG_COLOR_TO_CODE[fgcolor]
|
59
|
-
end
|
60
|
-
return hit, str
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.handle_bg_color(bgcolor, hit, str)
|
64
|
-
unless bgcolor.nil? || BG_COLOR_TO_CODE[bgcolor].nil?
|
65
|
-
str += ';' if hit
|
66
|
-
str += BG_COLOR_TO_CODE[bgcolor]
|
67
|
-
end
|
68
|
-
str
|
69
|
-
end
|
70
|
-
|
71
|
-
def self::parse_format(str)
|
72
|
-
parts = str.split(';')
|
73
|
-
bold = false
|
74
|
-
underline = false
|
75
|
-
fgcolor = :none
|
76
|
-
bgcolor = :none
|
77
|
-
parts.each { |v|
|
78
|
-
if v == FORMAT_TO_CODE[:bold]
|
79
|
-
bold = true
|
80
|
-
elsif v == FORMAT_TO_CODE[:underline]
|
81
|
-
underline = true
|
82
|
-
elsif v[0] == '3'
|
83
|
-
fgcolor = FG_COLOR_TO_CODE.invert[v]
|
84
|
-
elsif v[0] == '4'
|
85
|
-
bgcolor = BG_COLOR_TO_CODE.invert[v]
|
86
|
-
end
|
87
|
-
}
|
88
|
-
return bold, underline, fgcolor, bgcolor
|
89
|
-
end
|
90
|
-
|
91
|
-
def self::colorize(text, fgcolor = nil, bgcolor = nil)
|
92
|
-
self::format(text, self::build_string(false, false, fgcolor, bgcolor))
|
93
|
-
end
|
94
|
-
|
95
|
-
def self::bold(text, fgcolor = nil, bgcolor = nil)
|
96
|
-
self::format(text, self::build_string(true, false, fgcolor, bgcolor))
|
97
|
-
end
|
98
|
-
|
99
|
-
def self::underline(text, fgcolor = nil, bgcolor = nil)
|
100
|
-
self::format(text, self::build_string(false, true, fgcolor, bgcolor))
|
101
|
-
end
|
102
|
-
|
103
|
-
def self::boldunderline(text, fgcolor = nil, bgcolor = nil)
|
104
|
-
self::format(text, self::build_string(true, true, fgcolor, bgcolor))
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
1
|
+
require_relative 'safe/format'
|
108
2
|
|
109
3
|
class String
|
110
4
|
alias :old_method_missing :method_missing
|
data/lib/everyday-cli-utils/histogram.rb
CHANGED
@@ -1,43 +1,7 @@
|
|
1
|
-
require_relative '
|
2
|
-
|
3
|
-
module EverydayCliUtils
|
4
|
-
class Histogram
|
5
|
-
def self.setup(collection, height, width)
|
6
|
-
mi = collection.min
|
7
|
-
ma = collection.max
|
8
|
-
diff = ma - mi
|
9
|
-
step = diff.to_f / (width.to_f - 1)
|
10
|
-
counts = Array.new(width, 0)
|
11
|
-
collection.each { |v| counts[((v - mi).to_f / step.to_f).floor] += 1 }
|
12
|
-
max_y = counts.max
|
13
|
-
lines = Array.new(height) { ' ' * width }
|
14
|
-
return counts, lines, max_y, mi, step
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.add_graph(counts, height, lines, max_y, width)
|
18
|
-
(0...width).each { |i|
|
19
|
-
h = ((counts[i].to_f / max_y.to_f) * height.to_f).round
|
20
|
-
((height - h)...height).each { |j|
|
21
|
-
lines[j][i] = '#'
|
22
|
-
}
|
23
|
-
if h == 0 && counts[i] > 0
|
24
|
-
lines[height - 1][i] = '_'
|
25
|
-
end
|
26
|
-
}
|
27
|
-
end
|
28
|
-
|
29
|
-
def self.add_averages(height, ks, lines, mi, step, width)
|
30
|
-
lines[height] = ' ' * width
|
31
|
-
ks.each { |v| lines[height][((v - mi) / step).to_i] = '|' }
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
1
|
+
require_relative 'safe/histogram'
|
35
2
|
|
36
3
|
module Enumerable
|
37
4
|
def histogram(ks = nil, width = 100, height = 50)
|
38
|
-
|
39
|
-
EverydayCliUtils::Histogram.add_graph(counts, height, lines, max_y, width)
|
40
|
-
EverydayCliUtils::Histogram.add_averages(height, ks, lines, mi, step, width) unless ks.nil?
|
41
|
-
lines
|
5
|
+
EverydayCliUtils::Histogram.histogram(self, ks, width, height)
|
42
6
|
end
|
43
7
|
end
|
data/lib/everyday-cli-utils/kmeans.rb
CHANGED
@@ -1,154 +1,19 @@
|
|
1
|
-
require_relative '
|
2
|
-
|
3
|
-
module EverydayCliUtils
|
4
|
-
class Kmeans
|
5
|
-
def self.normal(x, avg, std)
|
6
|
-
exp = -(((x - avg) / std) ** 2.0) / 2.0
|
7
|
-
((Math.exp(exp) / (std * Math.sqrt(2.0 * Math::PI))))
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.f_test(clusters, means, cnt, avg)
|
11
|
-
cnt2 = clusters.count { |i| !i.empty? }
|
12
|
-
ev = f_test_ev(avg, clusters, cnt2, means)
|
13
|
-
uv = f_test_uv(clusters, cnt, cnt2, means)
|
14
|
-
(ev / uv)
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.f_test_ev(avg, clusters, cnt2, means)
|
18
|
-
ev = 0.0
|
19
|
-
(0...means.count).each { |i| ev += clusters[i].empty? ? 0.0 : clusters[i].count * ((means[i] - avg) ** 2.0) }
|
20
|
-
ev / (cnt2 - 1.0)
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.f_test_uv(clusters, cnt, cnt2, means)
|
24
|
-
uv = 0.0
|
25
|
-
(0...means.count).each { |i|
|
26
|
-
unless clusters[i].empty?
|
27
|
-
(0...clusters[i].count).each { |j|
|
28
|
-
uv += (clusters[i][j] - means[i]) * (clusters[i][j] - means[i])
|
29
|
-
}
|
30
|
-
end
|
31
|
-
}
|
32
|
-
uv / (cnt - cnt2)
|
33
|
-
end
|
34
|
-
|
35
|
-
def self.f_test2(clusters, means, cnt)
|
36
|
-
uv = 0.0
|
37
|
-
cnt2 = clusters.count { |i| !i.empty? }
|
38
|
-
(0...means.count).each { |i| uv += f_test2_calc(clusters, i, means, uv) unless clusters[i].empty? }
|
39
|
-
(uv / (cnt - cnt2))
|
40
|
-
end
|
41
|
-
|
42
|
-
def self.f_test2_calc(clusters, i, means, uv)
|
43
|
-
tmp = 0.0
|
44
|
-
(0...clusters[i].count).each { |j|
|
45
|
-
tmp += (clusters[i][j] - means[i]) ** 2.0
|
46
|
-
}
|
47
|
-
tmp /= clusters[i].count
|
48
|
-
Math.sqrt(tmp)
|
49
|
-
end
|
50
|
-
|
51
|
-
def self.nmeans_setup_1(collection)
|
52
|
-
su = collection.sum
|
53
|
-
cnt = collection.count
|
54
|
-
avg = su / cnt
|
55
|
-
ks1 = collection.kmeans(1)
|
56
|
-
return avg, cnt, ks1
|
57
|
-
end
|
58
|
-
|
59
|
-
def self.nmeans_setup_2(collection, avg, cnt, ks1)
|
60
|
-
cso = collection.get_clusters(ks1)
|
61
|
-
ft1 = f_test2(cso, ks1, cnt)
|
62
|
-
ks = collection.kmeans(2)
|
63
|
-
cs = collection.get_clusters(ks)
|
64
|
-
ft = f_test(cs, ks, cnt, avg)
|
65
|
-
ft2 = f_test2(cs, ks, cnt)
|
66
|
-
return ft, ft1, ft2, ks
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.run_nmean(collection, avg, cnt, ft, ft2, k, ks)
|
70
|
-
kso = ks
|
71
|
-
fto = ft
|
72
|
-
fto2 = ft2
|
73
|
-
ks = collection.kmeans(k)
|
74
|
-
cs = collection.get_clusters(ks)
|
75
|
-
ft = f_test(cs, ks, cnt, avg)
|
76
|
-
ft2 = f_test2(cs, ks, cnt)
|
77
|
-
return ft, ft2, fto, fto2, ks, kso
|
78
|
-
end
|
79
|
-
|
80
|
-
def self.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
|
81
|
-
(3..[max_k, cnt].min).each { |k|
|
82
|
-
ft, ft2, fto, fto2, ks, kso = run_nmean(collection, avg, cnt, ft, ft2, k, ks)
|
83
|
-
return kso if ((ft - fto) / fto) < threshold && fto2 < ft1
|
84
|
-
}
|
85
|
-
ft2 >= ft1 ? ks1 : ks
|
86
|
-
end
|
87
|
-
|
88
|
-
def self.run_kmean(collection, ks)
|
89
|
-
kso = ks
|
90
|
-
clusters = collection.get_clusters(kso)
|
91
|
-
ks = []
|
92
|
-
clusters.each_with_index { |val, key| ks[key] = (val.count <= 0) ? kso[key] : (val.sum / val.count) }
|
93
|
-
ks.sort
|
94
|
-
return kso, ks
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
1
|
+
require_relative 'safe/kmeans'
|
98
2
|
|
99
3
|
module Enumerable
|
100
4
|
def outliers(sensitivity = 0.5, k = nil)
|
101
|
-
|
102
|
-
cs = get_clusters(ks)
|
103
|
-
|
104
|
-
outliers = []
|
105
|
-
|
106
|
-
ks.each_with_index { |avg, i| outliers += find_outliers(avg, cs, i, sensitivity) }
|
107
|
-
outliers
|
108
|
-
end
|
109
|
-
|
110
|
-
def find_outliers(avg, cs, i, sensitivity)
|
111
|
-
csi = cs[i]
|
112
|
-
std = csi.std_dev
|
113
|
-
cnt = csi.count
|
114
|
-
csi.select { |c| (EverydayCliUtils::Kmeans.normal(c, avg, std) * cnt) < sensitivity }
|
5
|
+
EverydayCliUtils::Kmeans.outliers(self, sensitivity, k)
|
115
6
|
end
|
116
7
|
|
117
8
|
def nmeans(max_k = 10, threshold = 0.05)
|
118
|
-
|
119
|
-
avg, cnt, ks1 = EverydayCliUtils::Kmeans.nmeans_setup_1(collection)
|
120
|
-
return ks1 if cnt == 1
|
121
|
-
ft, ft1, ft2, ks = EverydayCliUtils::Kmeans.nmeans_setup_2(collection, avg, cnt, ks1)
|
122
|
-
EverydayCliUtils::Kmeans.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
|
9
|
+
EverydayCliUtils::Kmeans.nmeans(self, max_k, threshold)
|
123
10
|
end
|
124
11
|
|
125
12
|
def kmeans(k)
|
126
|
-
|
127
|
-
ma = max
|
128
|
-
diff = ma - mi
|
129
|
-
ks = []
|
130
|
-
(1..k).each { |i| ks[i - 1] = mi + (i * (diff / (k + 1.0))) }
|
131
|
-
kso = false
|
132
|
-
while ks != kso
|
133
|
-
kso, ks = EverydayCliUtils::Kmeans.run_kmean(self, ks)
|
134
|
-
end
|
135
|
-
ks
|
13
|
+
EverydayCliUtils::Kmeans.kmeans(self, k)
|
136
14
|
end
|
137
15
|
|
138
16
|
def get_clusters(means)
|
139
|
-
|
140
|
-
each { |item|
|
141
|
-
cluster = false
|
142
|
-
distance = false
|
143
|
-
(0...means.count).each { |i|
|
144
|
-
diff = (means[i] - item).abs
|
145
|
-
if distance == false || diff < distance
|
146
|
-
cluster = i
|
147
|
-
distance = diff
|
148
|
-
end
|
149
|
-
}
|
150
|
-
clusters[cluster][clusters[cluster].count] = item
|
151
|
-
}
|
152
|
-
clusters
|
17
|
+
EverydayCliUtils::Kmeans.get_clusters(self, means)
|
153
18
|
end
|
154
19
|
end
|
data/lib/everyday-cli-utils/maputil.rb
CHANGED
@@ -1,48 +1,47 @@
|
|
1
|
+
require_relative 'safe/maputil'
|
2
|
+
|
1
3
|
module Enumerable
|
2
4
|
def removefalse
|
3
|
-
|
5
|
+
EverydayCliUtils::MapUtil.removefalse(self)
|
4
6
|
end
|
5
7
|
|
6
8
|
def filtermap(&block)
|
7
|
-
|
9
|
+
EverydayCliUtils::MapUtil.filtermap(self, &block)
|
8
10
|
end
|
9
11
|
|
10
12
|
def sum
|
11
|
-
|
13
|
+
EverydayCliUtils::MapUtil.sum(self)
|
12
14
|
end
|
13
15
|
|
14
16
|
def prod
|
15
|
-
|
17
|
+
EverydayCliUtils::MapUtil.prod(self)
|
16
18
|
end
|
17
19
|
|
18
20
|
def average
|
19
|
-
|
21
|
+
EverydayCliUtils::MapUtil.average(self)
|
20
22
|
end
|
21
23
|
|
22
24
|
def std_dev
|
23
|
-
|
24
|
-
cnt = count.to_f
|
25
|
-
su = summap { |v| (v.to_f - avg.to_f) ** 2 }
|
26
|
-
Math.sqrt(su / cnt)
|
25
|
+
EverydayCliUtils::MapUtil.std_dev(self)
|
27
26
|
end
|
28
27
|
|
29
28
|
def floats
|
30
|
-
|
29
|
+
EverydayCliUtils::MapUtil.floats(self)
|
31
30
|
end
|
32
31
|
|
33
32
|
def summap(&block)
|
34
|
-
|
33
|
+
EverydayCliUtils::MapUtil.summap(self, &block)
|
35
34
|
end
|
36
35
|
|
37
36
|
def productmap(&block)
|
38
|
-
|
37
|
+
EverydayCliUtils::MapUtil.productmap(self, &block)
|
39
38
|
end
|
40
39
|
|
41
40
|
def chompall
|
42
|
-
|
41
|
+
EverydayCliUtils::MapUtil.chompall(self)
|
43
42
|
end
|
44
43
|
|
45
44
|
def join(join_str)
|
46
|
-
|
45
|
+
EverydayCliUtils::MapUtil.join(self, join_str)
|
47
46
|
end
|
48
47
|
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module EverydayCliUtils
|
2
|
+
module Format
|
3
|
+
def self.build_format_hash(first_chr)
|
4
|
+
{
|
5
|
+
:black => "#{first_chr}0",
|
6
|
+
:red => "#{first_chr}1",
|
7
|
+
:green => "#{first_chr}2",
|
8
|
+
:yellow => "#{first_chr}3",
|
9
|
+
:blue => "#{first_chr}4",
|
10
|
+
:purple => "#{first_chr}5",
|
11
|
+
:cyan => "#{first_chr}6",
|
12
|
+
:white => "#{first_chr}7",
|
13
|
+
:none => nil,
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
FORMAT_TO_CODE = {
|
18
|
+
:bold => '1',
|
19
|
+
:underline => '4',
|
20
|
+
}
|
21
|
+
FG_COLOR_TO_CODE = build_format_hash('3')
|
22
|
+
BG_COLOR_TO_CODE = build_format_hash('4')
|
23
|
+
|
24
|
+
def self::format(text, format_code)
|
25
|
+
(format_code.nil? || format_code == '') ? text : "\e[#{format_code}m#{text}\e[0m"
|
26
|
+
end
|
27
|
+
|
28
|
+
def self::build_string(bold, underline, fgcolor, bgcolor)
|
29
|
+
str = ''
|
30
|
+
hit = false
|
31
|
+
hit, str = handle_bold(bold, hit, str)
|
32
|
+
hit, str = handle_underline(hit, str, underline)
|
33
|
+
hit, str = handle_fg_color(fgcolor, hit, str)
|
34
|
+
handle_bg_color(bgcolor, hit, str)
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.handle_bold(bold, hit, str)
|
38
|
+
if bold
|
39
|
+
hit = true
|
40
|
+
str = FORMAT_TO_CODE[:bold]
|
41
|
+
end
|
42
|
+
return hit, str
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.handle_underline(hit, str, underline)
|
46
|
+
if underline
|
47
|
+
str += ';' if hit
|
48
|
+
hit = true
|
49
|
+
str += FORMAT_TO_CODE[:underline]
|
50
|
+
end
|
51
|
+
return hit, str
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.handle_fg_color(fgcolor, hit, str)
|
55
|
+
unless fgcolor.nil? || FG_COLOR_TO_CODE[fgcolor].nil?
|
56
|
+
str += ';' if hit
|
57
|
+
hit = true
|
58
|
+
str += FG_COLOR_TO_CODE[fgcolor]
|
59
|
+
end
|
60
|
+
return hit, str
|
61
|
+
end
|
62
|
+
|
63
|
+
def self.handle_bg_color(bgcolor, hit, str)
|
64
|
+
unless bgcolor.nil? || BG_COLOR_TO_CODE[bgcolor].nil?
|
65
|
+
str += ';' if hit
|
66
|
+
str += BG_COLOR_TO_CODE[bgcolor]
|
67
|
+
end
|
68
|
+
str
|
69
|
+
end
|
70
|
+
|
71
|
+
def self::parse_format(str)
|
72
|
+
parts = str.split(';')
|
73
|
+
bold = false
|
74
|
+
underline = false
|
75
|
+
fgcolor = :none
|
76
|
+
bgcolor = :none
|
77
|
+
parts.each { |v|
|
78
|
+
if v == FORMAT_TO_CODE[:bold]
|
79
|
+
bold = true
|
80
|
+
elsif v == FORMAT_TO_CODE[:underline]
|
81
|
+
underline = true
|
82
|
+
elsif v[0] == '3'
|
83
|
+
fgcolor = FG_COLOR_TO_CODE.invert[v]
|
84
|
+
elsif v[0] == '4'
|
85
|
+
bgcolor = BG_COLOR_TO_CODE.invert[v]
|
86
|
+
end
|
87
|
+
}
|
88
|
+
return bold, underline, fgcolor, bgcolor
|
89
|
+
end
|
90
|
+
|
91
|
+
def self::colorize(text, fgcolor = nil, bgcolor = nil)
|
92
|
+
self::format(text, self::build_string(false, false, fgcolor, bgcolor))
|
93
|
+
end
|
94
|
+
|
95
|
+
def self::bold(text, fgcolor = nil, bgcolor = nil)
|
96
|
+
self::format(text, self::build_string(true, false, fgcolor, bgcolor))
|
97
|
+
end
|
98
|
+
|
99
|
+
def self::underline(text, fgcolor = nil, bgcolor = nil)
|
100
|
+
self::format(text, self::build_string(false, true, fgcolor, bgcolor))
|
101
|
+
end
|
102
|
+
|
103
|
+
def self::boldunderline(text, fgcolor = nil, bgcolor = nil)
|
104
|
+
self::format(text, self::build_string(true, true, fgcolor, bgcolor))
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module EverydayCliUtils
|
2
|
+
class Histogram
|
3
|
+
def self.setup(collection, height, width)
|
4
|
+
mi = collection.min
|
5
|
+
ma = collection.max
|
6
|
+
diff = ma - mi
|
7
|
+
step = diff.to_f / (width.to_f - 1)
|
8
|
+
counts = Array.new(width, 0)
|
9
|
+
collection.each { |v| counts[((v - mi).to_f / step.to_f).floor] += 1 }
|
10
|
+
max_y = counts.max
|
11
|
+
lines = Array.new(height) { ' ' * width }
|
12
|
+
return counts, lines, max_y, mi, step
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.add_graph(counts, height, lines, max_y, width)
|
16
|
+
(0...width).each { |i|
|
17
|
+
h = ((counts[i].to_f / max_y.to_f) * height.to_f).round
|
18
|
+
((height - h)...height).each { |j|
|
19
|
+
lines[j][i] = '#'
|
20
|
+
}
|
21
|
+
if h == 0 && counts[i] > 0
|
22
|
+
lines[height - 1][i] = '_'
|
23
|
+
end
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.add_averages(height, ks, lines, mi, step, width)
|
28
|
+
lines[height] = ' ' * width
|
29
|
+
ks.each { |v| lines[height][((v - mi) / step).to_i] = '|' }
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.histogram(collection, ks = nil, width = 100, height = 50)
|
33
|
+
counts, lines, max_y, mi, step = setup(collection, height, width)
|
34
|
+
add_graph(counts, height, lines, max_y, width)
|
35
|
+
add_averages(height, ks, lines, mi, step, width) unless ks.nil?
|
36
|
+
lines
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require_relative 'maputil'
|
2
|
+
|
3
|
+
module EverydayCliUtils
|
4
|
+
module Kmeans
|
5
|
+
def self.normal(x, avg, std)
|
6
|
+
exp = -(((x - avg) / std) ** 2.0) / 2.0
|
7
|
+
((Math.exp(exp) / (std * Math.sqrt(2.0 * Math::PI))))
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.f_test(clusters, means, cnt, avg)
|
11
|
+
cnt2 = clusters.count { |i| !i.empty? }
|
12
|
+
ev = f_test_ev(avg, clusters, cnt2, means)
|
13
|
+
uv = f_test_uv(clusters, cnt, cnt2, means)
|
14
|
+
(ev / uv)
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.f_test_ev(avg, clusters, cnt2, means)
|
18
|
+
ev = 0.0
|
19
|
+
(0...means.count).each { |i| ev += clusters[i].empty? ? 0.0 : clusters[i].count * ((means[i] - avg) ** 2.0) }
|
20
|
+
ev / (cnt2 - 1.0)
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.f_test_uv(clusters, cnt, cnt2, means)
|
24
|
+
uv = 0.0
|
25
|
+
(0...means.count).each { |i|
|
26
|
+
unless clusters[i].empty?
|
27
|
+
(0...clusters[i].count).each { |j|
|
28
|
+
uv += (clusters[i][j] - means[i]) * (clusters[i][j] - means[i])
|
29
|
+
}
|
30
|
+
end
|
31
|
+
}
|
32
|
+
uv / (cnt - cnt2)
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.f_test2(clusters, means, cnt)
|
36
|
+
uv = 0.0
|
37
|
+
cnt2 = clusters.count { |i| !i.empty? }
|
38
|
+
(0...means.count).each { |i| uv += f_test2_calc(clusters, i, means, uv) unless clusters[i].empty? }
|
39
|
+
(uv / (cnt - cnt2))
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.f_test2_calc(clusters, i, means, uv)
|
43
|
+
tmp = 0.0
|
44
|
+
(0...clusters[i].count).each { |j|
|
45
|
+
tmp += (clusters[i][j] - means[i]) ** 2.0
|
46
|
+
}
|
47
|
+
tmp /= clusters[i].count
|
48
|
+
Math.sqrt(tmp)
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.nmeans_setup_1(collection)
|
52
|
+
su = EverydayCliUtils::MapUtil.sum(collection)
|
53
|
+
cnt = collection.count
|
54
|
+
avg = su / cnt
|
55
|
+
ks1 = kmeans(collection, 1)
|
56
|
+
return avg, cnt, ks1
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.nmeans_setup_2(collection, avg, cnt, ks1)
|
60
|
+
cso = get_clusters(collection, ks1)
|
61
|
+
ft1 = f_test2(cso, ks1, cnt)
|
62
|
+
ks = kmeans(collection, 2)
|
63
|
+
cs = get_clusters(collection, ks)
|
64
|
+
ft = f_test(cs, ks, cnt, avg)
|
65
|
+
ft2 = f_test2(cs, ks, cnt)
|
66
|
+
return ft, ft1, ft2, ks
|
67
|
+
end
|
68
|
+
|
69
|
+
def self.run_nmean(collection, avg, cnt, ft, ft2, k, ks)
|
70
|
+
kso = ks
|
71
|
+
fto = ft
|
72
|
+
fto2 = ft2
|
73
|
+
ks = kmeans(collection, k)
|
74
|
+
cs = get_clusters(collection, ks)
|
75
|
+
ft = f_test(cs, ks, cnt, avg)
|
76
|
+
ft2 = f_test2(cs, ks, cnt)
|
77
|
+
return ft, ft2, fto, fto2, ks, kso
|
78
|
+
end
|
79
|
+
|
80
|
+
def self.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
|
81
|
+
(3..[max_k, cnt].min).each { |k|
|
82
|
+
ft, ft2, fto, fto2, ks, kso = run_nmean(collection, avg, cnt, ft, ft2, k, ks)
|
83
|
+
return kso if ((ft - fto) / fto) < threshold && fto2 < ft1
|
84
|
+
}
|
85
|
+
ft2 >= ft1 ? ks1 : ks
|
86
|
+
end
|
87
|
+
|
88
|
+
def self.run_kmean(collection, ks)
|
89
|
+
kso = ks
|
90
|
+
clusters = get_clusters(collection, kso)
|
91
|
+
ks = []
|
92
|
+
clusters.each_with_index { |val, key| ks[key] = (val.count <= 0) ? kso[key] : (val.sum / val.count) }
|
93
|
+
ks.sort
|
94
|
+
return kso, ks
|
95
|
+
end
|
96
|
+
|
97
|
+
def self.get_clusters(collection, means)
|
98
|
+
clusters = Array.new(means.count) { Array.new }
|
99
|
+
collection.each { |item|
|
100
|
+
cluster = false
|
101
|
+
distance = false
|
102
|
+
(0...means.count).each { |i|
|
103
|
+
diff = (means[i] - item).abs
|
104
|
+
if distance == false || diff < distance
|
105
|
+
cluster = i
|
106
|
+
distance = diff
|
107
|
+
end
|
108
|
+
}
|
109
|
+
clusters[cluster] << item
|
110
|
+
}
|
111
|
+
clusters
|
112
|
+
end
|
113
|
+
|
114
|
+
def self.kmeans(collection, k)
|
115
|
+
mi = collection.min
|
116
|
+
ma = collection.max
|
117
|
+
diff = ma - mi
|
118
|
+
ks = []
|
119
|
+
(1..k).each { |i| ks[i - 1] = mi + (i * (diff / (k + 1.0))) }
|
120
|
+
kso = false
|
121
|
+
while ks != kso
|
122
|
+
kso, ks = run_kmean(collection, ks)
|
123
|
+
end
|
124
|
+
ks
|
125
|
+
end
|
126
|
+
|
127
|
+
def self.nmeans(collection, max_k = 10, threshold = 0.05)
|
128
|
+
collection = EverydayCliUtils::MapUtil.floats(collection)
|
129
|
+
avg, cnt, ks1 = nmeans_setup_1(collection)
|
130
|
+
return ks1 if cnt == 1
|
131
|
+
ft, ft1, ft2, ks = nmeans_setup_2(collection, avg, cnt, ks1)
|
132
|
+
run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
|
133
|
+
end
|
134
|
+
|
135
|
+
def self.find_outliers(avg, cs, i, sensitivity)
|
136
|
+
csi = cs[i]
|
137
|
+
std = EverydayCliUtils::MapUtil.std_dev(csi)
|
138
|
+
cnt = csi.count
|
139
|
+
csi.select { |c| (normal(c, avg, std) * cnt) < sensitivity }
|
140
|
+
end
|
141
|
+
|
142
|
+
def self.outliers(collection, sensitivity = 0.5, k = nil)
|
143
|
+
ks = k.nil? ? nmeans(collection) : kmeans(collection, k)
|
144
|
+
cs = get_clusters(collection, ks)
|
145
|
+
|
146
|
+
outliers = []
|
147
|
+
|
148
|
+
ks.each_with_index { |avg, i| outliers += find_outliers(avg, cs, i, sensitivity) }
|
149
|
+
outliers
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module EverydayCliUtils
|
2
|
+
module MapUtil
|
3
|
+
def self.removefalse(collection)
|
4
|
+
collection.select { |i| i }
|
5
|
+
end
|
6
|
+
|
7
|
+
def self.filtermap(collection, &block)
|
8
|
+
removefalse(collection.map(&block))
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.sum(collection)
|
12
|
+
collection.reduce(:+)
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.prod(collection)
|
16
|
+
collection.reduce(:*)
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.average(collection)
|
20
|
+
sum(collection).to_f / collection.count.to_f
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.std_dev(collection)
|
24
|
+
avg = average(collection)
|
25
|
+
cnt = collection.count.to_f
|
26
|
+
su = summap(collection) { |v| (v.to_f - avg.to_f) ** 2 }
|
27
|
+
Math.sqrt(su / cnt)
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.floats(collection)
|
31
|
+
collection.map(&:to_f)
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.summap(collection, &block)
|
35
|
+
sum(collection.map(&block))
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.productmap(collection, &block)
|
39
|
+
prod(collection.map(&block))
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.chompall(collection)
|
43
|
+
collection.map(&:chomp)
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.join(collection, join_str)
|
47
|
+
collection.map(&:to_s).reduce { |a, b| a << join_str << b }
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: everyday-cli-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Henderson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -120,6 +120,10 @@ files:
|
|
120
120
|
- lib/everyday-cli-utils/maputil.rb
|
121
121
|
- lib/everyday-cli-utils/mycurses.rb
|
122
122
|
- lib/everyday-cli-utils/option.rb
|
123
|
+
- lib/everyday-cli-utils/safe/format.rb
|
124
|
+
- lib/everyday-cli-utils/safe/histogram.rb
|
125
|
+
- lib/everyday-cli-utils/safe/kmeans.rb
|
126
|
+
- lib/everyday-cli-utils/safe/maputil.rb
|
123
127
|
- lib/everyday-cli-utils/version.rb
|
124
128
|
- spec/everyday-cli-utils/format_spec.rb
|
125
129
|
- spec/everyday-cli-utils/kmeans_spec.rb
|