shades 0.11 → 0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +7 -1
- data/bin/histo +73 -16
- data/lib/formatter.rb +8 -2
- data/lib/histo.rb +18 -7
- data/lib/model.rb +14 -8
- data/lib/queryparser.rb +1 -1
- data/lib/streamparser.rb +6 -2
- data/shades.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmI0ZDFmZWZhYTIxN2FkN2E0MWFmNjk1NDFiYWI3MDgyZmRmZjg2Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OWRjYjQzZmI5ZGQ4NWYzYTQ1NDk1NDkzYWI2MTZmYTc5NmViZTYwNg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZWRkOTNkOTcyNTExN2E1Mzc0NmViNmVhNWNhOWM1NTljNjdhN2QwMTVhOGY4
|
10
|
+
OTU2MTk4YWEzYTE5YWMzODU1N2NjODYzMTY1NTE0ZDE3M2E0NjdiOTA5MDRm
|
11
|
+
M2VjODM1MDc4NzEzNWMwY2ZhZjNhMjM1NTNlNDlhZDE1MmUyMjg=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODQxZjBkMmNiYTk2ZTY4ZGEwMjUyNjU1NTM2ZGIxM2Q0MmI2NzAzZWY3ZjNl
|
14
|
+
ZTI4MjExNjdjZmEyODcyODczNzQ3NjA3YjFmZGYxZWFiMDBmZDY2YzgzNDQy
|
15
|
+
MTgxZGRlZWUyNjNkNDE1ZjZkOThkYmIwYTYwNzM5ZDBkZDkwZTA=
|
data/README.md
CHANGED
@@ -4,9 +4,15 @@ Get a new perspective on your data. In-memory [OLAP cubing](http://en.wikipedia.
|
|
4
4
|
|
5
5
|
![](https://dl.dropboxusercontent.com/u/1133314/i/shades.gif)
|
6
6
|
|
7
|
+
## Install
|
8
|
+
|
9
|
+
```
|
10
|
+
gem install shades
|
11
|
+
```
|
12
|
+
|
7
13
|
## As a command line utility for OLAP cubing
|
8
14
|
|
9
|
-
The ```shades``` utility will accept whitespace-delimited data, one event per line, preceded by two commented lines describing the dimensions and
|
15
|
+
The ```shades``` utility will accept whitespace-delimited data, one event per line, preceded by two commented lines describing the dimensions and measures within.
|
10
16
|
|
11
17
|
```
|
12
18
|
# dimensions: timestamp transactionid customer item
|
data/bin/histo
CHANGED
@@ -1,27 +1,84 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
#/ Usage: histo [options]
|
3
|
+
#/ -p <measure> : Parse shades formatted data and histogram a specific measure given as the last argument
|
4
|
+
#/ -n <num> : the max number of bins to include in the output (default = 10)
|
5
|
+
#/ -w <width> : the width of the histogram bars (default = 30)
|
6
|
+
#/ -l : output ascii art on a log scale (default is linear).
|
7
|
+
#/ : tip: if the value distribution looks linear using this option then you're dealing with a power law.
|
8
|
+
#/ -h : show this usage
|
2
9
|
$: << File.realpath(File.dirname(__FILE__) + "/../lib")
|
3
10
|
|
4
11
|
require 'shades'
|
12
|
+
require 'getoptlong'
|
13
|
+
require 'pathname'
|
5
14
|
|
6
|
-
def
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
end
|
15
|
+
def usage
|
16
|
+
program = Pathname.new(__FILE__).realpath
|
17
|
+
help = `grep ^\#\/ #{program} | cut -c4-`
|
18
|
+
puts help
|
19
|
+
exit 1
|
20
|
+
end
|
13
21
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
def main(mkey, max_bins, ascii_width, log_base)
|
23
|
+
|
24
|
+
histo = Shades::DynamicHistogram.new(max_bins)
|
25
|
+
|
26
|
+
if mkey.nil?
|
27
|
+
# just read a stream of numbers from stdin
|
28
|
+
$stdin.each_line do |line|
|
29
|
+
line.scan(/(?:\d+\.?\d*|\d*\.\d+)/).each do |s|
|
30
|
+
begin
|
31
|
+
n = Float(s)
|
32
|
+
histo.add(n)
|
33
|
+
rescue => err
|
34
|
+
puts "error parsing %s as float" % s
|
35
|
+
puts err.message
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
else
|
40
|
+
# set up the histogram data to accept streaming input
|
41
|
+
p = Shades::StreamParser.new do |e|
|
42
|
+
histo.add(e.measure(mkey))
|
43
|
+
end
|
18
44
|
|
19
|
-
|
45
|
+
# stream stdin lines to the parser
|
46
|
+
$stdin.each_line do |line|
|
47
|
+
p << line
|
48
|
+
end
|
49
|
+
end
|
50
|
+
$stdout.puts histo.ascii_art(ascii_width, log_base)
|
20
51
|
end
|
21
52
|
|
22
|
-
|
23
|
-
|
24
|
-
|
53
|
+
opts = GetoptLong.new
|
54
|
+
opts.set_options(
|
55
|
+
# parse shades formatted data and histogram a specific measure given as the last argument
|
56
|
+
["-p", "--parse" , GetoptLong::OPTIONAL_ARGUMENT],
|
57
|
+
["-n", "--max-bins" , GetoptLong::OPTIONAL_ARGUMENT],
|
58
|
+
["-w", "--ascii-width" , GetoptLong::OPTIONAL_ARGUMENT],
|
59
|
+
["-l", "--log-base" , GetoptLong::OPTIONAL_ARGUMENT],
|
60
|
+
["-h", "--help" , GetoptLong::NO_ARGUMENT],
|
61
|
+
)
|
62
|
+
|
63
|
+
measure = nil
|
64
|
+
max_bins = 10
|
65
|
+
ascii_width = 30
|
66
|
+
log_base = 0
|
67
|
+
|
68
|
+
opts.each do |opt, arg|
|
69
|
+
case opt
|
70
|
+
when '-h'
|
71
|
+
usage
|
72
|
+
when '-p'
|
73
|
+
measure = arg
|
74
|
+
when '-n'
|
75
|
+
max_bins = arg.to_i
|
76
|
+
when '-w'
|
77
|
+
ascii_width = arg.to_i
|
78
|
+
when '-l'
|
79
|
+
log_base = arg.to_i
|
80
|
+
end
|
81
|
+
end
|
25
82
|
|
26
|
-
main(measure,
|
83
|
+
main(measure, max_bins, ascii_width, log_base)
|
27
84
|
|
data/lib/formatter.rb
CHANGED
@@ -4,7 +4,10 @@ module Shades
|
|
4
4
|
@spacer = spacer
|
5
5
|
end
|
6
6
|
def text(out, events)
|
7
|
-
|
7
|
+
if events.empty?
|
8
|
+
return
|
9
|
+
end
|
10
|
+
metadata = events[0].metadata
|
8
11
|
lines = []
|
9
12
|
out.puts "# dimensions: %s" % (metadata.dimensions.join(@spacer))
|
10
13
|
out.puts "# measures: %s" % (metadata.measures.join(@spacer))
|
@@ -13,7 +16,10 @@ module Shades
|
|
13
16
|
end
|
14
17
|
end
|
15
18
|
def pretty_text(out, events)
|
16
|
-
|
19
|
+
if events.empty?
|
20
|
+
return
|
21
|
+
end
|
22
|
+
metadata = events[0].metadata
|
17
23
|
lines = []
|
18
24
|
(events.length+1).times {|i|lines[i] = []}
|
19
25
|
metadata.dimensions.each do |d|
|
data/lib/histo.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
module Shades
|
3
3
|
|
4
4
|
# streaming histograms:
|
5
|
-
#
|
5
|
+
# Ruby port of the Clojure library from BigML: https://github.com/bigmlcom/histogram
|
6
6
|
class DynamicHistogram
|
7
7
|
|
8
8
|
def initialize(max_size)
|
@@ -18,8 +18,14 @@ module Shades
|
|
18
18
|
@res.lines
|
19
19
|
end
|
20
20
|
|
21
|
-
def ascii_art
|
22
|
-
@res.histo_text
|
21
|
+
def ascii_art(output_width, log_base)
|
22
|
+
@res.histo_text(output_width) do |x|
|
23
|
+
if log_base != 0
|
24
|
+
Math::log(x, log_base)
|
25
|
+
else
|
26
|
+
x
|
27
|
+
end
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
@@ -90,18 +96,23 @@ module Shades
|
|
90
96
|
##
|
91
97
|
## So, the line above that reads "0.502 ( 27) ##############################"
|
92
98
|
## can be read as: "There are 27 values close to 0.502"
|
93
|
-
def histo_text
|
99
|
+
def histo_text(output_width)
|
94
100
|
a = []
|
95
101
|
max_bin_count = 1
|
96
|
-
width = 30
|
97
102
|
@bins.each do |b|
|
98
103
|
if b.count > max_bin_count
|
99
104
|
max_bin_count = b.count
|
100
105
|
end
|
101
106
|
end
|
107
|
+
scaled_max = yield max_bin_count
|
108
|
+
output_width -= 23
|
109
|
+
if output_width < 10
|
110
|
+
output_width = 10
|
111
|
+
end
|
102
112
|
@bins.each do |b|
|
103
|
-
|
104
|
-
|
113
|
+
scaled_value = yield b.count
|
114
|
+
repeat = output_width * ( scaled_value / scaled_max )
|
115
|
+
a << "%14.3f (%5d) %s" % [b.mean, b.count, '#' * repeat]
|
105
116
|
end
|
106
117
|
a.join("\n")
|
107
118
|
end
|
data/lib/model.rb
CHANGED
@@ -10,15 +10,21 @@ module Shades
|
|
10
10
|
# parse an event line that adheres to this metadata
|
11
11
|
def parse_event(line, sep)
|
12
12
|
values = line.split(sep)
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
begin
|
14
|
+
d = {}
|
15
|
+
@dimensions.zip(values.take(@dimensions.length)).each do |k, v|
|
16
|
+
d[k] = v.strip
|
17
|
+
end
|
18
|
+
m = {}
|
19
|
+
@measures.zip(values.drop(@dimensions.length)).each do |k, v|
|
20
|
+
m[k] = Float(v.strip)
|
21
|
+
end
|
22
|
+
return Event.new(self, d, m)
|
23
|
+
rescue => err
|
24
|
+
puts err.message
|
25
|
+
puts "line: #{line}"
|
16
26
|
end
|
17
|
-
|
18
|
-
@measures.zip(values.drop(@dimensions.length)).each do |k, v|
|
19
|
-
m[k] = Float(v.strip)
|
20
|
-
end
|
21
|
-
Event.new(self, d, m)
|
27
|
+
nil
|
22
28
|
end
|
23
29
|
end
|
24
30
|
|
data/lib/queryparser.rb
CHANGED
data/lib/streamparser.rb
CHANGED
@@ -12,9 +12,13 @@ module Shades
|
|
12
12
|
line.strip!
|
13
13
|
if !@metadata.nil?
|
14
14
|
event = @metadata.parse_event(line, /\s+/)
|
15
|
-
|
15
|
+
if event.nil?
|
16
|
+
puts line
|
17
|
+
else
|
18
|
+
@receiver.call(event)
|
19
|
+
end
|
16
20
|
elsif line.start_with?("#")
|
17
|
-
parts = line.scan(
|
21
|
+
parts = line.scan(/[\w\.]+/)
|
18
22
|
if parts[0].eql?("dimensions")
|
19
23
|
@dimensions = parts.drop(1)
|
20
24
|
elsif parts[0].eql?("measures")
|
data/shades.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shades
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.12'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dietrich Featherston
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|