shades 0.11 → 0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +7 -1
- data/bin/histo +73 -16
- data/lib/formatter.rb +8 -2
- data/lib/histo.rb +18 -7
- data/lib/model.rb +14 -8
- data/lib/queryparser.rb +1 -1
- data/lib/streamparser.rb +6 -2
- data/shades.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmI0ZDFmZWZhYTIxN2FkN2E0MWFmNjk1NDFiYWI3MDgyZmRmZjg2Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OWRjYjQzZmI5ZGQ4NWYzYTQ1NDk1NDkzYWI2MTZmYTc5NmViZTYwNg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZWRkOTNkOTcyNTExN2E1Mzc0NmViNmVhNWNhOWM1NTljNjdhN2QwMTVhOGY4
|
10
|
+
OTU2MTk4YWEzYTE5YWMzODU1N2NjODYzMTY1NTE0ZDE3M2E0NjdiOTA5MDRm
|
11
|
+
M2VjODM1MDc4NzEzNWMwY2ZhZjNhMjM1NTNlNDlhZDE1MmUyMjg=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODQxZjBkMmNiYTk2ZTY4ZGEwMjUyNjU1NTM2ZGIxM2Q0MmI2NzAzZWY3ZjNl
|
14
|
+
ZTI4MjExNjdjZmEyODcyODczNzQ3NjA3YjFmZGYxZWFiMDBmZDY2YzgzNDQy
|
15
|
+
MTgxZGRlZWUyNjNkNDE1ZjZkOThkYmIwYTYwNzM5ZDBkZDkwZTA=
|
data/README.md
CHANGED
@@ -4,9 +4,15 @@ Get a new perspective on your data. In-memory [OLAP cubing](http://en.wikipedia.
|
|
4
4
|
|
5
5
|

|
6
6
|
|
7
|
+
## Install
|
8
|
+
|
9
|
+
```
|
10
|
+
gem install shades
|
11
|
+
```
|
12
|
+
|
7
13
|
## As a command line utility for OLAP cubing
|
8
14
|
|
9
|
-
The ```shades``` utility will accept whitespace-delimited data, one event per line, preceded by two commented lines describing the dimensions and
|
15
|
+
The ```shades``` utility will accept whitespace-delimited data, one event per line, preceded by two commented lines describing the dimensions and measures within.
|
10
16
|
|
11
17
|
```
|
12
18
|
# dimensions: timestamp transactionid customer item
|
data/bin/histo
CHANGED
@@ -1,27 +1,84 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
#/ Usage: histo [options]
|
3
|
+
#/ -p <measure> : Parse shades formatted data and histogram a specific measure given as the last argument
|
4
|
+
#/ -n <num> : the max number of bins to include in the output (default = 10)
|
5
|
+
#/ -w <width> : the width of the histogram bars (default = 30)
|
6
|
+
#/ -l : output ascii art on a log scale (default is linear).
|
7
|
+
#/ : tip: if the value distribution looks linear using this option then you're dealing with a power law.
|
8
|
+
#/ -h : show this usage
|
2
9
|
$: << File.realpath(File.dirname(__FILE__) + "/../lib")
|
3
10
|
|
4
11
|
require 'shades'
|
12
|
+
require 'getoptlong'
|
13
|
+
require 'pathname'
|
5
14
|
|
6
|
-
def
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
end
|
15
|
+
def usage
|
16
|
+
program = Pathname.new(__FILE__).realpath
|
17
|
+
help = `grep ^\#\/ #{program} | cut -c4-`
|
18
|
+
puts help
|
19
|
+
exit 1
|
20
|
+
end
|
13
21
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
def main(mkey, max_bins, ascii_width, log_base)
|
23
|
+
|
24
|
+
histo = Shades::DynamicHistogram.new(max_bins)
|
25
|
+
|
26
|
+
if mkey.nil?
|
27
|
+
# just read a stream of numbers from stdin
|
28
|
+
$stdin.each_line do |line|
|
29
|
+
line.scan(/(?:\d+\.?\d*|\d*\.\d+)/).each do |s|
|
30
|
+
begin
|
31
|
+
n = Float(s)
|
32
|
+
histo.add(n)
|
33
|
+
rescue => err
|
34
|
+
puts "error parsing %s as float" % s
|
35
|
+
puts err.message
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
else
|
40
|
+
# set up the histogram data to accept streaming input
|
41
|
+
p = Shades::StreamParser.new do |e|
|
42
|
+
histo.add(e.measure(mkey))
|
43
|
+
end
|
18
44
|
|
19
|
-
|
45
|
+
# stream stdin lines to the parser
|
46
|
+
$stdin.each_line do |line|
|
47
|
+
p << line
|
48
|
+
end
|
49
|
+
end
|
50
|
+
$stdout.puts histo.ascii_art(ascii_width, log_base)
|
20
51
|
end
|
21
52
|
|
22
|
-
|
23
|
-
|
24
|
-
|
53
|
+
opts = GetoptLong.new
|
54
|
+
opts.set_options(
|
55
|
+
# parse shades formatted data and histogram a specific measure given as the last argument
|
56
|
+
["-p", "--parse" , GetoptLong::OPTIONAL_ARGUMENT],
|
57
|
+
["-n", "--max-bins" , GetoptLong::OPTIONAL_ARGUMENT],
|
58
|
+
["-w", "--ascii-width" , GetoptLong::OPTIONAL_ARGUMENT],
|
59
|
+
["-l", "--log-base" , GetoptLong::OPTIONAL_ARGUMENT],
|
60
|
+
["-h", "--help" , GetoptLong::NO_ARGUMENT],
|
61
|
+
)
|
62
|
+
|
63
|
+
measure = nil
|
64
|
+
max_bins = 10
|
65
|
+
ascii_width = 30
|
66
|
+
log_base = 0
|
67
|
+
|
68
|
+
opts.each do |opt, arg|
|
69
|
+
case opt
|
70
|
+
when '-h'
|
71
|
+
usage
|
72
|
+
when '-p'
|
73
|
+
measure = arg
|
74
|
+
when '-n'
|
75
|
+
max_bins = arg.to_i
|
76
|
+
when '-w'
|
77
|
+
ascii_width = arg.to_i
|
78
|
+
when '-l'
|
79
|
+
log_base = arg.to_i
|
80
|
+
end
|
81
|
+
end
|
25
82
|
|
26
|
-
main(measure,
|
83
|
+
main(measure, max_bins, ascii_width, log_base)
|
27
84
|
|
data/lib/formatter.rb
CHANGED
@@ -4,7 +4,10 @@ module Shades
|
|
4
4
|
@spacer = spacer
|
5
5
|
end
|
6
6
|
def text(out, events)
|
7
|
-
|
7
|
+
if events.empty?
|
8
|
+
return
|
9
|
+
end
|
10
|
+
metadata = events[0].metadata
|
8
11
|
lines = []
|
9
12
|
out.puts "# dimensions: %s" % (metadata.dimensions.join(@spacer))
|
10
13
|
out.puts "# measures: %s" % (metadata.measures.join(@spacer))
|
@@ -13,7 +16,10 @@ module Shades
|
|
13
16
|
end
|
14
17
|
end
|
15
18
|
def pretty_text(out, events)
|
16
|
-
|
19
|
+
if events.empty?
|
20
|
+
return
|
21
|
+
end
|
22
|
+
metadata = events[0].metadata
|
17
23
|
lines = []
|
18
24
|
(events.length+1).times {|i|lines[i] = []}
|
19
25
|
metadata.dimensions.each do |d|
|
data/lib/histo.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
module Shades
|
3
3
|
|
4
4
|
# streaming histograms:
|
5
|
-
#
|
5
|
+
# Ruby port of the Clojure library from BigML: https://github.com/bigmlcom/histogram
|
6
6
|
class DynamicHistogram
|
7
7
|
|
8
8
|
def initialize(max_size)
|
@@ -18,8 +18,14 @@ module Shades
|
|
18
18
|
@res.lines
|
19
19
|
end
|
20
20
|
|
21
|
-
def ascii_art
|
22
|
-
@res.histo_text
|
21
|
+
def ascii_art(output_width, log_base)
|
22
|
+
@res.histo_text(output_width) do |x|
|
23
|
+
if log_base != 0
|
24
|
+
Math::log(x, log_base)
|
25
|
+
else
|
26
|
+
x
|
27
|
+
end
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
@@ -90,18 +96,23 @@ module Shades
|
|
90
96
|
##
|
91
97
|
## So, the line above that reads "0.502 ( 27) ##############################"
|
92
98
|
## can be read as: "There are 27 values close to 0.502"
|
93
|
-
def histo_text
|
99
|
+
def histo_text(output_width)
|
94
100
|
a = []
|
95
101
|
max_bin_count = 1
|
96
|
-
width = 30
|
97
102
|
@bins.each do |b|
|
98
103
|
if b.count > max_bin_count
|
99
104
|
max_bin_count = b.count
|
100
105
|
end
|
101
106
|
end
|
107
|
+
scaled_max = yield max_bin_count
|
108
|
+
output_width -= 23
|
109
|
+
if output_width < 10
|
110
|
+
output_width = 10
|
111
|
+
end
|
102
112
|
@bins.each do |b|
|
103
|
-
|
104
|
-
|
113
|
+
scaled_value = yield b.count
|
114
|
+
repeat = output_width * ( scaled_value / scaled_max )
|
115
|
+
a << "%14.3f (%5d) %s" % [b.mean, b.count, '#' * repeat]
|
105
116
|
end
|
106
117
|
a.join("\n")
|
107
118
|
end
|
data/lib/model.rb
CHANGED
@@ -10,15 +10,21 @@ module Shades
|
|
10
10
|
# parse an event line that adheres to this metadata
|
11
11
|
def parse_event(line, sep)
|
12
12
|
values = line.split(sep)
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
begin
|
14
|
+
d = {}
|
15
|
+
@dimensions.zip(values.take(@dimensions.length)).each do |k, v|
|
16
|
+
d[k] = v.strip
|
17
|
+
end
|
18
|
+
m = {}
|
19
|
+
@measures.zip(values.drop(@dimensions.length)).each do |k, v|
|
20
|
+
m[k] = Float(v.strip)
|
21
|
+
end
|
22
|
+
return Event.new(self, d, m)
|
23
|
+
rescue => err
|
24
|
+
puts err.message
|
25
|
+
puts "line: #{line}"
|
16
26
|
end
|
17
|
-
|
18
|
-
@measures.zip(values.drop(@dimensions.length)).each do |k, v|
|
19
|
-
m[k] = Float(v.strip)
|
20
|
-
end
|
21
|
-
Event.new(self, d, m)
|
27
|
+
nil
|
22
28
|
end
|
23
29
|
end
|
24
30
|
|
data/lib/queryparser.rb
CHANGED
data/lib/streamparser.rb
CHANGED
@@ -12,9 +12,13 @@ module Shades
|
|
12
12
|
line.strip!
|
13
13
|
if !@metadata.nil?
|
14
14
|
event = @metadata.parse_event(line, /\s+/)
|
15
|
-
|
15
|
+
if event.nil?
|
16
|
+
puts line
|
17
|
+
else
|
18
|
+
@receiver.call(event)
|
19
|
+
end
|
16
20
|
elsif line.start_with?("#")
|
17
|
-
parts = line.scan(
|
21
|
+
parts = line.scan(/[\w\.]+/)
|
18
22
|
if parts[0].eql?("dimensions")
|
19
23
|
@dimensions = parts.drop(1)
|
20
24
|
elsif parts[0].eql?("measures")
|
data/shades.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shades
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.12'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dietrich Featherston
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|