shades 0.11 → 0.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- Y2M3MzYxNDM1Y2VlYTY5NDQxMzFhMTgzZmIyNjExMzg3MGY2MTIyMg==
4
+ MmI0ZDFmZWZhYTIxN2FkN2E0MWFmNjk1NDFiYWI3MDgyZmRmZjg2Yw==
5
5
  data.tar.gz: !binary |-
6
- ZWJjZDEyZjVkM2NmMzhjMzMwN2MwZjAzMDVkN2VkZTUzMGFhMzJiOA==
6
+ OWRjYjQzZmI5ZGQ4NWYzYTQ1NDk1NDkzYWI2MTZmYTc5NmViZTYwNg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NWZjNGY4OTk5YmNhMDhlYWNkYWU0ZGIwYzVkNTViMzE1ZTIwN2EzZDdiNDYy
10
- YjMwNmEwOTVlYWFiMmM0ZTNkMWQ4MTQwNGM3MDYxZThlMzA1NDJmZjA0NGRj
11
- YThiNzMwMDEyOTQ3ZjE0YWJiNDRlNjdiODIwNTk2MjdmMzFlM2E=
9
+ ZWRkOTNkOTcyNTExN2E1Mzc0NmViNmVhNWNhOWM1NTljNjdhN2QwMTVhOGY4
10
+ OTU2MTk4YWEzYTE5YWMzODU1N2NjODYzMTY1NTE0ZDE3M2E0NjdiOTA5MDRm
11
+ M2VjODM1MDc4NzEzNWMwY2ZhZjNhMjM1NTNlNDlhZDE1MmUyMjg=
12
12
  data.tar.gz: !binary |-
13
- MmJkYWU5NGI2NzhlODcyNzdmMWE1MGIzMzg3YzNjMDQwNTZjNTc3YWI2ZDZk
14
- ZWNkNjQ4MzA5M2IwYzYyMWFjNTRmMWFkMTk5Y2UxOGI0NzhkMjFlZDU4NDRh
15
- MWY5ZjNmZGEzYzdhMTQ1MDc4Y2EwNjMyMTBiOTAzY2NiOTM4MTk=
13
+ ODQxZjBkMmNiYTk2ZTY4ZGEwMjUyNjU1NTM2ZGIxM2Q0MmI2NzAzZWY3ZjNl
14
+ ZTI4MjExNjdjZmEyODcyODczNzQ3NjA3YjFmZGYxZWFiMDBmZDY2YzgzNDQy
15
+ MTgxZGRlZWUyNjNkNDE1ZjZkOThkYmIwYTYwNzM5ZDBkZDkwZTA=
data/README.md CHANGED
@@ -4,9 +4,15 @@ Get a new perspective on your data. In-memory [OLAP cubing](http://en.wikipedia.
4
4
 
5
5
  ![](https://dl.dropboxusercontent.com/u/1133314/i/shades.gif)
6
6
 
7
+ ## Install
8
+
9
+ ```
10
+ gem install shades
11
+ ```
12
+
7
13
  ## As a command line utility for OLAP cubing
8
14
 
9
- The ```shades``` utility will accept whitespace-delimited data, one event per line, preceded by two commented lines describing the dimensions and data within.
15
+ The ```shades``` utility will accept whitespace-delimited data, one event per line, preceded by two commented lines describing the dimensions and measures within.
10
16
 
11
17
  ```
12
18
  # dimensions: timestamp transactionid customer item
data/bin/histo CHANGED
@@ -1,27 +1,84 @@
1
1
  #!/usr/bin/env ruby
2
+ #/ Usage: histo [options]
3
+ #/ -p <measure> : Parse shades formatted data and histogram a specific measure given as the last argument
4
+ #/ -n <num> : the max number of bins to include in the output (default = 10)
5
+ #/ -w <width> : the width of the histogram bars (default = 30)
6
+ #/ -l : output ascii art on a log scale (default is linear).
7
+ #/ : tip: if the value distribution looks linear using this option then you're dealing with a power law.
8
+ #/ -h : show this usage
2
9
  $: << File.realpath(File.dirname(__FILE__) + "/../lib")
3
10
 
4
11
  require 'shades'
12
+ require 'getoptlong'
13
+ require 'pathname'
5
14
 
6
- def main(mkey, max_buckets, output_width)
7
-
8
- # set up the histogram data to accept streaming input
9
- histo = Shades::DynamicHistogram.new(max_buckets)
10
- p = Shades::StreamParser.new do |e|
11
- histo.add(e.measure(mkey))
12
- end
15
+ def usage
16
+ program = Pathname.new(__FILE__).realpath
17
+ help = `grep ^\#\/ #{program} | cut -c4-`
18
+ puts help
19
+ exit 1
20
+ end
13
21
 
14
- # stream stdin lines to the parser
15
- $stdin.each_line do |line|
16
- p << line
17
- end
22
+ def main(mkey, max_bins, ascii_width, log_base)
23
+
24
+ histo = Shades::DynamicHistogram.new(max_bins)
25
+
26
+ if mkey.nil?
27
+ # just read a stream of numbers from stdin
28
+ $stdin.each_line do |line|
29
+ line.scan(/(?:\d+\.?\d*|\d*\.\d+)/).each do |s|
30
+ begin
31
+ n = Float(s)
32
+ histo.add(n)
33
+ rescue => err
34
+ puts "error parsing %s as float" % s
35
+ puts err.message
36
+ end
37
+ end
38
+ end
39
+ else
40
+ # set up the histogram data to accept streaming input
41
+ p = Shades::StreamParser.new do |e|
42
+ histo.add(e.measure(mkey))
43
+ end
18
44
 
19
- $stdout.puts histo.ascii_art
45
+ # stream stdin lines to the parser
46
+ $stdin.each_line do |line|
47
+ p << line
48
+ end
49
+ end
50
+ $stdout.puts histo.ascii_art(ascii_width, log_base)
20
51
  end
21
52
 
22
- measure = ARGV[-1]
23
- max_buckets = 10
24
- output_width = 30
53
+ opts = GetoptLong.new
54
+ opts.set_options(
55
+ # parse shades formatted data and histogram a specific measure given as the last argument
56
+ ["-p", "--parse" , GetoptLong::OPTIONAL_ARGUMENT],
57
+ ["-n", "--max-bins" , GetoptLong::OPTIONAL_ARGUMENT],
58
+ ["-w", "--ascii-width" , GetoptLong::OPTIONAL_ARGUMENT],
59
+ ["-l", "--log-base" , GetoptLong::OPTIONAL_ARGUMENT],
60
+ ["-h", "--help" , GetoptLong::NO_ARGUMENT],
61
+ )
62
+
63
+ measure = nil
64
+ max_bins = 10
65
+ ascii_width = 30
66
+ log_base = 0
67
+
68
+ opts.each do |opt, arg|
69
+ case opt
70
+ when '-h'
71
+ usage
72
+ when '-p'
73
+ measure = arg
74
+ when '-n'
75
+ max_bins = arg.to_i
76
+ when '-w'
77
+ ascii_width = arg.to_i
78
+ when '-l'
79
+ log_base = arg.to_i
80
+ end
81
+ end
25
82
 
26
- main(measure, max_buckets, output_width)
83
+ main(measure, max_bins, ascii_width, log_base)
27
84
 
data/lib/formatter.rb CHANGED
@@ -4,7 +4,10 @@ module Shades
4
4
  @spacer = spacer
5
5
  end
6
6
  def text(out, events)
7
- metadata = events[0].metadata unless events.empty?
7
+ if events.empty?
8
+ return
9
+ end
10
+ metadata = events[0].metadata
8
11
  lines = []
9
12
  out.puts "# dimensions: %s" % (metadata.dimensions.join(@spacer))
10
13
  out.puts "# measures: %s" % (metadata.measures.join(@spacer))
@@ -13,7 +16,10 @@ module Shades
13
16
  end
14
17
  end
15
18
  def pretty_text(out, events)
16
- metadata = events[0].metadata unless events.empty?
19
+ if events.empty?
20
+ return
21
+ end
22
+ metadata = events[0].metadata
17
23
  lines = []
18
24
  (events.length+1).times {|i|lines[i] = []}
19
25
  metadata.dimensions.each do |d|
data/lib/histo.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  module Shades
3
3
 
4
4
  # streaming histograms:
5
- # implementation of the clojure library from BigML: https://github.com/bigmlcom/histogram
5
+ # Ruby port of the Clojure library from BigML: https://github.com/bigmlcom/histogram
6
6
  class DynamicHistogram
7
7
 
8
8
  def initialize(max_size)
@@ -18,8 +18,14 @@ module Shades
18
18
  @res.lines
19
19
  end
20
20
 
21
- def ascii_art
22
- @res.histo_text
21
+ def ascii_art(output_width, log_base)
22
+ @res.histo_text(output_width) do |x|
23
+ if log_base != 0
24
+ Math::log(x, log_base)
25
+ else
26
+ x
27
+ end
28
+ end
23
29
  end
24
30
  end
25
31
 
@@ -90,18 +96,23 @@ module Shades
90
96
  ##
91
97
  ## So, the line above that reads "0.502 ( 27) ##############################"
92
98
  ## can be read as: "There are 27 values close to 0.502"
93
- def histo_text
99
+ def histo_text(output_width)
94
100
  a = []
95
101
  max_bin_count = 1
96
- width = 30
97
102
  @bins.each do |b|
98
103
  if b.count > max_bin_count
99
104
  max_bin_count = b.count
100
105
  end
101
106
  end
107
+ scaled_max = yield max_bin_count
108
+ output_width -= 23
109
+ if output_width < 10
110
+ output_width = 10
111
+ end
102
112
  @bins.each do |b|
103
- repeat = width * Float(b.count)/Float(max_bin_count)
104
- a << "%10.3f (%3d) %s" % [b.mean, b.count, '#' * repeat]
113
+ scaled_value = yield b.count
114
+ repeat = output_width * ( scaled_value / scaled_max )
115
+ a << "%14.3f (%5d) %s" % [b.mean, b.count, '#' * repeat]
105
116
  end
106
117
  a.join("\n")
107
118
  end
data/lib/model.rb CHANGED
@@ -10,15 +10,21 @@ module Shades
10
10
  # parse an event line that adheres to this metadata
11
11
  def parse_event(line, sep)
12
12
  values = line.split(sep)
13
- d = {}
14
- @dimensions.zip(values.take(@dimensions.length)).each do |k, v|
15
- d[k] = v.strip
13
+ begin
14
+ d = {}
15
+ @dimensions.zip(values.take(@dimensions.length)).each do |k, v|
16
+ d[k] = v.strip
17
+ end
18
+ m = {}
19
+ @measures.zip(values.drop(@dimensions.length)).each do |k, v|
20
+ m[k] = Float(v.strip)
21
+ end
22
+ return Event.new(self, d, m)
23
+ rescue => err
24
+ puts err.message
25
+ puts "line: #{line}"
16
26
  end
17
- m = {}
18
- @measures.zip(values.drop(@dimensions.length)).each do |k, v|
19
- m[k] = Float(v.strip)
20
- end
21
- Event.new(self, d, m)
27
+ nil
22
28
  end
23
29
  end
24
30
 
data/lib/queryparser.rb CHANGED
@@ -7,7 +7,7 @@ module Shades
7
7
  class QueryParser
8
8
 
9
9
  def self.parse(qs)
10
- parts = qs.scan(/\w+/)
10
+ parts = qs.scan(/[\w\.]+/)
11
11
  tokens = []
12
12
  t = BeginRollupToken.new
13
13
  parts.each do |p|
data/lib/streamparser.rb CHANGED
@@ -12,9 +12,13 @@ module Shades
12
12
  line.strip!
13
13
  if !@metadata.nil?
14
14
  event = @metadata.parse_event(line, /\s+/)
15
- @receiver.call(event)
15
+ if event.nil?
16
+ puts line
17
+ else
18
+ @receiver.call(event)
19
+ end
16
20
  elsif line.start_with?("#")
17
- parts = line.scan(/\w+/)
21
+ parts = line.scan(/[\w\.]+/)
18
22
  if parts[0].eql?("dimensions")
19
23
  @dimensions = parts.drop(1)
20
24
  elsif parts[0].eql?("measures")
data/shades.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'shades'
3
- s.version = '0.11'
3
+ s.version = '0.12'
4
4
 
5
5
  s.summary = "Get a new perspective on your data. In-memory data cubing of event data for Ruby."
6
6
  s.description = <<-EOF
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shades
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.11'
4
+ version: '0.12'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dietrich Featherston
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-25 00:00:00.000000000 Z
11
+ date: 2013-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler