elskwid-munger 0.1.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,90 @@
1
+ Munger Ruby Reporting Library
2
+ =============================
3
+
4
+ Munger is basically a simple data munging and reporting library
5
+ for Ruby as an alternative to Ruport, which did not fill my needs
6
+ in ways that convinced me to start over rather than try to fork or
7
+ patch it. Apologies to the Ruport chaps, who I am sure are
8
+ smashing blokes - it just didn't wiggle my worm.
9
+
10
+ See the Wiki for details : http://github.com/schacon/munger/wikis
11
+
12
+ 3-Part Reporting
13
+ =============================
14
+
15
+ Munger creates reports in three stages, much like an Apollo rocket. My
16
+ main problem with Ruport was the coupling of different parts of these
17
+ stages in ways that didn't make the data easily re-usable, cacheable or
18
+ didn't give me enough control. I like to have my data separate from my
19
+ report, which should be renderable however I want.
20
+
21
+ * Stage 1 - Data Munging *
22
+
23
+ The first stage is getting a dataset that has all the information you need.
24
+ I like to call this stage 'munging' (pronounced: 'MON'-day + chan-'GING'),
25
+ which is taking a simple set of data (from a SQL query, perhaps) and
26
+ transforming fields, adding derived data, pivoting, etc - and making it into
27
+ a table of all the actual data-points you need.
28
+
29
+ * Stage 2 - Report Formatting *
30
+
31
+ Then there is the Reporting. To me, this means taking your massaged dataset
32
+ and doing all the fun reporting to it. This includes grouping, subgrouping,
33
+ sorting, column ordering, multi-level aggregation (sums, avg, etc) and
34
+ highlighting important information (values that are too small, too high, etc).
35
+
36
+ It can be argued that pivoting should be at this level, rather than the first,
37
+ but I decided to put it in the first stage instead, mostly because I really think of the
38
+ pivoted data as a different data set and also for performance reasons - the
39
+ pivot data can be a bear to produce, and I plan on caching the first stage and
40
+ then running different reporting options on it.
41
+
42
+ * Stage 3 - Output Rendering *
43
+
44
+ Now that I have my super spiffy report, I want to be able to render it however
45
+ I want, possibly in multiple formats - HTML and XLS are the most important to
46
+ me, but PDF, text, csv, etc will also likely be produced eventually.
47
+
48
+
49
+ Examples
50
+ =============================
51
+
52
+ The starting data can be ActiveRecord collections or an array of Hashes.
53
+
54
+ # webpage_hit table has ip_address, hit_date, action, referrer #
55
+
56
+ * Simple Example *
57
+
58
+ hits = WebpageHits.find(:all, :conditions => ['hit_date > ?', 1.days.ago])
59
+ @table_data = Munger::Report.new(:data => hits)
60
+ @table_data.sort('hit_date').aggregate(:count => :action)
61
+ html_table = Munger::Render::Html.new(@table_data).render
62
+
63
+
64
+ * More Complex Example *
65
+
66
+ hits = WebpageHits.find(:all, :conditions => ['hit_date > ?', 7.days.ago])
67
+
68
+ data = Munger::Data.new(:data => hits)
69
+ data.transform_column('hit_date') { |row| row.hit_date.day }
70
+ data.add_column('controller') { |row| row.action.split('/').first }
71
+
72
+ day_columns = data.pivot('hit_date', 'action', 'ip_address', :count)
73
+
74
+ @table_data = Munger::Report.new(:data => data,
75
+ :columns => [:action] + day_columns,
76
+ :aggregate => {:sum => day_columns})
77
+
78
+ @table_data.sort('action').subgroup('controller')
79
+ @table_data.process.style_cells('low_traffic', :only => day_columns) do |cell, row|
80
+ # highlight any index pages that have < 500 hits
81
+ cell.to_i < 500 if row.action =~ /index/
82
+ end
83
+
84
+ html_table = Munger::Render::Html.new(@table_data).render
85
+
86
+
87
+
88
+
89
+
90
+
data/Rakefile ADDED
@@ -0,0 +1,41 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'rake/rdoctask'
4
+ require 'spec/rake/spectask'
5
+
6
# Load the gem specification. File.read opens, reads and closes the file in
# one call, so no file handle is left open (File.new(...).read never closed it).
# NOTE(review): the gemspec is evaluated as Ruby code; only run this against a
# gemspec you trust.
spec = eval(File.read('munger.gemspec'))

# Packaging tasks (gem and tarball) built from the gemspec.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# Default task: build the .gem file for the current name and version.
task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
  puts "generated latest version"
end

desc 'Run specs'
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--format', 'specdoc', '--colour', '--diff']
end

desc 'Generate coverage reports'
Spec::Rake::SpecTask.new('spec:coverage') do |t|
  t.rcov = true
end

desc 'Generate a nice HTML report of spec results'
Spec::Rake::SpecTask.new('spec:report') do |t|
  t.spec_opts = ['--format', 'html:report.html', '--diff']
end

# `rake doc` is a shortcut for the rdoc task.
task :doc => [:rdoc]
namespace :doc do
  Rake::RDocTask.new do |rdoc|
    files = ["README", "lib/**/*.rb"]
    rdoc.rdoc_files.add(files)
    rdoc.main = "README"
    rdoc.title = "Munger Docs"
    rdoc.rdoc_dir = "doc"
    rdoc.options << "--line-numbers" << "--inline-source"
  end
end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + "/example_helper"
2
+ include ExampleHelper
3
+
4
# Example: load the sample rows, derive a click rate per row, build a sorted
# report, print it as text and save the HTML rendering to test.html.
data = Munger::Data.load_data(test_data)

# Replace :advert with its capitalized form and add a :rate column.
data.add_column([:advert, :rate]) do |row|
  rate = (row.clicks / row.airtime)
  [row.advert.capitalize, rate]
end

# Optional steps, left disabled as in the original example:
#data.filter_rows { |row| row.rate > 10 }
#new_columns = data.pivot('airtime', 'advert', 'rate', :average)

report = Munger::Report.from_data(data)
report.columns(:advert => 'Spot', :airdate => 'Air Date', :airtime => 'Airtime', :rate => 'Rate')
report.sort = [['airtime', :asc], ['rate', :asc]]
#report.subgroup('airtime')
#report.aggregate(Proc.new {|arr| arr.inject(0) {|total, i| i * i + (total - 30) }} => :airtime, :avg => :rate)
report.process

report.style_cells('myRed', :only => :rate) { |cell, row| (cell.to_i < 10) }

# The HTML must actually be rendered before it is written below; previously
# this assignment was commented out and the script died with a NameError.
html = Munger::Render.to_html(report, :classes => {:table => 'other-class'})
puts text = Munger::Render.to_text(report)

# Block form closes the file even if the write raises.
File.open('test.html', 'w') { |f| f.write(html) }
@@ -0,0 +1,2 @@
1
+ GET /example | Status: 200 | Params: {:format=>"html"}
2
+ GET /favicon.ico | Status: 404 | Params: {}
@@ -0,0 +1,23 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/../lib/munger")
2
+
3
+ require 'fileutils'
4
+ require 'logger'
5
+ require 'pp'
6
+
7
# Sample data shared by the example scripts.
module ExampleHelper
  # Returns a freshly built array of advert rows. Each row is a Hash with the
  # keys :advert, :airtime, :airdate and :clicks (in that order).
  def test_data
    [
      ["spot 1", 15, "2008-01-01", 301],
      ["spot 1", 30, "2008-01-02", 199],
      ["spot 1", 30, "2008-01-03", 234],
      ["spot 1", 15, "2008-01-04", 342],
      ["spot 2", 30, "2008-01-01", 172],
      ["spot 2", 15, "2008-01-02", 217],
      ["spot 2", 90, "2008-01-03", 1023],
      ["spot 2", 30, "2008-01-04", 321],
      ["spot 3", 60, "2008-01-01", 512],
      ["spot 3", 30, "2008-01-02", 813],
      ["spot 3", 15, "2008-01-03", 333]
    ].map do |advert, airtime, airdate, clicks|
      {:advert => advert, :airtime => airtime, :airdate => airdate, :clicks => clicks}
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'rubygems'
2
+ require 'sinatra'
3
+ require File.expand_path(File.dirname(__FILE__) + "/../lib/munger")
4
+
5
# GET / - renders the sample data as a plain report table.
get '/' do
  report = Munger::Report.from_data(Munger::Data.load_data(test_data))
  report.process

  show(Munger::Render.to_html(report, :classes => {:table => 'other-class'}))
end
14
+
15
# GET /pivot - pivots the click rate by airtime (columns) and advert (rows),
# averaging the rate, and highlights cells whose value is between 1 and 9.
get '/pivot' do
  munged = Munger::Data.load_data(test_data)

  munged.add_column([:advert, :rate]) do |row|
    [row.advert.capitalize, (row.clicks / row.airtime)]
  end

  pivot_columns = munged.pivot('airtime', 'advert', 'rate', :average)

  report = Munger::Report.from_data(munged)
  report.columns([:advert] + pivot_columns.sort)
  report.process

  report.style_cells('myRed', :only => pivot_columns) do |cell, row|
    value = cell.to_i
    (value < 10 && value > 0)
  end

  show(Munger::Render.to_html(report, :classes => {:table => 'other-class'}))
end
35
+
36
# GET /example - sorted, subgrouped and aggregated report with titled columns;
# rate cells below 10 get the 'myRed' style.
get '/example' do
  munged = Munger::Data.load_data(test_data)

  munged.add_column([:advert, :rate]) do |row|
    [row.advert.capitalize, (row.clicks / row.airtime)]
  end

  # Custom aggregation applied to the airtime column.
  airtime_aggregate = Proc.new {|arr| arr.inject(0) {|total, i| i * i + (total - 30) }}

  report = Munger::Report.from_data(munged)
  report.columns(:advert => 'Spot', :airdate => 'Air Date', :airtime => 'Airtime', :rate => 'Rate')
  report.sort = [['airtime', :asc], ['rate', :asc]]
  report.subgroup('airtime', :with_titles => true)
  report.aggregate(airtime_aggregate => :airtime, :average => :rate)
  report.process

  report.style_cells('myRed', :only => :rate) { |cell, row| (cell.to_i < 10) }

  show(Munger::Render.to_html(report, :classes => {:table => 'other-class'}))
end
60
+
61
# Returns a freshly built array of sample advert rows (Hashes keyed by
# :advert, :airtime, :airdate and :clicks) used by the routes in this app.
def test_data
  fields = [:advert, :airtime, :airdate, :clicks]
  values = [
    ["spot 1", 15, "2008-01-01", 301],
    ["spot 1", 30, "2008-01-02", 199],
    ["spot 1", 30, "2008-01-03", 234],
    ["spot 1", 15, "2008-01-04", 342],
    ["spot 2", 30, "2008-01-01", 172],
    ["spot 2", 15, "2008-01-02", 217],
    ["spot 2", 90, "2008-01-03", 1023],
    ["spot 2", 30, "2008-01-04", 321],
    ["spot 3", 60, "2008-01-01", 512],
    ["spot 3", 30, "2008-01-02", 813],
    ["spot 3", 15, "2008-01-03", 333]
  ]
  values.map { |tuple| Hash[fields.zip(tuple)] }
end
76
+
77
# Wraps a rendered report in a minimal HTML page that carries the CSS classes
# the examples use (myRed, group0/group1, groupHeader1).
#
# data - the HTML fragment to place inside <body>; it is inserted verbatim.
#
# Returns the complete page as a String.
def show(data)
  page = %Q(
    <html>
    <head>
    <style>
    .myRed { background: #e44; }

    tr.group0 { background: #bbb;}
    tr.group1 { background: #ddd;}

    tr.groupHeader1 { background: #ccc;}

    table tr td {padding: 0 15px;}
    table tr th { background: #aaa; padding: 5px; }
    body { font-family: verdana, "Lucida Grande", arial, helvetica, sans-serif;
    color: #333; }
    </style>
    </head>
    <body>
    #{data}
    </body>
    </html>
  )
  page
end
File without changes
@@ -0,0 +1,234 @@
1
module Munger #:nodoc:

  # Data munger: wraps a collection of rows (an array of hashes, or objects
  # that respond to #attributes) and reshapes it in place by adding or
  # transforming columns, filtering rows, grouping, and pivoting.
  class Data

    # The current row collection (nil until data has been supplied).
    attr_reader :data

    # options - Hash; :data is the initial row collection (optional).
    # Yields self when a block is given so the data can be munged inline.
    def initialize(options = {})
      @data = options[:data] if options[:data]
      yield self if block_given?
    end

    # Replaces the row collection and drops the cached column list so that
    # #columns reflects the new rows.
    def data=(rows)
      @columns = nil
      @data = rows
    end

    # Appends rows; see #add_data.
    def <<(data)
      add_data(data)
    end

    # Appends +data+ to the existing rows (or adopts it when there are none).
    # Returns the resulting row collection.
    def add_data(data)
      @columns = nil
      @data = @data ? @data + data : data
    end

    #--
    # NOTE:
    # The name seems redundant; why:
    #   Munger::Data.load_data(data)
    # and not:
    #   Munger::Data.load(data)
    #++
    # Builds a Data instance around +data+. +options+ is accepted for
    # interface compatibility but is not used.
    def self.load_data(data, options = {})
      Data.new(:data => data)
    end

    # Returns the column names, taken from the keys of the first row. The
    # result is cached until the data is changed through this class.
    # Returns [] when there are no rows (previously this raised from inside a
    # debugging rescue clause).
    def columns
      return @columns if @columns
      first_row = @data && @data.first
      return [] if first_row.nil?
      @columns = clean_data(first_row).to_hash.keys
    end

    # Adds (or overwrites) one or more columns on every row.
    #
    # names   - a column name, or an Array of column names.
    # options - :default is the value used when no block is given (nil if
    #           not specified).
    #
    # When a block is given it is called with each row (as an Item) and its
    # result becomes the column value. For an Array of names the block (or
    # default) may supply an Array with one value per name; any non-Array
    # value is applied to every named column.
    def add_column(names, options = {})
      default = options[:default]
      @columns = nil
      @data.each_with_index do |row, index|
        col_data = block_given? ? yield(Item.ensure(row)) : default

        if names.is_a? Array
          names.each_with_index do |col, i|
            # Indexing into a scalar (nil[i], 5[i]) was a bug; only index Arrays.
            row[col] = col_data.is_a?(Array) ? col_data[i] : col_data
          end
        else
          row[names] = col_data
        end
        @data[index] = Item.ensure(row)
      end
    end
    alias :add_columns :add_column
    alias :transform_column :add_column
    alias :transform_columns :add_column

    # Wraps a Hash, or the #attributes of an object that has them, in an
    # Item. Anything else is returned untouched.
    def clean_data(hash_or_ar)
      if hash_or_ar.is_a? Hash
        return Item.ensure(hash_or_ar)
      elsif hash_or_ar.respond_to? :attributes
        return Item.ensure(hash_or_ar.attributes)
      end
      hash_or_ar
    end

    # Keeps only the rows for which the block returns a truthy value. The
    # block receives each row as an Item. Returns the new row collection.
    def filter_rows
      kept = []
      @data.each do |row|
        item = Item.ensure(row)
        kept << item if yield(item)
      end
      @data = kept
    end

    # Groups the data like SQL GROUP BY.
    #
    # groups   - a column name or Array of names to group on.
    # agg_hash - maps an aggregation to the column(s) it applies to. The key
    #            is :average, :count, any other value (treated as a sum), or
    #            a [name, Proc] pair whose Proc receives the group's values.
    #
    # Each group keeps the data of its last row plus one
    # "<aggregation>_<column>" entry per aggregated column. Returns the
    # unique names of the aggregate columns that were added.
    def group(groups, agg_hash = {})
      @columns = nil
      group_names = Data.array(groups)
      agg_columns = agg_hash.values.inject([]) { |all, cols| all + Data.array(cols) }.uniq.compact

      buckets = {}
      @data.each do |row|
        row_key = group_names.map { |name| row[name] }
        bucket = (buckets[row_key] ||= {:cells => {}, :data => {}, :count => 0})
        bucket[:data] = clean_data(row)
        agg_columns.each do |col|
          (bucket[:cells][col] ||= []) << row[col]
        end
        bucket[:count] += 1
      end

      new_keys = []
      @data = buckets.values.map do |bucket|
        new_row = bucket[:data]
        agg_hash.each do |key, columns|
          Data.array(columns).each do |col|
            values = bucket[:cells][col]
            if key.is_a?(Array) && key[1].is_a?(Proc)
              newcol = key[0].to_s + '_' + col.to_s
              new_row[newcol] = key[1].call(values)
            else
              newcol = key.to_s + '_' + col.to_s
              new_row[newcol] =
                case key
                when :average
                  # Same arithmetic as before: integer operands truncate.
                  values.inject { |total, v| total + v } / bucket[:count]
                when :count
                  bucket[:count]
                else
                  values.inject { |total, v| total + v }
                end
            end
            new_keys << newcol
          end
        end
        Item.ensure(new_row)
      end

      # Every group contributes the same names; report each one once.
      new_keys.compact.uniq
    end

    # Pivots the data: one output row per distinct value of +rows+, with one
    # new column per distinct value of +columns+, holding the aggregated
    # +value+ column.
    #
    # aggregation - :average, :count, or anything else for a sum.
    #
    # Returns the names of the new pivot columns.
    def pivot(columns, rows, value, aggregation = :sum)
      @columns = nil
      column_names = Data.array(columns)
      row_names = Data.array(rows)

      cells = {}
      @data.each do |row|
        column_key = column_names.map { |name| row[name] }
        row_key = row_names.map { |name| row[name] }
        cells[row_key] ||= {}
        cell = (cells[row_key][column_key] ||= {:sum => 0, :data => {}, :count => 0})
        cell[:data] = clean_data(row)
        cell[:count] += 1
        cell[:sum] += row[value]
      end

      new_keys = {}
      @data = cells.values.map do |row_hash|
        new_row = {}
        row_hash.each do |column_key, cell|
          column_key.each do |ckey|
            # Carry the source row's own fields along with the pivot columns.
            new_row.merge!(clean_data(cell[:data].data))
            new_row[ckey] =
              case aggregation
              when :average then cell[:sum] / cell[:count]
              when :count then cell[:count]
              else cell[:sum]
              end
            new_keys[ckey] = true
          end
        end
        Item.ensure(new_row)
      end

      new_keys.keys
    end

    # Returns +string_or_array+ unchanged when it is an Array, otherwise
    # wraps it in a one-element Array.
    def self.array(string_or_array)
      string_or_array.is_a?(Array) ? string_or_array : [string_or_array]
    end

    # Number of rows.
    def size
      @data.size
    end
    alias :length :size

    # True when the data is a non-empty, enumerable collection whose first
    # row is hash-like, has #attributes, or is an Item (and is not a String).
    # Any error raised while checking counts as invalid.
    def valid?
      return false unless @data.respond_to?(:each_with_index) && @data.size > 0
      first_row = @data.first
      return false if first_row.is_a?(String)
      first_row.respond_to?(:keys) ||
        first_row.respond_to?(:attributes) ||
        first_row.is_a?(Munger::Item)
    rescue
      false
    end

    # Returns the rows as nested arrays.
    # cols is an array of column names; if given, the nested arrays are built
    # in this order, otherwise #columns is used.
    def to_a(cols=nil)
      cols ||= self.columns
      @data.map { |row| cols.map { |col| row[col] } }
    end

  end

end
234
+
@@ -0,0 +1,50 @@
1
module Munger #:nodoc:

  # A single row of data. Wraps a hash-like object and lets fields be read
  # with either String or Symbol keys, or as methods (item.name,
  # item.name = value).
  #
  # NOTE: respond_to? is deliberately left untouched. Munger::Data#clean_data
  # and #valid? probe rows with respond_to?(:attributes) / respond_to?(:keys),
  # so an Item must not claim to respond to arbitrary names.
  class Item

    # The wrapped row object.
    attr_reader :data

    def initialize(data)
      @data = data
    end

    # Looks +key+ up as given; if that yields nil, retries a Symbol key as a
    # String and a String key as a Symbol. Returns nil when nothing is found.
    # Stored false values are returned as false (they used to be lost because
    # the lookup tested truthiness instead of nil).
    def [](key)
      value = @data[key]
      return value unless value.nil?
      case key
      when Symbol then @data[key.to_s]
      when String then @data[key.to_sym]
      end
    end

    # Stores +value+ under +key+ exactly as given.
    def []=(key, value)
      @data[key] = value
    end

    # Field access by method name.
    #
    #   item.foo        reads the field (Symbol key first, then String key)
    #   item.foo = 1    writes the field
    #
    # A write updates the Symbol key when one already holds a value, so that
    # a subsequent read sees it; otherwise it stores under the String name.
    # (The previous code used String#chomp!, which does not strip "=" and
    # returns nil here, so every write landed under a nil key.)
    def method_missing( id, *args )
      name = id.to_s
      if name =~ /=$/
        field = name.chomp('=')
        key = @data[field.to_sym].nil? ? field : field.to_sym
        @data[key] = (args.length < 2 ? args[0] : args)
      else
        self[id]
      end
    end

    # Returns +item+ itself when it already is an Item, otherwise wraps it.
    def self.ensure(item)
      item.is_a?(Munger::Item) ? item : Item.new(item)
    end

    # Returns the wrapped row object (this also lets an Item stand in where a
    # Hash is expected, e.g. Hash#merge!).
    def to_hash
      @data
    end

  end
end
@@ -0,0 +1,31 @@
1
module Munger #:nodoc:
  module Render #:nodoc:
    # Renders a report as comma-separated text: one header line with the
    # column titles followed by one line per processed row.
    class CSV #:nodoc:

      attr_reader :report

      # report - the object to render; it must provide #columns,
      #          #column_title(col) and #process_data.
      def initialize(report)
        @report = report
      end

      # Returns the CSV text. Lines are joined with "\n" and there is no
      # trailing newline. Fields containing a comma, a double quote or a line
      # break are wrapped in double quotes with embedded quotes doubled, so
      # such values no longer corrupt the column layout.
      def render
        columns = @report.columns
        output = []

        # header
        output << csv_line(columns.collect { |col| @report.column_title(col) })

        # body
        @report.process_data.each do |row|
          output << csv_line(columns.collect { |col| row[:data][col] })
        end

        output.join("\n")
      end

      # True when the wrapped object is a Munger::Report.
      def valid?
        @report.is_a? Munger::Report
      end

      private

      # Joins the string form of +values+ into one escaped CSV line.
      def csv_line(values)
        values.collect { |value| escape(value.to_s) }.join(',')
      end

      # Quotes +field+ only when it contains a character that would otherwise
      # break the line or column structure.
      def escape(field)
        return field unless field =~ /[",\r\n]/
        '"' + field.gsub('"', '""') + '"'
      end

    end
  end
end