crosstab 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +23 -0
- data/README.txt +133 -0
- data/Rakefile +17 -0
- data/lib/crosstab.rb +25 -0
- data/lib/crosstab/banner.rb +90 -0
- data/lib/crosstab/cell.rb +162 -0
- data/lib/crosstab/column.rb +64 -0
- data/lib/crosstab/crosstab.rb +243 -0
- data/lib/crosstab/extensions.rb +26 -0
- data/lib/crosstab/generic.rb +83 -0
- data/lib/crosstab/group.rb +28 -0
- data/lib/crosstab/row.rb +65 -0
- data/lib/crosstab/table.rb +85 -0
- data/test/test_banner.rb +81 -0
- data/test/test_cell.rb +95 -0
- data/test/test_column.rb +60 -0
- data/test/test_crosstab.rb +214 -0
- data/test/test_extensions.rb +12 -0
- data/test/test_group.rb +67 -0
- data/test/test_missing.rb +75 -0
- data/test/test_row.rb +60 -0
- data/test/test_table.rb +67 -0
- metadata +87 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
class Crosstab::Column < Crosstab::Generic
  # Creates a column.
  #
  # label:: optional title; stored through +title+ when given.
  # qual::  optional qualification hash (e.g. <tt>:b => 1</tt>); passed to +qualification+ when given.
  #
  def initialize(label=nil, qual=nil)
    title label if label
    qualification qual if qual
  end

  # DSL accessor for the records attribute. Called with no argument it returns the stored
  # array (an empty array until something has been stored); called with a value it stores
  # that value. Crosstab#calculate stores here the records that fit this column's
  # qualification.
  #
  # Example:
  #
  #   my_crosstab = Crosstab::Crosstab.new do
  #     data_source [{:a => 1, :b => 1}, {:a => 2, :b => 2}]
  #
  #     banner do
  #       title "Age"
  #       column "18-34", :b => 1
  #       column "35-54", :b => 2
  #     end
  #
  #     table do
  #       row "Male", :a => 1
  #       row "Female", :a => 2
  #     end
  #   end
  #
  #   my_crosstab.calculate
  #
  #   my_crosstab.banner.columns[0].title
  #   # => "18-34"
  #
  #   my_crosstab.banner.columns[0].records
  #   # => [{:a => 1, :b => 1}]
  #
  def records(value=nil)
    if value
      @records = value
    else
      @records ||= []
    end
  end

  # DSL accessor for the group attribute. Called with a group it stores the group, adds
  # this column to the group's children, and returns the group. Called with no argument
  # it returns the current group (nil when none has been set).
  #
  # Example:
  #
  #   group
  #   # => nil
  #
  #   group Crosstab::Group.new("Gender")
  #   # => Crosstab::Group...
  #
  #   group.title
  #   # => "Gender"
  #
  def group(g=nil)
    if g
      @group = g
      # Register with the group only once, so repeating the call does not pile up duplicates.
      g.children << self unless g.children.any? { |child| child.equal?(self) }
      # Return the group itself (as the example above shows), not the children array.
      @group
    else
      @group ||= nil
    end
  end
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
class Crosstab::Crosstab < Crosstab::Generic
  # Pass in a block and we'll execute it within the context of this instance (instance_eval),
  # so the DSL methods below can be called bare inside the block.
  #
  # Example:
  #
  #   my_crosstab = Crosstab::Crosstab.new do
  #     banner do
  #       column "Total"
  #       column "18-34", :b => 1
  #       column "35-54", :b => 2
  #     end
  #
  #     table do
  #       row "Male", :a => 1
  #       row "Female", :a => 2
  #     end
  #   end
  #
  def initialize(&block)
    instance_eval(&block) if block
  end

  # DSL accessor for the data_source attribute, which normally contains an empty array or an
  # array of hashes. To install your own data, just pass in an array of hashes as the argument.
  #
  # Example:
  #
  #   my_crosstab = Crosstab::Crosstab.new
  #
  #   my_crosstab.data_source
  #   # => []
  #
  #   my_crosstab.data_source [{:a => 1, :b => 2},
  #                            {:a => 2, :b => 2}]
  #
  #   my_crosstab.data_source
  #   # => [{:a => 1, :b => 2},{:a => 2, :b => 2}]
  #
  def data_source(value=nil)
    if value
      @data_source = value
    else
      @data_source ||= []
    end
  end

  # With a block: replaces the banner with a new Crosstab::Banner built from that block.
  # Without a block: returns the current banner, creating a default one on first use.
  #
  # Example:
  #
  #   # First let's look at the default banner with its total column:
  #
  #   banner
  #   #=> Crosstab::Banner
  #
  #   banner.columns.first.title
  #   #=> "Total"
  #
  #   # Now let's create a new banner.
  #   banner do
  #     column "Male", :a => 1
  #     column "Female", :a => 2
  #   end
  #
  #   banner.columns.first.title
  #   #=> "Male"
  #   banner.columns.last.title
  #   #=> "Female"
  #
  def banner(&block)
    if block
      @banner = Crosstab::Banner.new(&block)
    else
      @banner ||= Crosstab::Banner.new
    end
  end

  # Returns the array of tables for this Crosstab.
  def tables
    @tables ||= []
  end

  # Creates a new Crosstab::Table and appends it to tables
  #
  # Example:
  #
  #   my_crosstab = Crosstab::Crosstab.new
  #   my_crosstab.tables
  #   # => []
  #
  #   my_crosstab.table do
  #     title "Q.B Age"
  #     row "18-34", :b => 1
  #     row "35-54", :b => 2
  #   end
  #
  #   my_crosstab.tables
  #   # => [Crosstab::Table]
  #
  #   my_crosstab.tables.first.rows.first.title
  #   #=> "18-34"
  #
  #   my_crosstab.tables.first.rows.last.title
  #   #=> "35-54"
  #
  def table(&block)
    tables << Crosstab::Table.new(&block)
  end

  # Runs the calculations: stores the qualifying records on every banner column, then fills
  # one cell per banner column on every row (and on every row's group) with a base and a
  # frequency.
  #
  # Warning: This is a CPU-heavy method. If you're working with a large record size, processing
  # time can explode out of control. The original author's notes quote roughly 1,000,000+
  # transactions a second, where a transaction is about 1 record x 1 row x 1 column.
  #
  # Some examples:          1 second => N=1,000     * 100 rows * 10 columns
  #                         1 second => N=100,000   * 10 rows  * 1 column
  #                     16.6 minutes => N=1,000,000 * 100 rows * 10 columns
  #
  def calculate
    # pre-calculate which records pass both the crosstab-level and the banner-level filter
    working_records = data_source.select do |record|
      qualifies?(record) && banner.qualifies?(record)
    end

    # pre-calculate which records belong in each column
    banner.columns.each do |column|
      column.records(working_records.select { |record| column.qualifies?(record) })
    end

    # Groups whose subtotal cell has already been rebuilt during THIS run, per column index.
    # Tracking this locally (instead of testing the cell slot for nil) means a group is still
    # calculated only once per column, but a later run refreshes it instead of keeping stale
    # numbers from an earlier run.
    refreshed_groups = Hash.new { |hash, key| hash[key] = [] }

    tables.each do |tbl|
      banner.columns.each_with_index do |column, column_index|
        # pre-calculate which of this column's records belong in this table
        table_records = column.records.select { |record| tbl.qualifies?(record) }
        base_size     = table_records.length

        # do the actual stub calculations
        tbl.rows.each do |row|
          grp = row.group

          # if this row is part of a group, and the group hasn't been calculated for this column yet...
          if grp && !refreshed_groups[column_index].any? { |done| done.equal?(grp) }
            refreshed_groups[column_index] << grp

            grp.cells[column_index] = Crosstab::Cell.new
            grp.cells[column_index].base(base_size)
            grp.cells[column_index].frequency(table_records.select { |record| grp.qualifies?(record) }.length)
          end

          # The actual normal row calculations
          row.cells[column_index] ||= Crosstab::Cell.new
          row.cells[column_index].base(base_size)
          row.cells[column_index].frequency(table_records.select { |record| row.qualifies?(record) }.length)
        end
      end
    end
  end

  # Runs calculate and renders every table as a plain-text report through Text::Reform.
  #
  # The report is assembled as one flat list (report_stack) that is finally splatted into
  # Text::Reform#format, so the ORDER of the pushes below matters: each format string is
  # followed by the data that fills it.
  def to_s
    calculate

    widths = { :hyphenation_min => 3,
               :row_header      => 29, # Width of the title of each row
               :row_indent      => 2,  # Indent 2 spaces if the row is part of a group
               :column          => 7,
               :divider         => 2 }

    # Column reference letters: 1 => "A" ... 26 => "Z". Columns past the 26th get no letter.
    letter_lookup = {}
    (1..26).each { |x| letter_lookup[x] = (x + 64).chr }

    column_count = banner.columns.length
    gutter       = " " * widths[:divider]
    blank_stub   = " " * widths[:row_header] + gutter

    # Consecutive columns sharing a group (or sharing "no group") collapse to [run_length, group].
    group_runs = banner.columns.collect { |col| col.group }.to_freq_chart

    # One field per run of grouped columns, wide enough to span the run; blank for ungrouped runs.
    group_line = lambda do |fill|
      blank_stub + group_runs.collect { |span, grp|
        (grp.nil? ? " " : fill) * (widths[:column] * span + widths[:divider] * (span - 1)) + gutter
      }.join
    end

    # One field (or rule) per banner column.
    column_fields = lambda do |fill|
      (fill * widths[:column] + gutter) * column_count
    end

    format_strings = { :group_headers  => group_line.call("|"),
                       :group_border   => group_line.call("-"),
                       :column_headers => blank_stub + column_fields.call("|"),
                       :column_border  => blank_stub + column_fields.call("-"),
                       :baseline       => "[" * widths[:row_header] + gutter + column_fields.call("]"),
                       :rows           => "[" * widths[:row_header] + gutter + column_fields.call("]"),
                       :indented_rows  => " " * widths[:row_indent] + "[" * (widths[:row_header] - widths[:row_indent]) + gutter + column_fields.call("]"),
                       :underline_row  => "_" * widths[:row_header],
                       :line_break     => "",
                       :page_break     => "-" * 72 }

    r = Text::Reform.new
    r.min_break = widths[:hyphenation_min]

    report_stack = []

    # Groups whose subtotal row has been emitted during THIS call. The printed? flag is still
    # set for anyone inspecting it, but it is sticky, so relying on it alone made every call
    # after the first one drop the group rows.
    printed_groups = []

    tables.each_with_index do |tbl, i|
      # Table Header
      report_stack << "Table #{i + 1}"
      report_stack << tbl.title.dup if tbl.title

      # Group headers
      if banner.columns.any? { |x| x.group }
        report_stack << format_strings[:group_headers]

        banner.columns.each do |col|
          next unless col.group

          # Neighbouring columns of one group share a single header field.
          unless report_stack.last == col.group.title
            report_stack << col.group.title.dup
          end
        end

        report_stack << format_strings[:group_border]
      end

      # Column Headers
      report_stack << format_strings[:column_headers]
      banner.columns.each_with_index do |col, col_index|
        report_stack << [ col.title.dup, "(#{letter_lookup[col_index + 1]})"]
      end

      report_stack << format_strings[:column_border]

      # Baseline
      report_stack << format_strings[:baseline]
      report_stack << "(BASE)"

      first_row = tbl.rows.first
      bases = if first_row
        first_row.cells.collect { |x| x.base }
      else
        # A table without rows has no cells to read the base from; count it directly, the
        # same way calculate derives each cell's base.
        banner.columns.collect { |col| col.records.select { |record| tbl.qualifies?(record) }.length }
      end
      report_stack.concat(bases)
      report_stack << format_strings[:line_break]

      # Each row
      tbl.rows.each do |row|
        grp = row.group

        if grp && !printed_groups.any? { |done| done.equal?(grp) }
          printed_groups << grp
          grp.printed? true # Keep the flag up to date for outside observers

          report_stack << format_strings[:rows]
          report_stack << [grp.title.dup, "-" * widths[:row_header]]
          report_stack.concat(grp.cells.collect { |cell| cell.result })
          report_stack << format_strings[:line_break]
        end

        report_stack << format_strings[grp ? :indented_rows : :rows] # if it's part of a group then indent it.
        report_stack << row.title.dup
        report_stack.concat(row.cells.collect { |cell| cell.result })
        report_stack << format_strings[:line_break]
      end

      report_stack << format_strings[:page_break]
    end

    r.format(*report_stack)
  end
end
|
243
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class Array

  # Converts a flat list to a frequency chart: each run of consecutive equal elements
  # (compared with ==) becomes one [run_length, element] pair, in order. The receiver is
  # not modified.
  #
  # Example:
  #
  #   [nil, "a", "a", "a", "b", "b", "b", nil, "c"].to_freq_chart
  #   # => [[1,nil],[3,"a"],[3,"b"],[1,nil],[1,"c"]]
  #
  #   [].to_freq_chart
  #   # => []
  #
  def to_freq_chart
    runs = []

    each do |item|
      if !runs.empty? && runs.last[1] == item
        # Same value as the run in progress: extend it.
        runs.last[0] += 1
      else
        # Different value (or very first element): open a new run.
        runs << [1, item]
      end
    end

    runs
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
class Crosstab::Generic
  # DSL accessor for the title attribute. With an argument it stores and returns it; with
  # no argument it returns the stored title (nil until one has been set).
  #
  # Example:
  #
  #   title
  #   # => nil
  #
  #   title "Q.A Gender:"
  #   # => "Q.A Gender:"
  #
  #   title
  #   # => "Q.A Gender:"
  #
  def title(value=nil)
    if value
      @title = value
    else
      @title ||= nil
    end
  end

  # Redefines the qualifies? test for this object. Only the FIRST key/value pair of the
  # hash is used; any further pairs are ignored.
  #
  # Example:
  #
  #   # By default, qualifies? always returns true.
  #
  #   qualifies? :a => 1
  #   # => true
  #
  #   # But if we set it...
  #   qualification :a => 2
  #
  #   # Then qualifies? returns false unless :a == 2
  #   qualifies? :a => 1
  #   # => false
  #
  #   qualifies? :a => 2
  #   # => true
  #
  def qualification(hash)
    @key, @value = *hash.to_a.first

    # Performance hack (original author's figures: overall benchmark improved from 1.37 to
    # 0.96): the key and value are rendered inline into the source of a per-object qualifies?.
    # Don't use any weird keys or values. Stick to standard ruby library ones unless you know
    # what you're doing -- the key and value must survive an inspect/eval round trip.
    #
    # NOTE(review): this evaluates generated source. Keys and values must come from trusted
    # code, never from external input.
    instance_eval %{
      def qualifies?(i)
        i[#{@key.inspect}] == #{@value.inspect}
      end
    }
  end

  # Returns true when a record passes the qualification filter.
  # This will always return true until set by qualification.
  def qualifies?(record)
    true
  end

  # DSL accessor for the printed flag. Returns false until the flag has been set, then
  # whatever it was last set to. It's just a useful flag when building a report (e.g., you
  # can set printed? to true everytime you touch an object, and then you'll know if you've
  # printed a group already.)
  #
  # Passing false clears the flag again; only a missing (nil) argument means "just read it".
  #
  # Example:
  #
  #   printed?
  #   # => false
  #
  #   printed? true
  #   # => true
  #
  #   printed?
  #   # => true
  #
  #   printed? false
  #   # => false
  #
  def printed?(value=nil)
    if value.nil?
      @printed ||= false
    else
      @printed = value
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class Crosstab::Group < Crosstab::Row

  # DSL accessor for the children attribute. With no argument it returns the list of
  # children (an empty array until something has been added); with an argument it replaces
  # the list. Rows and columns add themselves here when their group is set.
  #
  # Example:
  #
  #   children
  #   #=> []
  #
  #   children [ Crosstab::Row.new("Male", :a => 1) ]
  #
  #   children.first.title
  #   #=> "Male"
  #
  def children(value=nil)
    if value
      @children = value
    else
      @children ||= []
    end
  end

  # Returns true when a record passes the qualification filter of at least one child of
  # the group (so always false for a group without children). This is how subtotals work.
  def qualifies?(record)
    children.any? { |child| child.qualifies?(record) }
  end
end
|
data/lib/crosstab/row.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
class Crosstab::Row < Crosstab::Generic
  # Creates a row.
  #
  # label:: optional title; stored through +title+ when given.
  # qual::  optional qualification hash (e.g. <tt>:a => 1</tt>); passed to +qualification+ when given.
  #
  def initialize(label=nil, qual=nil)
    title label if label
    qualification qual if qual
  end

  # DSL accessor for the cells attribute. Called with no argument it returns the stored
  # array (an empty array until something has been stored); called with a value it stores
  # that value. After Crosstab#calculate it holds one cell per column in the banner.
  #
  # Example:
  #
  #   my_crosstab = Crosstab::Crosstab.new do
  #     data_source [{:a => 1}, {:a => 2}]
  #
  #     table do
  #       title "Q.A Gender:"
  #       row "Male", :a => 1
  #       row "Female", :a => 2
  #     end
  #   end
  #
  #   my_crosstab.calculate
  #
  #   my_crosstab.tables[0].rows[0].title
  #   # => "Male"
  #
  #   my_crosstab.tables[0].rows[0].cells[0].frequency
  #   # => 1
  #
  #   my_crosstab.tables[0].rows[0].cells[0].base
  #   # => 2
  #
  #   my_crosstab.tables[0].rows[0].cells[0].percentage
  #   # => 0.5
  #
  def cells(value=nil)
    if value
      @cells = value
    else
      @cells ||= []
    end
  end

  # DSL accessor for the group attribute. Called with a group it stores the group, adds
  # this row to the group's children, and returns the group. Called with no argument it
  # returns the current group (nil when none has been set).
  #
  # Example:
  #
  #   group
  #   # => nil
  #
  #   group Crosstab::Group.new("Gender")
  #   # => Crosstab::Group...
  #
  #   group.title
  #   # => "Gender"
  #
  def group(g=nil)
    if g
      @group = g
      # Register with the group only once, so repeating the call does not pile up duplicates.
      g.children << self unless g.children.any? { |child| child.equal?(self) }
      # Return the group itself (as the example above shows), not the children array.
      @group
    else
      @group ||= nil
    end
  end
end
|