crosstab 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +23 -0
- data/README.txt +133 -0
- data/Rakefile +17 -0
- data/lib/crosstab.rb +25 -0
- data/lib/crosstab/banner.rb +90 -0
- data/lib/crosstab/cell.rb +162 -0
- data/lib/crosstab/column.rb +64 -0
- data/lib/crosstab/crosstab.rb +243 -0
- data/lib/crosstab/extensions.rb +26 -0
- data/lib/crosstab/generic.rb +83 -0
- data/lib/crosstab/group.rb +28 -0
- data/lib/crosstab/row.rb +65 -0
- data/lib/crosstab/table.rb +85 -0
- data/test/test_banner.rb +81 -0
- data/test/test_cell.rb +95 -0
- data/test/test_column.rb +60 -0
- data/test/test_crosstab.rb +214 -0
- data/test/test_extensions.rb +12 -0
- data/test/test_group.rb +67 -0
- data/test/test_missing.rb +75 -0
- data/test/test_row.rb +60 -0
- data/test/test_table.rb +67 -0
- metadata +87 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
class Crosstab::Column < Crosstab::Generic
|
2
|
+
# Builds a column. A truthy label is handed to the title DSL method and a
# truthy qual to the qualification DSL method; nil/false arguments are skipped.
def initialize(label=nil, qual=nil)
  if label
    title(label)
  end

  if qual
    qualification(qual)
  end
end
|
6
|
+
|
7
|
+
# attr_reader for the records attribute which should contain an empty array, or -- if calculate was called on
|
8
|
+
# the crosstab -- this will contain the array of records that fit this column's qualification.
|
9
|
+
#
|
10
|
+
# Example:
|
11
|
+
#
|
12
|
+
# my_crosstab = Crosstab::Crosstab.new do
|
13
|
+
# data_source [{:a => 1, :b => 1}, {:a => 2, :b => 2}]
|
14
|
+
#
|
15
|
+
# banner do
|
16
|
+
# title "Age"
|
17
|
+
# column "18-34", :b => 1
|
18
|
+
# column "35-54", :b => 2
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# table do
|
22
|
+
# row "Male", :a => 1
|
23
|
+
# row "Female", :a => 2
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# my_crosstab.calculate
|
28
|
+
#
|
29
|
+
# my_crosstab.banner.columns[0].title
|
30
|
+
# # => "18-34"
|
31
|
+
#
|
32
|
+
# my_crosstab.banner.columns[0].records
|
33
|
+
# # => [{:a => 1, :b => 1}]
|
34
|
+
#
|
35
|
+
# Combined reader/writer for the records of this column.
# With a truthy argument the records are replaced and the argument is returned;
# otherwise the stored array is returned (an empty array is created on first read).
def records(value=nil)
  return @records = value if value

  @records ||= []
end
|
42
|
+
|
43
|
+
# DSL accessor for the group attribute
|
44
|
+
#
|
45
|
+
# Example:
|
46
|
+
#
|
47
|
+
# group
|
48
|
+
# # => nil
|
49
|
+
#
|
50
|
+
# group Crosstab::Group.new("Gender")
|
51
|
+
# # => Crosstab::Group...
|
52
|
+
#
|
53
|
+
# group.title
|
54
|
+
# # => "Gender"
|
55
|
+
#
|
56
|
+
# Combined reader/writer for the group this column belongs to.
#
# Without an argument, returns the current group (nil when none is set).
# With a group, stores it, registers this column in the group's children and
# returns that children array (same return value as before).
#
# Membership is kept consistent: the column is removed from a previously
# assigned, different group, and is never added to the same group twice.
def group(g=nil)
  return @group ||= nil unless g

  previous = (@group ||= nil)
  if previous and not previous.equal?(g)
    # Leave the old group so its subtotal stops counting this column.
    previous.children.delete_if { |child| child.equal?(self) }
  end

  @group = g
  members = g.children
  members << self unless members.any? { |child| child.equal?(self) } # Add self to its list of children, once
  members
end
|
64
|
+
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
class Crosstab::Crosstab < Crosstab::Generic
|
2
|
+
# Pass in a block and we'll execute it within the context of this class.
|
3
|
+
#
|
4
|
+
# Example:
|
5
|
+
#
|
6
|
+
# my_crosstab = Crosstab.new do
|
7
|
+
# banner do
|
8
|
+
# column "Total"
|
9
|
+
# column "18-34", :b => 1
|
10
|
+
# column "35-54", :b => 2
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# table do
|
14
|
+
# row "Male", :a => 1
|
15
|
+
# row "Female", :a => 2
|
16
|
+
# end
|
17
|
+
# end
|
18
|
+
#
|
19
|
+
# When a block is given it is evaluated with this crosstab as self, so the
# DSL methods can be called bare inside it. Without a block nothing happens.
def initialize(&block)
  return unless block

  instance_eval(&block)
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
# DSL accessor for the data_source attribute which normally contains an empty array or an array of hashes. To install
|
26
|
+
# your own data, just pass in an array of hashes as the argument.
|
27
|
+
#
|
28
|
+
# Example:
|
29
|
+
#
|
30
|
+
# my_crosstab = Crosstab::Crosstab.new
|
31
|
+
#
|
32
|
+
# my_crosstab.data_source
|
33
|
+
# # => []
|
34
|
+
#
|
35
|
+
# my_crosstab.data_source [{:a => 1, :b => 2},
|
36
|
+
# {:a => 2, :b => 2}]
|
37
|
+
#
|
38
|
+
# my_crosstab.data_source
|
39
|
+
# # => [{:a => 1, :b => 2},{:a => 2, :b => 2}]
|
40
|
+
#
|
41
|
+
# Combined reader/writer for the data source.
# A truthy argument replaces the stored data and is returned; otherwise the
# stored data is returned, defaulting to a memoized empty array.
def data_source(value=nil)
  value ? (@data_source = value) : (@data_source ||= [])
end
|
48
|
+
|
49
|
+
# Creates a new Crosstab::Banner
|
50
|
+
#
|
51
|
+
# Example:
|
52
|
+
#
|
53
|
+
# # First let's look at the default banner with its total column:
|
54
|
+
#
|
55
|
+
# banner
|
56
|
+
# #=> Crosstab::Banner
|
57
|
+
#
|
58
|
+
# banner.columns.first.title
|
59
|
+
# #=> "Total"
|
60
|
+
#
|
61
|
+
# # Now let's create a new banner.
|
62
|
+
# banner do
|
63
|
+
# column "Male", :a => 1
|
64
|
+
# column "Female", :a => 2
|
65
|
+
# end
|
66
|
+
#
|
67
|
+
# banner.columns.first.title
|
68
|
+
# #=> "Male"
|
69
|
+
# banner.columns.last.title
|
70
|
+
# #=> "Female"
|
71
|
+
#
|
72
|
+
# With a block, builds a fresh Crosstab::Banner from it, stores and returns it.
# Without a block, returns the stored banner, creating a default one on first use.
def banner(&block)
  return @banner = Crosstab::Banner.new(&block) if block

  @banner ||= Crosstab::Banner.new
end
|
79
|
+
|
80
|
+
# Returns the array of tables for this Crosstab.
|
81
|
+
# The list of tables, created as an empty array the first time it is read.
def tables
  @tables = [] unless defined?(@tables) && @tables
  @tables
end
|
84
|
+
|
85
|
+
# Creates a new Crosstab::Table and appends it to tables
|
86
|
+
#
|
87
|
+
# Example:
|
88
|
+
#
|
89
|
+
# my_crosstab = Crosstab.new
|
90
|
+
# my_crosstab.tables
|
91
|
+
# # => []
|
92
|
+
#
|
93
|
+
# my_crosstab.table do
|
94
|
+
# title "Q.B Age"
|
95
|
+
# row "18-34", :b => 1
|
96
|
+
# row "35-54", :b => 2
|
97
|
+
# end
|
98
|
+
#
|
99
|
+
# my_crosstab.tables
|
100
|
+
# # => [Crosstab::Table]
|
101
|
+
#
|
102
|
+
# my_crosstab.tables.first.rows.first.title
|
103
|
+
# #=> "18-34"
|
104
|
+
#
|
105
|
+
# my_crosstab.tables.first.rows.last.title
|
106
|
+
# #=> "35-54"
|
107
|
+
#
|
108
|
+
# Builds a Crosstab::Table from the block (if any), appends it to tables and
# returns the tables array.
def table(&block)
  fresh_table = Crosstab::Table.new(&block)
  tables.push(fresh_table)
end
|
111
|
+
|
112
|
+
# Runs the calculations.
|
113
|
+
#
|
114
|
+
# Warning: This is a CPU-heavy method. If you're working with a large record size, processing time can explode out of control.
|
115
|
+
# You can expect this routine to process 1,000,000+ transactions a second. What's a transaction? It's just about 1 record x
|
116
|
+
# 1 row x 1 column.
|
117
|
+
#
|
118
|
+
# Some examples: 1 second => N=1,000 * 100 rows * 10 columns
|
119
|
+
# 1 second => N=100,000 * 10 rows * 1 column
|
120
|
+
# 16.6 minutes => N=1,000,000 * 100 rows * 10 columns
|
121
|
+
#
|
122
|
+
# Fills in every cell of every table from data_source.
#
# Steps:
#   1. keep the records accepted by both this crosstab and the banner;
#   2. store, on each banner column, the kept records that column accepts;
#   3. for each table/column pair, narrow to the records the table accepts and
#      set base (record count) and frequency (accepting count) on each row's
#      cell for that column, and on the cell of the row's group if it has one.
#
# Group cells are recomputed on every call, once per group and column, so a
# second calculate after the data changed no longer leaves stale subtotals.
#
# Returns the tables array.
def calculate
  # pre-calculate which interviews belong in the banner run
  working_records = data_source.select do |i|
    qualifies?(i) && banner.qualifies?(i)
  end

  # pre-calculate which interviews belong in each column
  banner.columns.each do |column|
    column.records(working_records.select { |i| column.qualifies? i })
  end

  # Group/column pairs already computed during THIS run. Checking this instead
  # of "is the cell nil?" lets later runs refresh the group cells.
  computed_groups = {}

  tables.each do |table|
    banner.columns.each_with_index do |column, column_index|
      # pre-calculate which interviews belong in this table
      table_records = column.records.select { |i| table.qualifies? i }
      base_count = table_records.length

      # do the actual stub calculations
      table.rows.each do |row|
        group = row.group

        # if this row is part of a group, and the group hasn't already been calculated for this cell...
        if group
          group_key = [group.object_id, column_index]
          unless computed_groups[group_key]
            computed_groups[group_key] = true
            group.cells[column_index] = Crosstab::Cell.new
            group.cells[column_index].base base_count
            group.cells[column_index].frequency table_records.select { |i| group.qualifies? i }.length
          end
        end

        # The actual normal row calculations
        row.cells[column_index] ||= Crosstab::Cell.new
        row.cells[column_index].base base_count
        row.cells[column_index].frequency table_records.select { |i| row.qualifies? i }.length
      end
    end
  end
end
|
157
|
+
|
158
|
+
# Renders all tables as a plain-text report.
#
# Calls calculate first, then builds a flat stack for Text::Reform: each
# format string is followed by the values that fill its fields. The stack is
# passed to Text::Reform#format and that call's result is returned.
#
# Fixes over the previous version:
# * Group subtotal rows are tracked per call, so rendering the same crosstab
#   twice yields the same report. Previously the permanent printed? flag made
#   group rows disappear from the second render onwards. The flag is still
#   set, for callers that read it.
# * A table without rows no longer raises; its (BASE) line is filled with
#   blank values, since bases are only stored on row cells.
def to_s
  calculate

  widths = { :hyphenation_min => 3,
             :row_header => 29, # Width of the title of each row
             :row_indent => 2,  # Indent 2 spaces if the row is part of a group
             :column => 7,
             :divider => 2 }

  # Column letters printed under each column title: 1 => "A" ... 26 => "Z".
  # NOTE(review): columns beyond the 26th get an empty "()" label.
  letter_lookup = {}
  (1..26).each { |x| letter_lookup[x] = (x+64).chr }

  # Pieces shared by several format strings.
  gap = " " * widths[:divider]
  margin = " " * widths[:row_header] + gap
  column_count = banner.columns.length
  data_fields = ("]" * widths[:column] + gap) * column_count

  # Consecutive columns sharing a group become one wide field; columns
  # without a group become blank space of the same width.
  group_runs = banner.columns.collect { |x| x.group }.to_freq_chart
  group_line = lambda do |fill|
    margin + group_runs.collect { |x| (x[1].nil? ? " " : fill) * (widths[:column] * x[0] + widths[:divider] * (x[0] - 1)) + gap }.join
  end

  format_strings = { :group_headers => group_line.call("|"),
                     :group_border => group_line.call("-"),
                     :column_headers => margin + ("|" * widths[:column] + gap) * column_count,
                     :column_border => margin + ("-" * widths[:column] + gap) * column_count,
                     :baseline => "[" * widths[:row_header] + gap + data_fields,
                     :rows => "[" * widths[:row_header] + gap + data_fields,
                     :indented_rows => " " * widths[:row_indent] + "[" * (widths[:row_header] - widths[:row_indent]) + gap + data_fields,
                     :underline_row => "_" * widths[:row_header],
                     :line_break => "",
                     :page_break => "-" * 72 }

  r = Text::Reform.new
  r.min_break = widths[:hyphenation_min]

  # Groups whose subtotal row was already emitted during this call.
  printed_groups = {}

  report_stack = []
  tables.each_with_index do |tbl, i|
    # Table Header
    report_stack << "Table #{i + 1}"
    report_stack << tbl.title.dup if tbl.title

    # Group headers
    if banner.columns.any? { |x| x.group }
      report_stack << format_strings[:group_headers]

      banner.columns.each do |col|
        if col.group
          # Columns of one group are adjacent, so the title is pushed once per run.
          unless report_stack.last == col.group.title
            report_stack << col.group.title.dup
          end
        end
      end

      report_stack << format_strings[:group_border]
    end

    # Column Headers
    report_stack << format_strings[:column_headers]
    banner.columns.each_with_index do |col, col_index|
      report_stack << [ col.title.dup, "(#{letter_lookup[col_index + 1]})"]
    end

    report_stack << format_strings[:column_border]

    # Baseline
    report_stack << format_strings[:baseline]
    report_stack << "(BASE)"
    first_row = tbl.rows[0]
    if first_row
      report_stack += first_row.cells.collect { |x| x.base }
    else
      # No rows means no cells to read a base from; keep the fields filled.
      report_stack += banner.columns.collect { "" }
    end
    report_stack << format_strings[:line_break]

    # Each row
    tbl.rows.each do |row|
      group = row.group
      if group and not printed_groups[group.object_id]
        printed_groups[group.object_id] = true
        group.printed?(true) # Kept for callers that inspect the flag

        report_stack << format_strings[:rows]
        report_stack << [group.title.dup, "-" * widths[:row_header]]
        report_stack += group.cells.collect { |cell| cell.result }
        report_stack << format_strings[:line_break]
      end

      report_stack << format_strings[group ? :indented_rows : :rows] # if it's part of a group then indent it.
      report_stack << row.title.dup
      report_stack += row.cells.collect { |cell| cell.result }
      report_stack << format_strings[:line_break]
    end

    report_stack << format_strings[:page_break]
  end

  r.format(*report_stack)
end
|
242
|
+
end
|
243
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class Array

  # Run-length encodes the array: every stretch of consecutive equal elements
  # becomes one [count, element] pair, in the original order.
  #
  # Example:
  #
  #   [nil, "a", "a", "a", "b", "b", "b", nil, "c"].to_freq_chart
  #   # => [[1,nil],[3,"a"],[3,"b"],[1,nil],[1,"c"]]
  #
  def to_freq_chart
    runs = []

    each do |item|
      current = runs.last

      if current and current[1] == item
        # Same value as the run in progress: extend it.
        current[0] += 1
      else
        # Value changed (or first element): open a new run.
        runs << [1, item]
      end
    end

    runs
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
class Crosstab::Generic
|
2
|
+
# DSL accessor for the title attribute
|
3
|
+
#
|
4
|
+
# Example:
|
5
|
+
#
|
6
|
+
# title
|
7
|
+
# # => nil
|
8
|
+
#
|
9
|
+
# title "Q.A Gender:"
|
10
|
+
# # => "Q.A Gender:"
|
11
|
+
#
|
12
|
+
# title
|
13
|
+
# # => "Q.A Gender:"
|
14
|
+
#
|
15
|
+
# Combined reader/writer for the title.
# A truthy argument is stored and returned; otherwise the stored title is
# returned (nil when none was ever set).
def title(value=nil)
  return @title = value if value

  @title ||= nil
end
|
22
|
+
|
23
|
+
# Redefines the qualifies? test for this object
|
24
|
+
#
|
25
|
+
# Example:
|
26
|
+
#
|
27
|
+
# # By default, qualifies? always returns true.
|
28
|
+
#
|
29
|
+
# qualifies? :a => 1
|
30
|
+
# # => true
|
31
|
+
#
|
32
|
+
# # But if we set it...
|
33
|
+
# qualification :a => 2
|
34
|
+
#
|
35
|
+
# # Then qualifies? returns false unless :a == 2
|
36
|
+
# qualifies? :a => 1
|
37
|
+
# # => false
|
38
|
+
#
|
39
|
+
# qualifies? :a => 2
|
40
|
+
# # => true
|
41
|
+
#
|
42
|
+
# Replaces this object's qualifies? test with one derived from +hash+.
#
# Every key/value pair becomes a condition record[key] == value and a record
# qualifies only when ALL of them hold. Previously only the first pair was
# used and any further pairs were silently ignored. A single-pair hash
# behaves exactly as before. An empty hash yields a test that always passes.
#
# @key and @value still hold the first pair, as before.
def qualification(hash)
  pairs = hash.to_a
  @key, @value = *pairs.first

  # Performance hack: improves overall benchmark from 1.37 to 0.96 by rendering the key and value inline.
  # Don't use any weird keys or values. Stick to standard ruby library ones unless you know what you're doing.
  #
  # WARNING: the inspect output of every key and value is evaluated as Ruby
  # source. Only pass keys/values whose inspect form is a valid literal, and
  # never pass objects built from untrusted input.
  conditions = pairs.collect { |key, value| "i[#{key.inspect}] == #{value.inspect}" }
  conditions << "true" if conditions.empty?

  instance_eval %{
    def qualifies?(i)
      #{conditions.join(" && ")}
    end
  }
end
|
53
|
+
|
54
|
+
# Returns true when a record passes the qualification filter.
|
55
|
+
# This will always return true until set by qualification.
|
56
|
+
# Default filter: accepts every record. The qualification method replaces
# this on a per-object basis.
def qualifies?(record)
  # The record is deliberately not inspected here.
  true
end
|
59
|
+
|
60
|
+
# DSL accessor for the printed flag. Returns true once this object has been flagged as printed, and false until then.
|
61
|
+
# It's just a useful flag when building a report (e.g., you can set printed? to true every time you touch
|
62
|
+
# an object, and then you'll know if you've printed a group already.)
|
63
|
+
#
|
64
|
+
# Example:
|
65
|
+
#
|
66
|
+
# printed?
|
67
|
+
# # => false
|
68
|
+
#
|
69
|
+
# printed? true
|
70
|
+
# # => true
|
71
|
+
#
|
72
|
+
# printed?
|
73
|
+
# # => true
|
74
|
+
#
|
75
|
+
|
76
|
+
# Combined reader/writer for the printed flag.
#
# Without an argument (or with nil), returns the flag, which is false until set.
# With any other argument, including false, stores it and returns it.
# Previously `printed? false` was treated as a read, so a flag that had been
# set to true could never be cleared.
def printed?(value=nil)
  if value.nil?
    @printed ||= false
  else
    @printed = value
  end
end
|
83
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class Crosstab::Group < Crosstab::Row
|
2
|
+
|
3
|
+
# attr_reader for the children attribute which should contain an empty array, or a list of rows or columns
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
#
|
7
|
+
# children
|
8
|
+
# #=> []
|
9
|
+
#
|
10
|
+
# children [ Crosstab::Row.new("Male", :a => 1) ]
|
11
|
+
#
|
12
|
+
# children.first.title
|
13
|
+
# #=> "Male"
|
14
|
+
#
|
15
|
+
# Combined reader/writer for the members of this group.
# A truthy argument replaces the list and is returned; otherwise the stored
# list is returned, defaulting to a memoized empty array.
def children(value=nil)
  value ? (@children = value) : (@children ||= [])
end
|
22
|
+
|
23
|
+
# Returns true when a record passes a qualification filter belonging to any child of the group. This is how
|
24
|
+
# subtotals work.
|
25
|
+
# A record qualifies for the group as soon as one child accepts it.
# Children are asked in order and the search stops at the first acceptance;
# a group without children accepts nothing.
def qualifies?(record)
  children.each do |member|
    return true if member.qualifies?(record)
  end

  false
end
|
28
|
+
end
|
data/lib/crosstab/row.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
class Crosstab::Row < Crosstab::Generic
|
2
|
+
# Builds a row. The label, when truthy, goes through the title DSL method;
# the qual, when truthy, goes through the qualification DSL method.
def initialize(label=nil, qual=nil)
  label && title(label)
  qual && qualification(qual)
end
|
6
|
+
|
7
|
+
# attr_reader for the cells attribute which should contain an empty array, or -- if calculate was called on
|
8
|
+
# the crosstab -- this will contain an array of cells, one for each column in the banner.
|
9
|
+
#
|
10
|
+
# Example:
|
11
|
+
#
|
12
|
+
# my_crosstab = Crosstab::Crosstab.new do
|
13
|
+
# data_source [{:a => 1}, {:a => 2}]
|
14
|
+
#
|
15
|
+
# table do
|
16
|
+
# title "Q.A Gender:"
|
17
|
+
# row "Male", :a => 1
|
18
|
+
# row "Female", :a => 2
|
19
|
+
# end
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# my_crosstab.calculate
|
23
|
+
#
|
24
|
+
# my_crosstab.tables[0].rows[0].title
|
25
|
+
# # => "Male"
|
26
|
+
#
|
27
|
+
# my_crosstab.tables[0].rows[0].cells[0].frequency
|
28
|
+
# # => 1
|
29
|
+
#
|
30
|
+
# my_crosstab.tables[0].rows[0].cells[0].base
|
31
|
+
# # => 2
|
32
|
+
#
|
33
|
+
# my_crosstab.tables[0].rows[0].cells[0].percentage
|
34
|
+
# # => 0.5
|
35
|
+
#
|
36
|
+
# Combined reader/writer for the cells of this row.
# Without a truthy argument the stored array is returned (created empty on
# first read); with one, the cells are replaced and the argument returned.
def cells(value=nil)
  unless value
    return @cells ||= []
  end

  @cells = value
end
|
43
|
+
|
44
|
+
# DSL accessor for the group attribute
|
45
|
+
#
|
46
|
+
# Example:
|
47
|
+
#
|
48
|
+
# group
|
49
|
+
# # => nil
|
50
|
+
#
|
51
|
+
# group Crosstab::Group.new("Gender")
|
52
|
+
# # => Crosstab::Group...
|
53
|
+
#
|
54
|
+
# group.title
|
55
|
+
# # => "Gender"
|
56
|
+
#
|
57
|
+
# Combined reader/writer for the group this row belongs to.
#
# Without an argument, returns the current group (nil when none is set).
# With a group, stores it, registers this row in the group's children and
# returns that children array (same return value as before).
#
# Membership is kept consistent: the row is removed from a previously
# assigned, different group (whose subtotal would otherwise keep counting
# it), and is never added to the same group twice.
def group(g=nil)
  if g
    old_group = (@group ||= nil)
    if old_group and not old_group.equal?(g)
      old_group.children.delete_if { |child| child.equal?(self) }
    end

    @group = g
    already_member = g.children.any? { |child| child.equal?(self) }
    g.children << self unless already_member # Add self to its list of children, once
    g.children
  else
    @group ||= nil
  end
end
|
65
|
+
end
|