tablestakes 0.8.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +39 -13
- data/doc/OrderedRow.html +272 -0
- data/doc/Table.html +651 -91
- data/doc/created.rid +2 -2
- data/doc/index.html +2 -0
- data/doc/js/search_index.js +1 -1
- data/doc/table_of_contents.html +27 -0
- data/lib/tablestakes.rb +273 -115
- data/spec/spec_helper.rb +9 -0
- data/spec/table_spec.rb +71 -17
- metadata +47 -4
data/doc/created.rid
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
Wed,
|
2
|
-
lib/tablestakes.rb
|
1
|
+
Wed, 06 Aug 2014 07:36:36 -0400
|
2
|
+
lib/tablestakes.rb Fri, 25 Jul 2014 07:05:42 -0400
|
data/doc/index.html
CHANGED
data/doc/js/search_index.js
CHANGED
@@ -1 +1 @@
|
|
1
|
-
var search_data = {"index":{"searchIndex":["table","bottom()","column()","count()","get_columns()","get_rows()","intersect()","join()","length()","new()","row()","select()","size()","sub()","sub!()","tally()","to_a()","to_s()","top()","union()","where()","write_file()"],"longSearchIndex":["table","table#bottom()","table#column()","table#count()","table#get_columns()","table#get_rows()","table#intersect()","table#join()","table#length()","table::new()","table#row()","table#select()","table#size()","table#sub()","table#sub!()","table#tally()","table#to_a()","table#to_s()","table#top()","table#union()","table#where()","table#write_file()"],"info":[["Table","","Table.html","","<p>This class is a Ruby representation of a table. All data is captured as\ntype <code>String</code> by default. Columns …\n"],["bottom","Table","Table.html#method-i-bottom","(colname, num=1)","<p>
|
1
|
+
var search_data = {"index":{"searchIndex":["orderedrow","table","<=>()","add_column()","add_row()","add_rows()","bottom()","column()","count()","data()","del_column()","del_row()","each()","empty?()","get_columns()","get_rows()","intersect()","join()","length()","new()","new()","row()","select()","size()","sort()","sort!()","sub()","sub!()","tally()","to_a()","to_s()","top()","union()","where()","write_file()"],"longSearchIndex":["orderedrow","table","orderedrow#<=>()","table#add_column()","table#add_row()","table#add_rows()","table#bottom()","table#column()","table#count()","orderedrow#data()","table#del_column()","table#del_row()","table#each()","table#empty?()","table#get_columns()","table#get_rows()","table#intersect()","table#join()","table#length()","orderedrow::new()","table::new()","table#row()","table#select()","table#size()","table#sort()","table#sort!()","table#sub()","table#sub!()","table#tally()","table#to_a()","table#to_s()","table#top()","table#union()","table#where()","table#write_file()"],"info":[["OrderedRow","","OrderedRow.html","","<p>This class functions as a temporary representation of a row. The OrderedRow\ncontains information about …\n"],["Table","","Table.html","","<p>This class is a Ruby representation of a table. All data is captured as\ntype <code>String</code> by default. Columns …\n"],["<=>","OrderedRow","OrderedRow.html#method-i-3C-3D-3E","(other)","<p>Implements comparable\n<p>Attributes\n<p>other — The row to be compared\n"],["add_column","Table","Table.html#method-i-add_column","(*args)","<p>Add a column to the Table. Raises ArgumentError if the column name is\nalready taken or there are not …\n"],["add_row","Table","Table.html#method-i-add_row","(*row)","<p>Add a row to the Table, appending it to the end. Raises ArgumentError if \nthere are not the correct number …\n"],["add_rows","Table","Table.html#method-i-add_rows","(array_of_rows)","<p>Add one or more rows to the Table, appending it to the end. 
Raises\nArgumentError if there are not the …\n"],["bottom","Table","Table.html#method-i-bottom","(colname, num=1)","<p>Returns counts of the least frequent values found in a given column in the\nform of a Table. Raises …\n"],["column","Table","Table.html#method-i-column","(colname)","<p>Return a copy of a column from the table, identified by column name.\nReturns empty Array if column name …\n"],["count","Table","Table.html#method-i-count","(colname=nil, value=nil)","<p>Counts the number of instances of a particular string, given a column name,\nand returns an integer >= …\n"],["data","OrderedRow","OrderedRow.html#method-i-data","()","<p>Returns the row elements in an <code>Array</code>\n<p>Attributes\n<p>none\n"],["del_column","Table","Table.html#method-i-del_column","(colname)","<p>Delete a column from the Table. Raises ArgumentError if the column name\ndoes not exist.\n<p>Attributes\n<p>colname … — "],["del_row","Table","Table.html#method-i-del_row","(rownum)","<p>Delete a row from the Table. Raises ArgumentError if the row number is not\nfound\n<p>Attributes\n<p>rownum — <code>FixNum</code> …\n"],["each","Table","Table.html#method-i-each","()","<p>Defines an iterator for <code>Table</code> which produces rows of data\n(headers omitted) for its calling block.\n"],["empty?","Table","Table.html#method-i-empty-3F","()","<p>Return true if the Table is empty, false otherwise.\n"],["get_columns","Table","Table.html#method-i-get_columns","(*columns)",""],["get_rows","Table","Table.html#method-i-get_rows","(colname, condition=nil)",""],["intersect","Table","Table.html#method-i-intersect","(table2, colname, col2name=colname)","<p>Return an Array with the intersection of columns from different tables,\neliminating duplicates. 
Return …\n"],["join","Table","Table.html#method-i-join","(table2, colname, col2name=colname)","<p>Given a second table to join against, and a field/column, return a\n<code>Table</code> which contains a join of the …\n"],["length","Table","Table.html#method-i-length","(colname=nil, value=nil)",""],["new","OrderedRow","OrderedRow.html#method-c-new","(my_array, index)","<p>Creates a new OrderedRow. Callers must specify the index of the row element\nwhich will be used for order …\n"],["new","Table","Table.html#method-c-new","(input=nil)","<p>Instantiate a <code>Table</code> object using a tab-delimited file\n<p>Attributes\n<p>input — OPTIONAL <code>Array</code> of rows or <code>String</code> …\n"],["row","Table","Table.html#method-i-row","(index)","<p>Return a copy of a row from the table as an <code>Array</code>, given an\nindex (i.e. row number). Returns empty Array …\n"],["select","Table","Table.html#method-i-select","(*columns)","<p>Select columns from the table, given one or more column names. Returns an\ninstance of <code>Table</code> with the …\n"],["size","Table","Table.html#method-i-size","(colname=nil, value=nil)",""],["sort","Table","Table.html#method-i-sort","(column=nil, &block)","<p>Sort the table based on given column. Uses precedence as defined in the \ncolumn. By default will sort …\n"],["sort!","Table","Table.html#method-i-sort-21","(column=nil, &block)",""],["sub","Table","Table.html#method-i-sub","(colname, re, replace)","<p>Given a field/column, and a regular expression to match against, and a\nreplacement string, update the …\n"],["sub!","Table","Table.html#method-i-sub-21","(colname, re, replace)",""],["tally","Table","Table.html#method-i-tally","(colname)","<p>Count instances in a particular field/column and return a\n<code>Table</code> of the results. Raises ArgumentError …\n"],["to_a","Table","Table.html#method-i-to_a","()","<p>Converts a <code>Table</code> object to an array of arrays (each row). 
The\nfirst entry are the table headers.\n<p>Attributes …\n"],["to_s","Table","Table.html#method-i-to_s","()","<p>Converts a <code>Table</code> object to a tab-delimited string.\n<p>Attributes\n<p>none\n"],["top","Table","Table.html#method-i-top","(colname, num=1)","<p>Returns counts of the most frequent values found in a given column in the\nform of a Table. Raises ArgumentError …\n"],["union","Table","Table.html#method-i-union","(table2, colname, col2name=colname)","<p>Return Array with the union of elements columns in the given tables,\neliminating duplicates. Raises an …\n"],["where","Table","Table.html#method-i-where","(colname, condition=nil)","<p>Given a particular condition for a given column field/column, return a\nsubtable that matches the condition. …\n"],["write_file","Table","Table.html#method-i-write_file","(filename)","<p>Write a representation of the <code>Table</code> object to a file (tab\ndelimited).\n<p>Attributes\n<p>filename — <code>String</code> to identify …\n"]]}}
|
data/doc/table_of_contents.html
CHANGED
@@ -27,6 +27,9 @@
|
|
27
27
|
<h2 id="classes">Classes/Modules</h2>
|
28
28
|
<ul>
|
29
29
|
<li class="class">
|
30
|
+
<a href="OrderedRow.html">OrderedRow</a>
|
31
|
+
</li>
|
32
|
+
<li class="class">
|
30
33
|
<a href="Table.html">Table</a>
|
31
34
|
</li>
|
32
35
|
|
@@ -37,12 +40,32 @@
|
|
37
40
|
|
38
41
|
<li class="method"><a href="Table.html#method-c-new">::new — Table</a>
|
39
42
|
|
43
|
+
<li class="method"><a href="OrderedRow.html#method-c-new">::new — OrderedRow</a>
|
44
|
+
|
45
|
+
<li class="method"><a href="OrderedRow.html#method-i-3C-3D-3E">#<=> — OrderedRow</a>
|
46
|
+
|
47
|
+
<li class="method"><a href="Table.html#method-i-add_column">#add_column — Table</a>
|
48
|
+
|
49
|
+
<li class="method"><a href="Table.html#method-i-add_row">#add_row — Table</a>
|
50
|
+
|
51
|
+
<li class="method"><a href="Table.html#method-i-add_rows">#add_rows — Table</a>
|
52
|
+
|
40
53
|
<li class="method"><a href="Table.html#method-i-bottom">#bottom — Table</a>
|
41
54
|
|
42
55
|
<li class="method"><a href="Table.html#method-i-column">#column — Table</a>
|
43
56
|
|
44
57
|
<li class="method"><a href="Table.html#method-i-count">#count — Table</a>
|
45
58
|
|
59
|
+
<li class="method"><a href="OrderedRow.html#method-i-data">#data — OrderedRow</a>
|
60
|
+
|
61
|
+
<li class="method"><a href="Table.html#method-i-del_column">#del_column — Table</a>
|
62
|
+
|
63
|
+
<li class="method"><a href="Table.html#method-i-del_row">#del_row — Table</a>
|
64
|
+
|
65
|
+
<li class="method"><a href="Table.html#method-i-each">#each — Table</a>
|
66
|
+
|
67
|
+
<li class="method"><a href="Table.html#method-i-empty-3F">#empty? — Table</a>
|
68
|
+
|
46
69
|
<li class="method"><a href="Table.html#method-i-get_columns">#get_columns — Table</a>
|
47
70
|
|
48
71
|
<li class="method"><a href="Table.html#method-i-get_rows">#get_rows — Table</a>
|
@@ -59,6 +82,10 @@
|
|
59
82
|
|
60
83
|
<li class="method"><a href="Table.html#method-i-size">#size — Table</a>
|
61
84
|
|
85
|
+
<li class="method"><a href="Table.html#method-i-sort">#sort — Table</a>
|
86
|
+
|
87
|
+
<li class="method"><a href="Table.html#method-i-sort-21">#sort! — Table</a>
|
88
|
+
|
62
89
|
<li class="method"><a href="Table.html#method-i-sub">#sub — Table</a>
|
63
90
|
|
64
91
|
<li class="method"><a href="Table.html#method-i-sub-21">#sub! — Table</a>
|
data/lib/tablestakes.rb
CHANGED
@@ -16,8 +16,11 @@
|
|
16
16
|
# serving as the header names.
|
17
17
|
|
18
18
|
class Table
|
19
|
+
include Enumerable
|
20
|
+
|
19
21
|
# The headers attribute contains the table headers used to reference
|
20
22
|
# columns in the +Table+. All headers are represented as +String+ types.
|
23
|
+
#
|
21
24
|
attr_reader :headers
|
22
25
|
@headers =[]
|
23
26
|
@table = {}
|
@@ -28,7 +31,15 @@ class Table
|
|
28
31
|
|
29
32
|
# Instantiate a +Table+ object using a tab-delimited file
|
30
33
|
#
|
34
|
+
# ==== Attributes
|
31
35
|
# +input+:: OPTIONAL +Array+ of rows or +String+ to identify the name of the tab-delimited file to read
|
36
|
+
#
|
37
|
+
# ==== Examples
|
38
|
+
# cities = Table.new() # empty table
|
39
|
+
# cities = Table.new([ ["City", "State"], ["New York", "NY"], ["Dallas", "TX"] ]) # create from Array of rows
|
40
|
+
# cities = Table.new("cities.txt") # read from file
|
41
|
+
# cities = Table.new(capitals) # create from table
|
42
|
+
#
|
32
43
|
def initialize(input=nil)
|
33
44
|
@headers = []
|
34
45
|
@table = {}
|
@@ -36,32 +47,49 @@ class Table
|
|
36
47
|
|
37
48
|
if input.respond_to?(:fetch)
|
38
49
|
if input[0].respond_to?(:fetch)
|
39
|
-
#create
|
50
|
+
#create Table from rows
|
40
51
|
add_rows(input)
|
41
52
|
end
|
42
53
|
elsif input.respond_to?(:upcase)
|
43
54
|
# a string, then read_file
|
44
55
|
read_file(input)
|
45
56
|
elsif input.respond_to?(:headers)
|
46
|
-
|
57
|
+
input.each {|row| add_row(row) }
|
47
58
|
end
|
48
59
|
# else create empty +Table+
|
49
60
|
end
|
61
|
+
|
62
|
+
# Defines an iterator for +Table+ which produces rows of data (headers omitted)
|
63
|
+
# for its calling block.
|
64
|
+
#
|
65
|
+
def each
|
66
|
+
@table[@headers.first].each_index do |index|
|
67
|
+
nextrow = []
|
68
|
+
@headers.each do |col|
|
69
|
+
nextrow << @table[col][index].clone
|
70
|
+
end
|
71
|
+
yield nextrow
|
72
|
+
end
|
73
|
+
end
|
50
74
|
|
51
75
|
# Return a copy of a column from the table, identified by column name.
|
52
|
-
# Returns
|
76
|
+
# Returns empty Array if column name not found.
|
53
77
|
#
|
78
|
+
# ==== Attributes
|
54
79
|
# +colname+:: +String+ to identify the name of the column
|
55
80
|
def column(colname)
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
81
|
+
# return empty Array if column name not found
|
82
|
+
unless @table.has_key?(colname)
|
83
|
+
Array.new()
|
84
|
+
else
|
85
|
+
Array(@table[colname])
|
86
|
+
end
|
60
87
|
end
|
61
88
|
|
62
89
|
# Return a copy of a row from the table as an +Array+, given an index
|
63
90
|
# (i.e. row number). Returns empty Array if the index is out of bounds.
|
64
91
|
#
|
92
|
+
# ==== Attributes
|
65
93
|
# +index+:: +FixNum+ indicating index of the row.
|
66
94
|
def row(index)
|
67
95
|
Array(get_row(index))
|
@@ -76,15 +104,16 @@ class Table
|
|
76
104
|
# Add a column to the Table. Raises ArgumentError if the column name is already taken
|
77
105
|
# or there are not the correct number of values.
|
78
106
|
#
|
79
|
-
#
|
80
|
-
# +
|
81
|
-
#
|
82
|
-
#
|
83
|
-
# add_column(
|
84
|
-
# add_column("
|
107
|
+
# ==== Attributes
|
108
|
+
# +args+:: Array of +String+ to identify the name of the column (see examples)
|
109
|
+
#
|
110
|
+
# ==== Examples
|
111
|
+
# cities.add_column("City", ["New York", "Dallas", "San Francisco"])
|
112
|
+
# cities.add_column(["City","New York", "Dallas", "San Francisco"])
|
113
|
+
# cities.add_column("City", "New York", "Dallas", "San Francisco")
|
85
114
|
def add_column(*args)
|
86
115
|
if args.kind_of? Array
|
87
|
-
args
|
116
|
+
args.flatten!
|
88
117
|
colname = args.shift
|
89
118
|
column_vals = args
|
90
119
|
else
|
@@ -104,9 +133,11 @@ class Table
|
|
104
133
|
# there are not the correct number of values. The first row becomes the table headers
|
105
134
|
# if currently undefined.
|
106
135
|
#
|
136
|
+
# ==== Attributes
|
107
137
|
# +array_of_rows+:: +Array+ of +Arrays+ to hold the rows values
|
108
|
-
#
|
109
|
-
#
|
138
|
+
#
|
139
|
+
# ==== Examples
|
140
|
+
# cities.add_rows([ ["New York", "NY"], ["Austin", "TX"] ])
|
110
141
|
def add_rows(array_of_rows)
|
111
142
|
array_of_rows.each do |r|
|
112
143
|
add_row(r.clone)
|
@@ -117,10 +148,13 @@ class Table
|
|
117
148
|
# Add a row to the Table, appending it to the end. Raises ArgumentError if
|
118
149
|
# there are not the correct number of values.
|
119
150
|
#
|
151
|
+
# ==== Attributes
|
120
152
|
# +row+:: +Array+ to hold the row values
|
121
|
-
#
|
122
|
-
#
|
123
|
-
# add_row(
|
153
|
+
#
|
154
|
+
# ==== Examples
|
155
|
+
# cities = Table.new.add_row( ["City", "State"] ) # create new Table with headers
|
156
|
+
# cities.add_row("New York", "NY") # add data row to Table
|
157
|
+
#
|
124
158
|
def add_row(*row)
|
125
159
|
if row.kind_of? Array
|
126
160
|
row = row.flatten
|
@@ -140,7 +174,11 @@ class Table
|
|
140
174
|
|
141
175
|
# Delete a column from the Table. Raises ArgumentError if the column name does not exist.
|
142
176
|
#
|
177
|
+
# ==== Attributes
|
143
178
|
# +colname+:: +String+ to identify the name of the column
|
179
|
+
#
|
180
|
+
# ==== Examples
|
181
|
+
# cities.del_column("State") # returns table without "State" column
|
144
182
|
def del_column(colname)
|
145
183
|
# check arguments
|
146
184
|
raise ArgumentError, "Column name does not exist!" unless @table.has_key?(colname)
|
@@ -151,13 +189,19 @@ class Table
|
|
151
189
|
end
|
152
190
|
|
153
191
|
# Delete a row from the Table. Raises ArgumentError if
|
154
|
-
# the row number is not found
|
192
|
+
# the row number is not found
|
155
193
|
#
|
194
|
+
# ==== Attributes
|
156
195
|
# +rownum+:: +FixNum+ to hold the row number
|
196
|
+
#
|
197
|
+
# ==== Examples
|
198
|
+
# cities.del_row(3) # deletes row with index 3 (4th row)
|
199
|
+
# cities.del_row(-1) # deletes last row (per Ruby convention)
|
157
200
|
def del_row(rownum)
|
158
201
|
# check arguments
|
159
|
-
|
160
|
-
|
202
|
+
if self.empty? || rownum >= @table[@headers.first].length
|
203
|
+
raise ArgumentError, "Row number does not exist!"
|
204
|
+
end
|
161
205
|
@headers.each do |col|
|
162
206
|
@table[col].delete_at(rownum)
|
163
207
|
end
|
@@ -167,13 +211,14 @@ class Table
|
|
167
211
|
|
168
212
|
# Converts a +Table+ object to a tab-delimited string.
|
169
213
|
#
|
214
|
+
# ==== Attributes
|
170
215
|
# none
|
171
216
|
def to_s
|
172
217
|
result = @headers.join("\t") << "\n"
|
173
218
|
|
174
|
-
@table[@headers.first].
|
219
|
+
@table[@headers.first].each_index do |index|
|
175
220
|
@headers.each do |col|
|
176
|
-
result << @table[col][
|
221
|
+
result << @table[col][index].to_s
|
177
222
|
unless col == @headers.last
|
178
223
|
result << "\t"
|
179
224
|
else
|
@@ -184,16 +229,18 @@ class Table
|
|
184
229
|
result
|
185
230
|
end
|
186
231
|
|
187
|
-
# Converts a +Table+ object to an array of arrays (each row)
|
232
|
+
# Converts a +Table+ object to an array of arrays (each row). The first
|
233
|
+
# entry is the Array of table headers.
|
188
234
|
#
|
235
|
+
# ==== Attributes
|
189
236
|
# none
|
190
237
|
def to_a
|
191
238
|
result = [ Array(@headers) ]
|
192
239
|
|
193
|
-
@table[@headers.first].
|
240
|
+
@table[@headers.first].each_index do |index|
|
194
241
|
items = []
|
195
242
|
@headers.each do |col|
|
196
|
-
items << @table[col][
|
243
|
+
items << @table[col][index]
|
197
244
|
end
|
198
245
|
result << items
|
199
246
|
end
|
@@ -204,16 +251,25 @@ class Table
|
|
204
251
|
# and returns an integer >= 0. Raises ArgumentError if the column is not found. If
|
205
252
|
# no parameters are given, returns the number of rows in the table.
|
206
253
|
#
|
254
|
+
# ==== Attributes
|
207
255
|
# +colname+:: OPTIONAL +String+ to identify the column to count
|
208
256
|
# +value+:: OPTIONAL +String+ value to count
|
257
|
+
#
|
258
|
+
# ==== Examples
|
259
|
+
# cities.count # returns number of rows in cities Table
|
260
|
+
# cities.size # same as cities.count
|
261
|
+
# cities.length # same as cities.count
|
262
|
+
# cities.count("State", "NY") # returns the number of rows with State == "NY"
|
263
|
+
#
|
209
264
|
def count(colname=nil, value=nil)
|
210
265
|
if colname.nil? || value.nil?
|
211
266
|
if @table.size > 0
|
212
267
|
@table.each_key {|e| return @table.fetch(e).length }
|
213
268
|
else
|
214
|
-
return
|
269
|
+
return 0
|
215
270
|
end
|
216
271
|
end
|
272
|
+
raise ArgumentError, "Invalid column name" unless @headers.include?(colname)
|
217
273
|
|
218
274
|
if @table[colname]
|
219
275
|
result = 0
|
@@ -229,24 +285,36 @@ class Table
|
|
229
285
|
alias :size :count
|
230
286
|
alias :length :count
|
231
287
|
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
288
|
+
# Returns counts of the most frequent values found in a given column in the form of a
|
289
|
+
# Table. Raises ArgumentError if the column is not found. If no limit is given
|
290
|
+
# to the number of values, only the top value will be returned.
|
235
291
|
#
|
292
|
+
# ==== Attributes
|
236
293
|
# +colname+:: +String+ to identify the column to count
|
237
294
|
# +num+:: OPTIONAL +FixNum+ number of values to return (defaults to 1)
|
295
|
+
#
|
296
|
+
# ==== Examples
|
297
|
+
# cities.top("State") # returns a Table with the most frequent state in the cities Table
|
298
|
+
# cities.top("State", 10) # returns a Table with the 10 most frequent states in the cities Table
|
299
|
+
#
|
238
300
|
def top(colname, num=1)
|
239
301
|
freq = tally(colname).to_a[1..-1].sort_by {|k,v| v }.reverse
|
240
302
|
return Table.new(freq[0..num-1].unshift([colname,"Count"]))
|
241
303
|
end
|
242
304
|
|
243
305
|
|
244
|
-
#
|
245
|
-
#
|
246
|
-
#
|
306
|
+
# Returns counts of the least frequent values found in a given column in the form of a
|
307
|
+
# Table. Raises ArgumentError if the column is not found. If no limit is given
|
308
|
+
# to the number of values, only the least frequent value will be returned.
|
247
309
|
#
|
310
|
+
# ==== Attributes
|
248
311
|
# +colname+:: +String+ to identify the column to count
|
249
312
|
# +num+:: OPTIONAL +FixNum+ number of values to return (defaults to 1)
|
313
|
+
#
|
314
|
+
# ==== Examples
|
315
|
+
# cities.bottom("State") # returns a Table with the least frequent state in the cities Table
|
316
|
+
# cities.bottom("State", 10) # returns a Table with the 10 least frequent states in the cities Table
|
317
|
+
#
|
250
318
|
def bottom(colname, num=1)
|
251
319
|
freq = tally(colname).to_a[1..-1].sort_by {|k,v| v }
|
252
320
|
return Table.new(freq[0..num-1].unshift([colname,"Count"]))
|
@@ -255,12 +323,17 @@ class Table
|
|
255
323
|
|
256
324
|
|
257
325
|
# Count instances in a particular field/column and return a +Table+ of the results.
|
258
|
-
#
|
326
|
+
# Raises ArgumentError if the column is not found.
|
259
327
|
#
|
328
|
+
# ==== Attributes
|
260
329
|
# +colname+:: +String+ to identify the column to tally
|
330
|
+
#
|
331
|
+
# ==== Examples
|
332
|
+
# cities.tally("State") # returns each State in the cities Table with number of occurrences
|
333
|
+
#
|
261
334
|
def tally(colname)
|
262
335
|
# check arguments
|
263
|
-
|
336
|
+
raise ArgumentError, "Invalid column name" unless @table.has_key?(colname)
|
264
337
|
|
265
338
|
result = {}
|
266
339
|
@table[colname].each do |val|
|
@@ -270,31 +343,35 @@ class Table
|
|
270
343
|
end
|
271
344
|
|
272
345
|
# Select columns from the table, given one or more column names. Returns an instance
|
273
|
-
# of +Table+ with the results.
|
346
|
+
# of +Table+ with the results. Raises ArgumentError if any column is not valid.
|
274
347
|
#
|
348
|
+
# ==== Attributes
|
275
349
|
# +columns+:: Variable +String+ arguments to identify the columns to select
|
350
|
+
#
|
351
|
+
# ==== Examples
|
352
|
+
# cities.select("City", "State") # returns a Table of "City" and "State" columns
|
353
|
+
# cities.select(cities.headers) # returns a new Table that is a duplicate of cities
|
354
|
+
#
|
276
355
|
def select(*columns)
|
277
356
|
# check arguments
|
357
|
+
raise ArgumentError, "Invalid column name(s)" unless columns
|
358
|
+
columns.kind_of?(Array) ? columns.flatten! : nil
|
278
359
|
columns.each do |c|
|
279
|
-
|
360
|
+
raise ArgumentError, "Invalid column name" unless @table.has_key?(c)
|
280
361
|
end
|
281
362
|
|
282
363
|
result = []
|
283
364
|
result_headers = []
|
284
365
|
columns.each { |col| @headers.include?(col) ? result_headers << col : nil }
|
285
366
|
result << result_headers
|
286
|
-
@table[@headers.first].
|
367
|
+
@table[@headers.first].each_index do |index|
|
287
368
|
this_row = []
|
288
369
|
result_headers.each do |col|
|
289
|
-
this_row << @table[col][
|
370
|
+
this_row << @table[col][index]
|
290
371
|
end
|
291
372
|
result << this_row
|
292
373
|
end
|
293
|
-
|
294
|
-
return Table.new(result)
|
295
|
-
else
|
296
|
-
return nil
|
297
|
-
end
|
374
|
+
result_headers.empty? ? Table.new() : Table.new(result)
|
298
375
|
end
|
299
376
|
|
300
377
|
alias :get_columns :select
|
@@ -302,24 +379,31 @@ class Table
|
|
302
379
|
# Given a particular condition for a given column field/column, return a subtable
|
303
380
|
# that matches the condition. If no condition is given, a new +Table+ is returned with
|
304
381
|
# all records.
|
305
|
-
# Returns
|
382
|
+
# Raises ArgumentError if the column is not found; returns an empty table if no row meets the condition.
|
306
383
|
#
|
384
|
+
# ==== Attributes
|
307
385
|
# +colname+:: +String+ to identify the column to evaluate the condition against
|
308
386
|
# +condition+:: OPTIONAL +String+ containing a ruby condition to evaluate
|
387
|
+
#
|
388
|
+
# ==== Examples
|
389
|
+
# cities.where("State", "=='NY'") # returns a Table of cities in New York state
|
390
|
+
# cities.where("State", "=~ /New.*/") # returns a Table of cities in states that start with "New"
|
391
|
+
# cities.where("Population", ".to_i > 1000000") # returns a Table of cities with population over 1 million
|
392
|
+
#
|
309
393
|
def where(colname, condition=nil)
|
310
394
|
# check arguments
|
311
|
-
|
395
|
+
raise ArgumentError, "Invalid Column Name" unless @headers.include?(colname)
|
312
396
|
|
313
397
|
result = []
|
314
398
|
result << @headers
|
315
|
-
|
399
|
+
self.each do |row|
|
316
400
|
if condition
|
317
|
-
eval(%q["#{
|
401
|
+
eval(%q["#{row[headers.index(colname)]}"] << "#{condition}") ? result << row : nil
|
318
402
|
else
|
319
|
-
result <<
|
403
|
+
result << row
|
320
404
|
end
|
321
405
|
end
|
322
|
-
result.length > 1 ? Table.new(result) :
|
406
|
+
result.length > 1 ? Table.new(result) : Table.new()
|
323
407
|
end
|
324
408
|
|
325
409
|
alias :get_rows :where
|
@@ -329,29 +413,23 @@ class Table
|
|
329
413
|
# the column name of the first table (if different from the name of the second).
|
330
414
|
# All columns from both tables are returned. Raises ArgumentError if a column is not found.
|
331
415
|
#
|
416
|
+
# ==== Attributes
|
332
417
|
# +table2+:: +Table+ to identify the secondary table in the join
|
333
418
|
# +colname+:: +String+ to identify the column to join on
|
334
419
|
# +col2name+:: OPTIONAL +String+ to identify the column in the second table to join on
|
335
|
-
|
420
|
+
#
|
421
|
+
# ==== Examples
|
422
|
+
# cities.join(capitals, "City", "Capital") # returns a Table of cities that are also state capitals
|
423
|
+
# capitals.join(cities, "State") # returns a Table of capital cities with population info from the cities table
|
424
|
+
#
|
425
|
+
def join(table2, colname, col2name=colname)
|
336
426
|
# check arguments
|
337
427
|
raise ArgumentError, "Invalid table!" unless table2.is_a?(Table)
|
338
|
-
|
339
|
-
|
340
|
-
col2name = colname
|
341
|
-
end
|
428
|
+
raise ArgumentError, "Invalid column name" unless @table.has_key?(colname)
|
429
|
+
raise ArgumentError, "Invalid column name" unless table2.headers.include?(col2name)
|
342
430
|
t2_col_index = table2.headers.index(col2name)
|
343
|
-
return nil unless t2_col_index # is not nil
|
344
|
-
|
345
431
|
|
346
|
-
|
347
|
-
table2.headers.each do |h|
|
348
|
-
if @headers.include?(h)
|
349
|
-
update_header(h, '_' << h )
|
350
|
-
if h == colname
|
351
|
-
colname = '_' << colname
|
352
|
-
end
|
353
|
-
end
|
354
|
-
end
|
432
|
+
dedupe_headers(table2, colname)
|
355
433
|
|
356
434
|
result = [ Array(@headers) + Array(table2.headers) ]
|
357
435
|
@table[colname].each_index do |index|
|
@@ -372,14 +450,20 @@ class Table
|
|
372
450
|
# update the table such that it substitutes the column data with the replacement string.
|
373
451
|
# Raises ArgumentError if the column is not found.
|
374
452
|
#
|
453
|
+
# ==== Attributes
|
375
454
|
# +colname+:: +String+ to identify the column to update
|
376
455
|
# +re+:: +Regexp+ to match the value in the selected column
|
377
456
|
# +replace+:: +String+ to specify the replacement text for the given +Regexp+
|
457
|
+
#
|
458
|
+
# ==== Examples
|
459
|
+
# cities.sub("Population", /(.*?),(.*?)/, '\1\2') # eliminate commas
|
460
|
+
# capitals.sub("State", /NY/, "New York") # replace acronym with full name
|
461
|
+
#
|
378
462
|
def sub(colname, re, replace)
|
379
463
|
# check arguments
|
380
464
|
raise ArgumentError, "No regular expression to match against" unless re
|
381
465
|
raise ArgumentError, "No replacement string specified" unless replace
|
382
|
-
|
466
|
+
raise ArgumentError, "Invalid column name" unless @table.has_key?(colname)
|
383
467
|
|
384
468
|
@table[colname].each do |item|
|
385
469
|
item.sub!(re, replace)
|
@@ -387,46 +471,91 @@ class Table
|
|
387
471
|
return self
|
388
472
|
end
|
389
473
|
|
390
|
-
# Return the union of columns
|
391
|
-
#
|
474
|
+
# Return an Array with the union of the elements of the given columns in the two tables, eliminating duplicates.
|
475
|
+
# Raises an ArgumentError if a column is not found.
|
392
476
|
#
|
477
|
+
# ==== Attributes
|
393
478
|
# +table2+:: +Table+ to identify the secondary table in the union
|
394
479
|
# +colname+:: +String+ to identify the column to union
|
395
480
|
# +col2name+:: OPTIONAL +String+ to identify the column in the second table to union
|
396
|
-
|
481
|
+
#
|
482
|
+
# ==== Examples
|
483
|
+
# cities.union(capitals, "City", "Capital") # returns Array with all cities in both tables
|
484
|
+
#
|
485
|
+
def union(table2, colname, col2name=colname)
|
397
486
|
# check arguments
|
398
487
|
raise ArgumentError, "Invalid table!" unless table2.is_a?(Table)
|
399
|
-
|
400
|
-
|
401
|
-
col2name = colname
|
402
|
-
end
|
403
|
-
return nil unless table2.headers.include?(col2name)
|
488
|
+
raise ArgumentError, "Invalid column name" unless @table.has_key?(colname)
|
489
|
+
raise ArgumentError, "Invalid column name" unless table2.headers.include?(col2name)
|
404
490
|
|
405
491
|
return self.column(colname) | table2.column(col2name)
|
406
492
|
end
|
407
493
|
|
408
|
-
# Return the intersection of columns from different tables, eliminating duplicates.
|
494
|
+
# Return an Array with the intersection of columns from different tables, eliminating duplicates.
|
409
495
|
# Raises an ArgumentError if a column is not found.
|
410
496
|
#
|
497
|
+
# ==== Attributes
|
411
498
|
# +table2+:: +Table+ to identify the secondary table in the intersection
|
412
499
|
# +colname+:: +String+ to identify the column to intersect
|
413
500
|
# +col2name+:: OPTIONAL +String+ to identify the column in the second table to intersect
|
414
|
-
|
501
|
+
#
|
502
|
+
# ==== Examples
|
503
|
+
# cities.intersect(capitals, "City", "Capital") # returns Array with all capitals that are also in the cities table
|
504
|
+
#
|
505
|
+
def intersect(table2, colname, col2name=colname)
|
415
506
|
# check arguments
|
416
507
|
raise ArgumentError, "Invalid table!" unless table2.is_a?(Table)
|
417
|
-
|
418
|
-
|
419
|
-
col2name = colname
|
420
|
-
end
|
421
|
-
return nil unless table2.headers.include?(col2name)
|
508
|
+
raise ArgumentError, "Invalid column name" unless @table.has_key?(colname)
|
509
|
+
raise ArgumentError, "Invalid column name" unless table2.headers.include?(col2name)
|
422
510
|
|
423
511
|
return self.column(colname) & table2.column(col2name)
|
424
512
|
end
|
425
513
|
|
426
514
|
alias :sub! :sub
|
427
|
-
|
515
|
+
|
516
|
+
# Sort the table based on given column. Uses precedence as defined in the
|
517
|
+
# column. By default will sort by the value in the first column.
|
518
|
+
#
|
519
|
+
# ==== Attributes
|
520
|
+
# +column+:: OPTIONAL +String+ name (or +FixNum+ index) of the column on which to sort
|
521
|
+
#
|
522
|
+
# ==== Options
|
523
|
+
# datatype => :Fixnum
|
524
|
+
# datatype => :Float
|
525
|
+
# datatype => :Date
|
526
|
+
#
|
527
|
+
# ==== Examples
|
528
|
+
# cities.sort("State") # Re-orders the cities table based on State name
|
529
|
+
# cities.sort { |a,b| b<=>a } # Reverse the order of the cities table
|
530
|
+
# cities.sort("State") { |a,b| b<=>a } # Sort by State in reverse alpha order
|
531
|
+
#
|
532
|
+
def sort(column=nil, &block)
  # An empty table has nothing to order, so hand back a fresh empty Table
  # before trying to resolve the sort column.
  return Table.new() if self.empty?

  # Resolve the sort column to a positional index. An argument that is
  # neither a String nor an Integer (including the default nil) falls back
  # to the first column.
  col_index = 0
  if column.kind_of? String
    col_index = @headers.index(column)
    # An unknown name would otherwise surface later as a TypeError raised
    # while indexing a row with nil; fail early with a clear message.
    raise ArgumentError, "Invalid column name" if col_index.nil?
  elsif column.kind_of? Integer
    # Integer rather than Fixnum: the Fixnum constant was deprecated in
    # Ruby 2.4 and removed in Ruby 3.2, where referencing it raises NameError.
    col_index = column
  end

  # Wrap every row so that it compares on the chosen column.
  neworder = []
  self.each { |row| neworder << OrderedRow.new(row, col_index) }

  # The first row yielded by #each is treated as the header row: it is
  # taken off before sorting and stays at the top of the result.
  result = [neworder.shift.data]
  block_given? ? neworder.sort!(&block) : neworder.sort!
  neworder.each { |row| result << row.data }

  return Table.new(result)
end
|
553
|
+
|
554
|
+
# NOTE: +sort!+ is a plain alias of +sort+. It returns a new Table built from
# the sorted rows; the receiver's own data is not reordered in place.
alias :sort! :sort
|
555
|
+
|
428
556
|
# Write a representation of the +Table+ object to a file (tab delimited).
|
429
557
|
#
|
558
|
+
# ==== Attributes
|
430
559
|
# +filename+:: +String+ to identify the name of the file to write
|
431
560
|
def write_file(filename)
|
432
561
|
file = File.open(filename, "w")
|
@@ -437,26 +566,19 @@ class Table
|
|
437
566
|
|
438
567
|
# Read a tab-delimited file and append its lines to the table.
#
# ==== Attributes
# +filename+:: +String+ to identify the name of the file to read
#
# Every line is chomped and split on tab characters, and the resulting
# Array of rows is passed to +add_rows+ in a single call. Returns whatever
# +add_rows+ returns.
def read_file(filename)
  # The block form of File.open closes the handle even when reading raises;
  # previously the file was opened and never closed.
  rows = File.open(filename, "r") do |file|
    file.each_line.map { |line| line.chomp.split("\t") }
  end
  add_rows(rows)
end
|
454
575
|
|
455
576
|
# Build the row at position +index+ as an Array of Strings, one element per
# header, in header order. Returns an empty Array when +index+ is at or
# beyond the length of the first column.
def get_row(index)
  first_column = @table[@headers.first]
  return [] if index >= first_column.length

  # to_s turns missing (nil) cells into empty strings.
  @headers.map { |name| @table[name][index].to_s }
end
|
462
584
|
|
@@ -476,27 +598,63 @@ class Table
|
|
476
598
|
@table[colname] = Array.new(column_vals)
|
477
599
|
return self
|
478
600
|
end
|
479
|
-
|
480
|
-
def copy
|
481
|
-
result = []
|
482
|
-
result << @headers
|
483
|
-
@table[@headers.first].each_index do |index|
|
484
|
-
result << get_row(index)
|
485
|
-
end
|
486
|
-
result.length > 1 ? Table.new(result) : Table.new()
|
487
|
-
end
|
488
601
|
|
489
602
|
# Rename the header +item+ to +new_item+.
#
# The entry in @headers is replaced in place when +item+ is listed there.
# When @table holds a non-nil column under +item+, the same column object is
# also stored under +new_item+ and returned; the entry under the old key is
# left in place. Returns nil when @table has no column for +item+.
def update_header(item, new_item)
  position = @headers.index(item)
  @headers[position] = new_item if position

  column = @table.fetch(item, nil)
  return nil if column.nil?

  @table[new_item] = column
end
|
494
|
-
|
495
|
-
def
|
496
|
-
|
497
|
-
|
498
|
-
@
|
607
|
+
|
608
|
+
# Ensure no header of this table duplicates a header of +table2+: every
# header of +table2+ that is also present in @headers is renamed here, via
# +update_header+, to the same name prefixed with an underscore.
#
# ==== Attributes
# +table2+:: object whose +headers+ are checked against this table's headers
# +colname+:: accepted for interface compatibility; see the note below
#
# Returns +table2.headers+ (the value of the +each+ call).
#
# NOTE(review): earlier code reassigned the local +colname+ to its prefixed
# form when it matched a renamed header, but that value was never returned
# or stored, so the caller could not observe it. The dead assignment has
# been dropped; confirm whether callers need the renamed column name.
def dedupe_headers(table2, colname)
  table2.headers.each do |header|
    next unless @headers.include?(header)

    # Interpolation builds a new String; the previous '_' << header appended
    # to a string literal, which raises FrozenError when literals are frozen.
    update_header(header, "_#{header}")
  end
end
|
502
|
-
|
619
|
+
|
620
|
+
end #Table
|
621
|
+
|
622
|
+
# This class functions as a temporary representation of a row. The OrderedRow
|
623
|
+
# contains information about which column it should be sorted on, so that
|
624
|
+
# Comparable can be implemented.
|
625
|
+
|
626
|
+
class OrderedRow
  # Returns the row elements in an +Array+ (the same object that was passed
  # to the constructor).
  #
  # ==== Attributes
  # none
  attr_reader :data

  # Creates a new OrderedRow. Callers must specify the index of the row
  # element which will be used for order comparisons.
  #
  # ==== Attributes
  # +my_array+:: An array representing a row from +Table+
  # +index+:: An Integer position of the element used as the comparison value
  #
  def initialize(my_array, index)
    # State lives only on instances. Assigning @data / @sort_index in the
    # class body would set variables on the class object itself, which
    # instances never see.
    @data = my_array
    @sort_index = index
  end

  # Orders two rows by the element at this row's sort index.
  #
  # ==== Attributes
  # +other+:: The row to be compared
  #
  # Returns the result of <=> on the two elements, or nil when +other+ does
  # not expose row data, which is the conventional <=> answer for operands
  # that cannot be compared.
  def <=>(other)
    return nil unless other.respond_to?(:data)

    data[@sort_index] <=> other.data[@sort_index]
  end
end
|