tables 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ == Tables
2
+
3
+ Put appropriate LICENSE for your project here.
data/README ADDED
@@ -0,0 +1,3 @@
1
+ == Tables
2
+
3
+ You should document your project here.
@@ -0,0 +1,35 @@
1
+ #
2
+ #
3
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
4
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
5
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
6
+ #
7
+
8
+
9
+ require 'rubygems'
10
+ require 'rake'
11
+ require 'rake/clean'
12
+ require 'rubygems/package_task'
13
+ require 'rdoc/task'
14
+ require 'rake/testtask'
15
+
16
+
17
+
18
# Packaging task: builds the gem and a tarball, but no zip archive.
# NOTE(review): `spec` is not defined in the visible part of this Rakefile;
# presumably a Gem::Specification is assigned above -- confirm.
Gem::PackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_tar = true
  pkg.need_zip = false
end
23
+
24
# Documentation task: renders README, LICENSE and the library sources
# into doc/rdoc, with README as the landing page.
RDoc::Task.new do |doc|
  doc.rdoc_files.add(['README', 'LICENSE', 'lib/**/*.rb'])
  doc.main     = "README"     # page to start on
  doc.title    = "Tables Docs"
  doc.rdoc_dir = 'doc/rdoc'   # rdoc output folder
  doc.options << '--line-numbers'
end
32
+
33
# Test task: every Ruby file below test/ is treated as a test file.
Rake::TestTask.new do |task|
  task.test_files = FileList.new('test/**/*.rb')
end
@@ -0,0 +1,14 @@
1
+ #
2
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
3
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
4
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
5
+ #
6
+
7
+ $:.unshift File.dirname(__FILE__)
8
+
9
+ require 'tables/table'
10
+ require 'tables/table_reader'
11
+ require 'tables/excel_table_reader'
12
+ require 'tables/word_table_reader'
13
+ require 'tables/word_table_writer'
14
+
@@ -0,0 +1,175 @@
1
+ #
2
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
3
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
4
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
5
+ #
6
+ require 'win32ole'
7
+
8
+ module Tables
9
+
10
# Reads tables from, and writes tables to, Excel workbooks by driving an
# 'Excel.Application' instance through WIN32OLE.
#
# Worksheets may be addressed either by a zero-based Integer index or by any
# other key accepted by Excel's Worksheets.Item (e.g. the sheet name).
class ExcelTableReader < TableReader

  attr_reader :workbook, :table

  # Progress messages are printed once every this many rows.
  PROGRESS_INTERVAL = 50

  # Starts Excel and, when +filename+ is given, opens that workbook.
  #
  # NOTE(review): the workbook is opened before super() runs, as in the
  # original; TableReader#initialize is not visible here -- confirm it does
  # not reset state set by open_file.
  def initialize(filename = nil)
    @excel = WIN32OLE.new('Excel.Application')
    open_file(filename) unless filename.nil?
    super()
  end

  # Opens the workbook at +filename+ and makes it the current workbook.
  #
  # The Workbook object returned by Workbooks.Open is used directly; taking
  # Workbooks.Item(1) would select the wrong workbook whenever another one is
  # already open in this Excel instance.
  def open_file(filename)
    path = get_file_path(filename)
    @workbook = @excel.Workbooks.Open(path)
    puts "Open workbook '#{path}'" if $DEBUG
    @worksheets = []
  end

  # Creates a new workbook, makes it current and saves it as +filename+.
  def create_file(filename)
    path = get_file_path(filename)
    @workbook = @excel.Workbooks.Add
    @worksheets = []
    @workbook.SaveAs(path)
  end

  # Extracts the used range of +worksheet+ into a new Table.
  #
  # Options:
  # [:rtf_columns] zero-based column indexes that are read character by
  #                character so that struck-through text is dropped (slow)
  # [:progress]    when truthy, prints a progress line every
  #                PROGRESS_INTERVAL rows
  def extract_table(worksheet, options = {})
    rtf_columns = options[:rtf_columns] || []
    progress = options[:progress]
    range = get_worksheet(worksheet).UsedRange
    ncols = range.Columns.Count
    nrows = range.Rows.Count

    rows = (1..nrows).map do |idx|
      row = ExcelTableReader.extract_row(range.Rows(idx), ncols, rtf_columns)
      report_progress("extracting", idx, nrows) if progress
      row
    end
    Table.new(rows)
  end

  # Writes every row of +table+ (header included) into +worksheet+, starting
  # at the first row. Values are written as strings.
  def write_table(table, worksheet = 0, options = {})
    progress = options[:progress]
    sheet = get_worksheet(worksheet)
    nrows = table.row_count
    row_num = 0
    table.each_row do |row|
      row_num += 1
      report_progress("writing", row_num, nrows) if progress
      excel_row = sheet.Rows(row_num)
      row.each_with_index { |val, jdx| excel_row.Cells(jdx + 1).Value = val.to_s }
    end
  end

  # Writes a single column of +table+ (header cell included) into the same
  # column position of +worksheet+. Values are written as strings.
  def write_column(table, column_name, worksheet = 0, options = {})
    progress = options[:progress]
    sheet = get_worksheet(worksheet)
    values = table.get_column(column_name)
    column_index = table.colindex[column_name] + 1 # Excel columns are one-based
    nrows = table.row_count
    values.each_with_index do |val, idx|
      row_num = idx + 1 # Excel rows are one-based; also used for reporting
      report_progress("updating", row_num, nrows) if progress
      sheet.Rows(row_num).Cells(column_index).Value = val.to_s
    end
  end

  # Number of worksheets in the current workbook.
  def table_count
    @workbook.WorkSheets.Count
  end

  # Removes blank rows and repeated headers from, and demerges, every table
  # in @tables.
  # NOTE(review): @tables is not assigned in this class; presumably it is
  # maintained by TableReader -- confirm.
  def clean
    @tables.each do |table|
      table.remove_blank_rows!(1)
      table.remove_repeat_headers!
      table.demerge!
    end
  end

  # Saves the current workbook.
  def save
    @workbook.save
  end

  # Quits the Excel application. Note that this shadows Kernel#exit on
  # instances of this class.
  def exit
    @excel.quit
  end

  # The class-level helpers below are public: the original declared them
  # after `private`, but `private` has no effect on singleton methods, and
  # extract_table calls extract_row with an explicit receiver.

  # Extracts the text of the first +n+ cells of +excel_row+ into an array.
  # Cells whose zero-based index is listed in +rtf_columns+ are read with
  # slow_extract_text, all others with extract_text.
  def self.extract_row(excel_row, n, rtf_columns)
    (1..n).map do |col|
      cell = excel_row.Cells(col)
      # rtf_columns is zero-based, Excel cell numbers are one-based
      rtf_columns.include?(col - 1) ? slow_extract_text(cell) : extract_text(cell)
    end
  end

  # Returns the displayed text of +range+ with every "\a" replaced by " - ",
  # matching what slow_extract_text does for each character.
  def self.extract_text(range)
    range.Text.to_s.gsub("\a", " - ")
  end

  # Builds the text of +range+ one character at a time, replacing "\a" with
  # " - " and skipping characters whose font is struck through. Falls back
  # to extract_text if the character-level access raises.
  def self.slow_extract_text(range)
    text = String.new
    (1..range.Characters.Count).each do |idx|
      char = range.Characters(idx, 1)
      char_text = char.Text
      if char_text == "\a"
        text << " - "
      else
        text << char_text unless char.Font.Strikethrough
      end
    end
    text
  rescue StandardError
    extract_text(range)
  end

  private

  # Prints a progress line for row +idx+ of +total+ every PROGRESS_INTERVAL rows.
  def report_progress(verb, idx, total)
    return unless idx.modulo(PROGRESS_INTERVAL) == 0
    percent = (idx.fdiv(total) * 100).round(0)
    puts ">> #{verb} row #{idx} (#{percent}%)"
  end

  # Resolves +worksheet+ to a worksheet object. A zero-based Integer beyond
  # the existing sheets adds a new worksheet; any non-Integer value is passed
  # straight to Worksheets.Item.
  def get_worksheet(worksheet)
    sheets = @workbook.Worksheets
    return sheets.Item(worksheet) unless worksheet.is_a?(Integer)

    if (worksheet + 1) > sheets.Count
      sheets.Add
    else
      sheets.Item(worksheet + 1)
    end
  end

  # Converts +filename+ to an absolute path via Scripting.FileSystemObject.
  def get_file_path(filename)
    fso = WIN32OLE.new('Scripting.FileSystemObject')
    fso.GetAbsolutePathName(filename)
  end

end
174
+
175
+ end
@@ -0,0 +1,260 @@
1
+ #
2
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
3
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
4
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
5
+ #
6
+
7
+ module Tables
8
+
9
# An in-memory table held as an array of row arrays, where the first row is
# the header.
#
# Two indexes are maintained alongside the data:
# [colindex] header value => zero-based column number
# [rowindex] value of the id column => row number; nil until an id column
#            has been chosen with idcolumn=
class Table

  attr_reader :table, :colindex, :idcolumn, :rowindex
  attr_accessor :name

  # Builds a table from +table+ (anything responding to +each+ that yields
  # row arrays, header first) and gives it an optional +name+. An empty or
  # missing +table+ yields an empty table with an empty column index.
  def initialize(table = nil, name = nil)
    @table = []
    @colindex = {}
    table.each { |row| @table << row } unless table.nil?
    build_column_index
    self.name = name unless name.nil?
  end

  # The header row, or nil when the table is empty.
  def header
    @table[0]
  end

  # Selects +column_name+ as the id column and builds the row index on it.
  # Raises RuntimeError if the column does not exist.
  def idcolumn=(column_name)
    raise "Unknown column '#{column_name}'" unless @colindex.key?(column_name)
    @idcolumn = column_name
    build_row_index
  end

  # Returns the cell in column +column_name+ of row number +row_num+.
  # Raises RuntimeError if the column does not exist.
  def get_value(column_name, row_num)
    col_num = @colindex[column_name]
    raise "Unknown column name '#{column_name}'" if col_num.nil?
    @table[row_num][col_num]
  end

  # Returns a row as a hash of header value => cell, plus :rownum.
  # An Integer +arg+ is a row number; anything else is looked up in the row
  # index. Raises RuntimeError when no id column is set or the id is unknown,
  # and IndexError when the row number does not exist.
  def get_row(arg)
    return get_row_by_num(arg) if arg.is_a?(Integer)
    raise "No id column has been set" if @rowindex.nil?
    row_num = @rowindex[arg]
    raise "Unknown row id '#{arg}'" if row_num.nil?
    get_row_by_num(row_num)
  end

  # Appends +row+ and returns self.
  # An Array is appended as is (it must match the header width). A Hash is
  # mapped onto the existing columns, unknown keys being ignored; on an empty
  # table its keys become the header. Any other argument is ignored.
  def add_row(row)
    case row
    when Array
      add_row_array(row)
    when Hash
      row_count.zero? ? add_first_row_hash(row) : add_row_hash(row)
    end
    self
  end

  # Returns the raw row array at +idx+.
  def [](idx)
    @table[idx]
  end

  # Two tables are equal when their row data is equal; anything that is not
  # a Table is never equal.
  def ==(other)
    other.is_a?(Table) && table == other.table
  end

  # obsolescent
  def rows
    puts "WARNING: use 'row_count' instead of 'rows'"
    row_count
  end

  # Number of columns in the header; 0 for an empty table.
  def column_count
    header.nil? ? 0 : header.count
  end

  # Number of rows, header included.
  def row_count
    @table.count
  end

  # True when +t2+ has the same header as this table.
  def similar?(t2)
    header == t2.header
  end

  # Appends every data row of +t2+ (its header is skipped) and returns self.
  # Raises RuntimeError if the headers differ. A row that cannot be added is
  # reported on standard output and skipped.
  def merge!(t2)
    raise "tables are not similar" unless similar?(t2)
    (1...t2.row_count).each do |idx|
      begin
        self << t2[idx]
      rescue StandardError => e
        # StandardError rather than Exception, so that interrupts and exits
        # are not swallowed
        puts "ERROR: Error adding row #{idx} from '#{t2.name}' to '#{self.name}'"
        puts "ERROR: #{e.message}"
      end
    end
    self
  end

  # << is a synonym of add_row
  def <<(row)
    add_row(row)
  end

  # Yields each raw row array, header included. Returns an Enumerator when
  # no block is given.
  def each_row
    return enum_for(:each_row) unless block_given?
    @table.each { |row| yield(row) }
  end

  # Yields each raw row array together with its row number.
  def each_row_with_index
    return enum_for(:each_row_with_index) unless block_given?
    @table.each_with_index { |row, idx| yield(row, idx) }
  end

  # Yields each row as a hash (see get_row); the header row is skipped
  # unless +skip_header+ is false.
  def get_each_row(skip_header = true)
    return enum_for(:get_each_row, skip_header) unless block_given?
    @table.each_index do |idx|
      next if idx.zero? && skip_header
      yield get_row_by_num(idx)
    end
  end

  # TODO: How can we do this within the get_row closure?
  # Writes the values of the hash +row+ back into the row numbered
  # row[:rownum]; keys that are not column names are ignored.
  # NOTE: the row index is not updated if the id column value changes.
  def set_row(row)
    idx = row[:rownum]
    raise ArgumentError, "row must include :rownum" if idx.nil?
    row.each_pair do |colname, value|
      jdx = @colindex[colname]
      @table[idx][jdx] = value unless jdx.nil?
    end
  end

  # Removes every row whose cells from +startcol+ onwards are all blank.
  # Rows with no cells at or beyond +startcol+ count as blank.
  def remove_blank_rows!(startcol = 0)
    remove_matched_rows! { |row| Array(row[startcol..-1]).join.strip.empty? }
  end

  # Removes rows that repeat the header's content (the header itself stays).
  def remove_repeat_headers!
    first = header
    remove_matched_rows! { |row| row.eql?(first) && !row.equal?(first) }
  end

  # Removes every row for which the block returns true, rebuilds the column
  # and row indexes, and returns self.
  def remove_matched_rows!
    @table.reject! { |row| yield(row) }
    refresh_indexes
    self
  end

  # Folds continuation rows -- rows whose cell in column +colnum+ is blank --
  # into the row above, joining each pair of cells with a newline. Rebuilds
  # the indexes and returns self. An empty table is left untouched.
  def demerge!(colnum = 0)
    return self if @table.empty?
    merged = [@table[0]]
    @table.drop(1).each { |row| demerge_it(colnum, row, merged) }
    @table = merged
    refresh_indexes
    self
  end

  # Renames column +old_name+ to +new_name+ in the header and the column
  # index; the id column setting follows the rename.
  # Raises RuntimeError if +old_name+ does not exist.
  def rename_column(old_name, new_name)
    colnum = @colindex[old_name]
    raise "Unknown column '#{old_name}'" if colnum.nil?
    @table[0][colnum] = new_name
    # delete before insert so that renaming a column to itself keeps it
    @colindex.delete(old_name)
    @colindex[new_name] = colnum
    @idcolumn = new_name if @idcolumn == old_name
    new_name
  end

  # Removes column +name+ from every row and re-numbers the column index.
  # Deleting the id column clears the id column and the row index.
  # Raises RuntimeError if the column does not exist.
  def delete_column(name)
    colnum = @colindex[name]
    raise "Unknown column '#{name}'" if colnum.nil?
    @table.each { |row| row.delete_at(colnum) }
    if @idcolumn == name
      @idcolumn = nil
      @rowindex = nil
    end
    build_column_index # columns after the deleted one have shifted left
    colnum
  end

  # Returns the cells of column +name+, header cell included.
  # Raises RuntimeError if the column does not exist.
  def get_column(name)
    colnum = @colindex[name]
    raise "Unknown column '#{name}'" if colnum.nil?
    @table.map { |row| row[colnum] }
  end

  # Appends the data rows of +other_table+, matching columns by name.
  def column_copy(other_table)
    other_table.get_each_row do |other_row|
      # :rownum is added by get_row and is not a column
      add_row(other_row.reject { |key, _| key == :rownum })
    end
  end

  # The header values joined with commas; "" for an empty table.
  def signature
    Array(header).join(',')
  end

  private

  # Appends an array row, checking it against the header width.
  def add_row_array(row)
    raise "Argument must be an array" unless row.is_a?(Array)
    unless header.nil?
      n = header.count
      raise "Row '#{row[0]}' must have #{n} values...found #{row.count}" unless row.count == n
    end
    @table << row
    if @table.count == 1
      build_column_index # the row just added is the header
    else
      index_last_row
    end
  end

  # Appends a hash row; cells without a matching key are nil.
  def add_row_hash(row)
    raise "Argument must be a hash table" unless row.is_a?(Hash)
    new_row = Array.new(header.count)
    row.each_pair do |key, value|
      idx = @colindex[key]
      new_row[idx] = value unless idx.nil?
    end
    @table << new_row
    index_last_row
  end

  # Adds a hash to an empty table: keys become the header, values the first row.
  def add_first_row_hash(row)
    raise "Argument must be a hash table" unless row.is_a?(Hash)
    add_row_array(row.keys)
    add_row_array(row.values)
  end

  # Rebuilds the column index and, when an id column is set, the row index.
  # Drops the id column if it is no longer present in the header.
  def refresh_indexes
    build_column_index
    return if @idcolumn.nil?
    if @colindex.key?(@idcolumn)
      build_row_index
    else
      @idcolumn = nil
      @rowindex = nil
    end
  end

  # Maps each data row's id-column value to its row number.
  def build_row_index
    @rowindex = {}
    col = @colindex[@idcolumn]
    @table.each_with_index do |row, idx|
      next if idx.zero? # the header is not a data row
      @rowindex[row[col]] = idx
    end
  end

  # Registers the most recently appended row in the row index.
  def index_last_row
    return if @idcolumn.nil? || @rowindex.nil?
    @rowindex[@table.last[@colindex[@idcolumn]]] = @table.count - 1
  end

  # Maps each header value to its column number.
  def build_column_index
    @colindex = {}
    Array(header).each_with_index { |value, idx| @colindex[value] = idx }
  end

  # Builds the hash form of the row at +row_num+.
  def get_row_by_num(row_num)
    row = @table[row_num]
    raise IndexError, "Unknown row number #{row_num}" if row.nil?
    result = {}
    row.each_with_index { |value, idx| result[header[idx]] = value }
    result[:rownum] = row_num
    result
  end

  # Pushes +next_row+ onto +new_table+, or folds it into the last pushed row
  # when its cell in +colnum+ is blank.
  def demerge_it(colnum, next_row, new_table)
    if next_row[colnum].to_s.strip.empty?
      new_table.push(demerge_two_rows(new_table.pop, next_row))
    else
      new_table.push(next_row)
    end
  end

  # Joins two rows cell by cell with a newline, trimming the result.
  def demerge_two_rows(r1, r2)
    raise "Column number mismatch" if r1.count != r2.count
    r1.zip(r2).map { |upper, lower| "#{upper}\n#{lower}".strip }
  end

end
259
+
260
+ end