tables 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ == Tables
2
+
3
+ Put the appropriate LICENSE for your project here.
data/README ADDED
@@ -0,0 +1,3 @@
1
+ == Tables
2
+
3
+ You should document your project here.
@@ -0,0 +1,35 @@
1
+ #
2
+ #
3
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
4
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
5
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
6
+ #
7
+
8
+
9
+ require 'rubygems'
10
+ require 'rake'
11
+ require 'rake/clean'
12
+ require 'rubygems/package_task'
13
+ require 'rdoc/task'
14
+ require 'rake/testtask'
15
+
16
+
17
+
18
# Packaging task: builds the gem and a tarball, but no zip archive.
# NOTE(review): `spec` is not defined in the visible part of this file;
# it presumably has to be a Gem::Specification defined above -- confirm.
Gem::PackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_zip = false
  pkg.need_tar = true
end
23
+
24
# Documentation task: renders README, LICENSE and everything under lib/
# into doc/rdoc, starting on the README page.
RDoc::Task.new do |doc|
  sources = ['README', 'LICENSE', 'lib/**/*.rb']
  doc.rdoc_files.add(sources)
  doc.title    = "Tables Docs"
  doc.main     = "README"    # page to start on
  doc.rdoc_dir = 'doc/rdoc'  # rdoc output folder
  doc.options << '--line-numbers'
end
32
+
33
# Test task: every Ruby file below test/ is treated as a test file.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList['test/**/*.rb']
end
@@ -0,0 +1,14 @@
1
+ #
2
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
3
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
4
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
5
+ #
6
+
7
+ $:.unshift File.dirname(__FILE__)
8
+
9
+ require 'tables/table'
10
+ require 'tables/table_reader'
11
+ require 'tables/excel_table_reader'
12
+ require 'tables/word_table_reader'
13
+ require 'tables/word_table_writer'
14
+
@@ -0,0 +1,175 @@
1
+ #
2
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
3
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
4
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
5
+ #
6
+ require 'win32ole'
7
+
8
module Tables

  # Reads tables from, and writes tables to, Excel workbooks by driving an
  # Excel.Application instance through WIN32OLE automation.
  #
  # Worksheets are addressed either by a zero-based Integer position or by
  # any other key that the workbook's Worksheets collection accepts
  # (e.g. the sheet name).
  class ExcelTableReader < TableReader

    attr_reader :workbook, :table

    # Starts Excel and, when +filename+ is given, opens that workbook.
    def initialize(filename=nil)
      @excel = WIN32OLE.new('Excel.Application')
      open_file(filename) unless filename.nil?
      super()
    end

    # Opens the workbook stored at +filename+ and makes it the current
    # workbook.
    def open_file(filename)
      path = get_file_path(filename)
      # Use the object returned by Open rather than Workbooks.Item(1):
      # Item(1) is a different workbook whenever one was already open.
      @workbook = @excel.Workbooks.Open(path)
      puts "Open workbook '#{path}'" if $DEBUG
      @worksheets = []
    end

    # Creates a new workbook, makes it current and saves it as +filename+.
    def create_file(filename)
      path = get_file_path(filename)
      @workbook = @excel.Workbooks.Add
      @worksheets = []
      @workbook.SaveAs(path)
    end

    # Extracts the used range of +worksheet+ into a new Table.
    #
    # Options:
    # :rtf_columns:: zero-based column numbers that are read character by
    #                character so that struck-through text is dropped.
    # :progress::    when truthy, prints a progress line every 50 rows.
    def extract_table(worksheet, options={})
      rtf_columns = options[:rtf_columns] || []
      progress = options[:progress]
      range = get_worksheet(worksheet).UsedRange
      ncols = range.Columns.Count
      nrows = range.Rows.Count

      result = []
      (1..nrows).each do |idx|
        result << ExcelTableReader.extract_row(range.Rows(idx), ncols, rtf_columns)
        report_progress("extracting", idx, nrows) if progress
      end
      Table.new(result)
    end

    # Writes every row of +table+ (header included) into +worksheet+,
    # starting at the first sheet row. Values are written as strings.
    # Prints a progress line every 50 rows when options[:progress] is truthy.
    def write_table(table, worksheet=0, options={})
      progress = options[:progress]
      sheet = get_worksheet(worksheet)
      nrows = table.row_count
      row_num = 0
      table.each_row do |row|
        row_num += 1
        report_progress("writing", row_num, nrows) if progress
        sheet_row = sheet.Rows(row_num)
        row.each_with_index { |val, jdx| sheet_row.Cells(jdx + 1).Value = val.to_s }
      end
    end

    # Writes one column of +table+ (header cell included) into the same
    # column position of +worksheet+. Values are written as strings.
    # Prints a progress line every 50 rows when options[:progress] is truthy.
    def write_column(table, column_name, worksheet=0, options={})
      progress = options[:progress]
      sheet = get_worksheet(worksheet)
      values = table.get_column(column_name)
      column_index = table.colindex[column_name] + 1
      nrows = table.row_count
      values.each_with_index do |val, idx|
        row_num = idx + 1  # sheet rows are 1-based; report that number too
        report_progress("updating", row_num, nrows) if progress
        sheet.Rows(row_num).Cells(column_index).Value = val.to_s
      end
    end

    # Number of worksheets in the current workbook.
    def table_count
      @workbook.WorkSheets.Count
    end

    # Normalises every table held in @tables: removes blank rows (ignoring
    # the first column), repeated header rows, and merges continuation rows.
    # NOTE(review): @tables is not assigned in this class; presumably it is
    # maintained by TableReader -- confirm.
    def clean
      @tables.each do |table|
        table.remove_blank_rows!(1)
        table.remove_repeat_headers!
        table.demerge!
      end
    end

    # Saves the current workbook.
    def save
      @workbook.save
    end

    # Quits the Excel application that was started by this reader.
    def exit
      @excel.quit
    end

    # The class methods below are public: a bare `private` does not apply
    # to singleton methods, and extract_table calls them with an explicit
    # receiver.

    # Converts one worksheet row into an Array of +n+ strings. Columns
    # listed in +rtf_columns+ (zero-based) go through slow_extract_text.
    def self.extract_row(excel_row, n, rtf_columns)
      # convert zero-based rtf columns into 1-based for internal loop
      rtf_cols = rtf_columns.map { |idx| idx + 1 }

      (1..n).map do |idx|
        cell = excel_row.Cells(idx)
        rtf_cols.include?(idx) ? slow_extract_text(cell) : extract_text(cell)
      end
    end

    # Displayed text of +range+ with every "\a" replaced by " - ", matching
    # what slow_extract_text does for each character.
    def self.extract_text(range)
      range.Text.to_s.gsub("\a", " - ")
    end

    # Text of +range+ built character by character so that struck-through
    # characters can be skipped. Falls back to extract_text when the
    # per-character access raises.
    def self.slow_extract_text(range)
      parts = []
      n = range.Characters.Count
      (1..n).each do |idx|
        c = range.Characters(idx, 1)
        t = c.Text
        if t == "\a"
          parts << " - "
        else
          parts << t unless c.Font.Strikethrough
        end
      end
      parts.join
    rescue
      extract_text(range)
    end

    private

    # Prints a progress line on every 50th row.
    def report_progress(action, row_num, nrows)
      return unless row_num.modulo(50) == 0
      percent = (row_num.fdiv(nrows) * 100).round(0)
      puts ">> #{action} row #{row_num} (#{percent}%)"
    end

    # Resolves +worksheet+ to a worksheet object. An Integer is a zero-based
    # position; a position past the last sheet adds a new sheet instead.
    # Anything else is passed to Worksheets.Item unchanged.
    def get_worksheet(worksheet)
      sheets = @workbook.Worksheets
      return sheets.Item(worksheet) unless worksheet.is_a?(Integer)

      if (worksheet + 1) > sheets.Count
        sheets.Add
      else
        sheets.Item(worksheet + 1)
      end
    end

    # Absolute path for +filename+, resolved by Scripting.FileSystemObject.
    def get_file_path(filename)
      fso = WIN32OLE.new('Scripting.FileSystemObject')
      fso.GetAbsolutePathName(filename)
    end

  end

end
@@ -0,0 +1,260 @@
1
+ #
2
+ # Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
3
+ # Copyright:: Copyright (c) 2010, Saul Caganoff
4
+ # License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
5
+ #
6
+
7
module Tables

  # An in-memory table: an Array of row Arrays whose first row is the header.
  #
  # Two lookup indexes are maintained:
  # colindex:: header value => zero-based column number
  # rowindex:: value in the id column => row number (filled once an id
  #            column has been chosen with #idcolumn=)
  #
  # Rows handed to the table are stored by reference, not copied.
  class Table

    attr_reader :table, :colindex, :idcolumn, :rowindex
    attr_accessor :name

    # table:: optional enumerable of row Arrays; the first row is the header.
    # name::  optional table name, used in error reports.
    def initialize(table=nil, name=nil)
      @table = []
      @colindex = {}
      @rowindex = {}
      @idcolumn = nil
      table.each { |row| @table << row } unless table.nil?
      build_column_index
      self.name = name unless name.nil?
    end

    # The header row, or nil when the table is empty.
    def header
      @table[0]
    end

    # Chooses the column whose values identify rows and (re)builds rowindex.
    # Raises RuntimeError when the column does not exist.
    def idcolumn=(column_name)
      raise "Unknown column '#{column_name}'" unless @colindex.has_key?(column_name)
      @idcolumn = column_name
      build_row_index
    end

    # Value stored in column +column_name+ of row +row_num+.
    # Raises RuntimeError for an unknown column or row.
    def get_value(column_name, row_num)
      col_num = @colindex[column_name]
      raise "Unknown column name '#{column_name}'" if col_num.nil?
      row = @table[row_num]
      raise "Unknown row number '#{row_num}'" if row.nil?
      row[col_num]
    end

    # Returns a row as a Hash of header value => cell value, plus the key
    # :rownum holding the row number. +arg+ is either an Integer row number
    # or a value of the id column. Raises RuntimeError when no row matches.
    def get_row(arg)
      return get_row_by_num(arg) if arg.is_a?(Integer)

      row_num = @rowindex[arg]
      raise "Unknown row id '#{arg}'" if row_num.nil?
      get_row_by_num(row_num)
    end

    # Appends a row given as an Array (one value per column) or as a Hash
    # keyed by column name. A Hash added to an empty table supplies both the
    # header (its keys) and the first data row (its values). Any other
    # argument type is ignored. Returns self so calls can be chained.
    def add_row(row)
      case row
      when Array
        add_row_array(row)
      when Hash
        if @table.empty?
          add_first_row_hash(row)
        else
          add_row_hash(row)
        end
      end
      self
    end

    # Row Array at position +idx+ (0 is the header).
    def [](idx)
      @table[idx]
    end

    # Tables are equal when their row data is equal; anything that does not
    # expose #table is simply not equal.
    def ==(other)
      other.respond_to?(:table) && table == other.table
    end

    # obsolescent
    def rows
      puts "WARNING: use 'row_count' instead of 'rows'"
      row_count
    end

    # Number of columns in the header; 0 for an empty table.
    def column_count
      header.nil? ? 0 : header.count
    end

    # Number of rows, header included.
    def row_count
      @table.count
    end

    # True when both tables have the same header.
    def similar?(t2)
      header == t2.header
    end

    # Appends every data row of +t2+ to this table. Rows that cannot be
    # added are reported on stdout and skipped.
    # Raises RuntimeError when the headers differ.
    def merge!(t2)
      raise "tables are not similar" unless similar?(t2)
      (1..t2.row_count-1).each do |idx|
        begin
          self << t2[idx]
        rescue StandardError => e
          puts "ERROR: Error adding row #{idx} from '#{t2.name}' to '#{self.name}'"
          puts "ERROR: #{e.message}"
        end
      end
    end

    # << is a synonym of add_row
    def <<(row)
      add_row(row)
    end

    # Yields every row Array, header included. Without a block an
    # Enumerator is returned.
    def each_row
      return enum_for(:each_row) unless block_given?
      @table.each { |row| yield(row) }
    end

    # Yields every row Array with its row number, header included.
    def each_row_with_index
      return enum_for(:each_row_with_index) unless block_given?
      @table.each_with_index { |row, idx| yield(row, idx) }
    end

    # Yields every row as a Hash (see #get_row), skipping the header unless
    # +skip_header+ is false.
    def get_each_row(skip_header=true)
      return enum_for(:get_each_row, skip_header) unless block_given?
      @table.each_with_index do |_row, idx|
        yield(get_row(idx)) unless idx == 0 && skip_header
      end
    end

    # Writes the values of a row Hash (as returned by #get_row) back into
    # the row numbered row[:rownum]. Keys that are not column names are
    # ignored. Returns +row+.
    def set_row(row)
      idx = row[:rownum]
      id_changed = false
      row.each_pair do |colname, value|
        jdx = @colindex[colname]
        next if jdx.nil?
        id_changed = true if !@idcolumn.nil? && colname == @idcolumn && @table[idx][jdx] != value
        @table[idx][jdx] = value
      end
      build_row_index if id_changed  # keep id lookups in step with the data
      row
    end

    # Removes every row whose cells from column +startcol+ onwards are all
    # blank. Returns self.
    def remove_blank_rows!(startcol=0)
      remove_matched_rows! { |row| Array(row[startcol..-1]).join.strip == "" }
    end

    # Removes rows that repeat the header (the header row itself stays).
    # Returns self.
    def remove_repeat_headers!
      first = header
      remove_matched_rows! { |row| row.eql?(first) && !row.equal?(first) }
    end

    # Removes every row for which the block returns true, then rebuilds the
    # column and row indexes. Returns self.
    def remove_matched_rows!
      # Decide first, delete afterwards; rows are matched by object identity
      # so that equal-looking rows that were not selected survive.
      doomed = {}.compare_by_identity
      @table.each { |row| doomed[row] = true if yield(row) }
      @table.delete_if { |row| doomed.has_key?(row) }
      build_column_index
      refresh_row_index
      self
    end

    # Folds continuation rows into the row above them: a row whose cell in
    # column +colnum+ is blank is joined cell by cell (newline separated) to
    # the previous row. Returns self.
    def demerge!(colnum=0)
      return self if @table.empty?

      new_table = [@table[0]]
      (1..@table.count-1).each { |idx| demerge_it(colnum, idx, new_table) }
      @table = new_table
      # The header itself may have absorbed a continuation row.
      build_column_index
      refresh_row_index
      self
    end

    # Renames a column. Raises RuntimeError when +old_name+ is unknown.
    def rename_column(old_name, new_name)
      colnum = @colindex[old_name]
      raise "Unknown column '#{old_name}'" if colnum.nil?
      # Delete before insert so that renaming to the same name is harmless.
      @colindex.delete(old_name)
      @colindex[new_name] = colnum
      @idcolumn = new_name if !@idcolumn.nil? && @idcolumn == old_name
      @table[0][colnum] = new_name
    end

    # Deletes a column from every row and returns its former column number.
    # Raises RuntimeError when the column is unknown.
    def delete_column(name)
      colnum = @colindex[name]
      raise "Unknown column '#{name}'" if colnum.nil?
      @table.each { |row| row.delete_at(colnum) }
      # Columns to the right have shifted, so the whole index is rebuilt.
      build_column_index
      refresh_row_index
      colnum
    end

    # All values of a column, header cell included.
    # Raises RuntimeError when the column is unknown.
    def get_column(name)
      colnum = @colindex[name]
      raise "Unknown column '#{name}'" if colnum.nil?
      @table.map { |row| row[colnum] }
    end

    # Appends the data rows of +other_table+, matching cells by column name;
    # columns this table does not have are dropped.
    def column_copy(other_table)
      other_table.get_each_row do |other_row|
        # :rownum is bookkeeping added by get_row, not a column.
        add_row(other_row.reject { |key, _value| key == :rownum })
      end
    end

    # Header values joined with commas; "" for an empty table.
    def signature
      header.to_a.join(',')
    end

    private

    # Appends a row Array; it must match the header width when there is one.
    def add_row_array(row)
      raise "Argument must be an array" unless row.is_a? Array
      unless header.nil?
        n = header.count
        raise "Row '#{row[0]}' must have #{n} values...found only #{row.count}" unless row.count == n
      end
      @table << row
      build_column_index if @table.count == 1
      index_last_row
    end

    # Appends a row built from a Hash; columns missing from the Hash are nil.
    def add_row_hash(row)
      raise "Argument must be a hash table" unless row.is_a? Hash
      new_row = Array.new(column_count)
      row.each_pair do |k, v|
        idx = @colindex[k]
        new_row[idx] = v unless idx.nil?
      end
      @table << new_row
      index_last_row
    end

    # Uses the keys of +row+ as header and its values as the first data row.
    def add_first_row_hash(row)
      raise "Argument must be a hash table" unless row.is_a? Hash
      add_row_array(row.keys)
      add_row_array(row.values)
    end

    # Rebuilds rowindex from the id column; the last duplicate id wins.
    def build_row_index
      @rowindex = {}
      col = @colindex[@idcolumn]
      return @rowindex if col.nil?
      @table.each_with_index do |row, idx|
        @rowindex[row[col]] = idx unless idx == 0
      end
      @rowindex
    end

    # Rebuilds rowindex after a structural change, or forgets the id column
    # when it no longer exists.
    def refresh_row_index
      return if @idcolumn.nil?
      if @colindex.has_key?(@idcolumn)
        build_row_index
      else
        @idcolumn = nil
        @rowindex = {}
      end
    end

    # Registers the row that was just appended in rowindex.
    def index_last_row
      return if @idcolumn.nil? || @table.count < 2
      col = @colindex[@idcolumn]
      @rowindex[@table.last[col]] = @table.count - 1 unless col.nil?
    end

    # Rebuilds colindex from the header; empty when there is no header.
    def build_column_index
      @colindex = {}
      return @colindex if header.nil?
      header.each_with_index { |value, idx| @colindex[value] = idx }
      @colindex
    end

    # Row +row_num+ as a Hash of header value => cell, plus :rownum.
    def get_row_by_num(row_num)
      row = @table[row_num]
      raise "Unknown row number '#{row_num}'" if row.nil?
      result = {}
      row.each_with_index { |v, idx| result[header[idx]] = v }
      result[:rownum] = row_num
      result
    end

    # Pushes row +idx+ onto +new_table+, or merges it into the last pushed
    # row when its cell in column +colnum+ is blank.
    def demerge_it(colnum, idx, new_table)
      next_row = @table[idx]
      if next_row[colnum].to_s.strip == ""
        new_table.push(demerge_two_rows(new_table.pop, next_row))
      else
        new_table.push(next_row)
      end
    end

    # Joins two rows cell by cell with a newline, trimming the result.
    def demerge_two_rows(r1, r2)
      raise "Column number mismatch" if r1.count != r2.count
      (0..r1.count-1).map { |idx| (r1[idx].to_s + "\n" + r2[idx].to_s).strip }
    end

  end

end