tables 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +3 -0
- data/README +3 -0
- data/Rakefile +35 -0
- data/lib/tables.rb +14 -0
- data/lib/tables/excel_table_reader.rb +175 -0
- data/lib/tables/table.rb +260 -0
- data/lib/tables/table_reader.rb +65 -0
- data/lib/tables/word_table_reader.rb +83 -0
- data/lib/tables/word_table_writer.rb +64 -0
- data/spec/copy_spec.rb +33 -0
- data/spec/excel_table_reader_spec.rb +132 -0
- data/spec/office_copy_spec.rb +55 -0
- data/spec/table_spec.rb +237 -0
- data/spec/text_handling_spec.rb +37 -0
- data/spec/word_table_reader_spec.rb +70 -0
- data/spec/word_table_writer_spec.rb +37 -0
- metadata +64 -0
data/LICENSE
ADDED
data/README
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
# Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
|
4
|
+
# Copyright:: Copyright (c) 2010, Saul Caganoff
|
5
|
+
# License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
|
6
|
+
#
|
7
|
+
|
8
|
+
|
9
|
+
require 'rubygems'
|
10
|
+
require 'rake'
|
11
|
+
require 'rake/clean'
|
12
|
+
require 'rubygems/package_task'
|
13
|
+
require 'rdoc/task'
|
14
|
+
require 'rake/testtask'
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
# Packaging task for the gem described by +spec+; a tar archive is requested
# and a zip archive is not.
# NOTE(review): +spec+ is not assigned anywhere in the visible part of this
# Rakefile -- confirm the Gem::Specification is defined before this point.
Gem::PackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_tar = true
  pkg.need_zip = false
end

# RDoc task: documents README, LICENSE and everything under lib/, starting on
# the README page and writing the output to doc/rdoc.
RDoc::Task.new do |doc|
  doc.rdoc_files.include('README', 'LICENSE', 'lib/**/*.rb')
  doc.main = "README"
  doc.title = "Tables Docs"
  doc.rdoc_dir = 'doc/rdoc'
  doc.options << '--line-numbers'
end

# Test task: runs every Ruby file found under test/.
# NOTE(review): the package appears to ship its specs under spec/ -- confirm
# that this glob actually matches the files that should be run.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList['test/**/*.rb']
end
|
data/lib/tables.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#
|
2
|
+
# Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
|
3
|
+
# Copyright:: Copyright (c) 2010, Saul Caganoff
|
4
|
+
# License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
|
5
|
+
#
|
6
|
+
|
7
|
+
$:.unshift File.dirname(__FILE__)
|
8
|
+
|
9
|
+
require 'tables/table'
|
10
|
+
require 'tables/table_reader'
|
11
|
+
require 'tables/excel_table_reader'
|
12
|
+
require 'tables/word_table_reader'
|
13
|
+
require 'tables/word_table_writer'
|
14
|
+
|
@@ -0,0 +1,175 @@
|
|
1
|
+
#
|
2
|
+
# Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
|
3
|
+
# Copyright:: Copyright (c) 2010, Saul Caganoff
|
4
|
+
# License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
|
5
|
+
#
|
6
|
+
require 'win32ole'
|
7
|
+
|
8
|
+
module Tables
|
9
|
+
|
10
|
+
# Reads tables from, and writes tables to, Excel workbooks by driving an
# Excel.Application instance through WIN32OLE automation.
class ExcelTableReader < TableReader

  attr_reader :workbook, :table

  # Number of rows between two progress messages.
  PROGRESS_INTERVAL = 50

  # Starts Excel and, when +filename+ is given, opens that workbook.
  #
  # filename:: optional path of the workbook to open
  #
  # If opening the workbook fails, the Excel instance started here is quit
  # before the error is re-raised; the caller never receives the reader in
  # that case and so could not shut Excel down itself.
  def initialize(filename=nil)
    @excel = WIN32OLE.new('Excel.Application')
    begin
      open_file(filename) unless filename.nil?
    rescue StandardError
      @excel.quit
      raise
    end
    super()
  end

  # Opens the workbook stored at +filename+ and makes it the current workbook.
  def open_file(filename)
    path = get_file_path(filename)
    # Keep the workbook that Open returns rather than Workbooks.Item(1), which
    # is a different workbook once more than one is open in this instance.
    @workbook = @excel.Workbooks.Open(path)
    puts "Open workbook '#{path}'" if $DEBUG
    @worksheets = []
  end

  # Creates a new workbook, makes it current and saves it as +filename+.
  def create_file(filename)
    path = get_file_path(filename)
    @workbook = @excel.Workbooks.Add
    @worksheets = []
    @workbook.SaveAs(path)
  end

  # Extracts the used range of a worksheet into a Table.
  #
  # worksheet:: zero-based sheet number, or any other key accepted by
  #             Worksheets.Item
  # options::   :rtf_columns - zero-based column numbers to read character by
  #             character (see slow_extract_text); :progress - when truthy, a
  #             message is printed every PROGRESS_INTERVAL rows
  #
  # Returns a Table with one row per row of the used range.
  def extract_table(worksheet, options={})
    rtf_columns = options[:rtf_columns] || []
    progress = options[:progress]
    range = get_worksheet(worksheet).UsedRange
    ncols = range.Columns.Count
    nrows = range.Rows.Count

    rows = (1..nrows).map do |idx|
      row = ExcelTableReader.extract_row(range.Rows(idx), ncols, rtf_columns)
      report_progress('extracting', idx, nrows) if progress
      row
    end
    Table.new(rows)
  end

  # Writes every row of +table+ (header included) to a worksheet, starting at
  # the sheet's first row. Each value is written as a string.
  #
  # options:: :progress - when truthy, prints a message every
  #           PROGRESS_INTERVAL rows
  def write_table(table, worksheet=0, options={})
    progress = options[:progress]
    sheet = get_worksheet(worksheet)
    nrows = table.row_count
    table.each_row_with_index do |row, offset|
      row_num = offset + 1 # Excel rows are 1-based
      report_progress('writing', row_num, nrows) if progress
      excel_row = sheet.Rows(row_num)
      row.each_with_index { |val, jdx| excel_row.Cells(jdx + 1).Value = val.to_s }
    end
  end

  # Writes a single column of +table+ (header cell included) to the worksheet
  # column at the same position. Each value is written as a string.
  #
  # options:: :progress - when truthy, prints a message every
  #           PROGRESS_INTERVAL rows
  def write_column(table, column_name, worksheet=0, options={})
    progress = options[:progress]
    sheet = get_worksheet(worksheet)
    values = table.get_column(column_name)
    column_index = table.colindex[column_name] + 1
    nrows = table.row_count
    values.each_with_index do |val, offset|
      row_num = offset + 1 # Excel rows are 1-based
      report_progress('updating', row_num, nrows) if progress
      sheet.Rows(row_num).Cells(column_index).Value = val.to_s
    end
  end

  # Number of worksheets in the current workbook.
  def table_count
    @workbook.WorkSheets.Count
  end

  # Tidies every table in @tables: removes blank rows (ignoring the first
  # column), removes repeated header rows and de-merges split rows.
  # NOTE(review): @tables is not assigned in this class -- presumably it is
  # provided by TableReader; confirm.
  def clean
    @tables.each do |table|
      table.remove_blank_rows!(1)
      table.remove_repeat_headers!
      table.demerge!
    end
  end

  # Saves the current workbook.
  def save
    @workbook.save
  end

  # Quits the Excel instance owned by this reader.
  def exit
    @excel.quit
  end

  # Reads +n+ cells of an Excel row into an array of strings.
  #
  # rtf_columns:: zero-based column numbers that are read with
  #               slow_extract_text; all other columns use extract_text
  def self.extract_row(excel_row, n, rtf_columns)
    (1..n).map do |col|
      cell = excel_row.Cells(col)
      # +col+ is 1-based for Excel while rtf_columns is zero-based.
      rtf_columns.include?(col - 1) ? slow_extract_text(cell) : extract_text(cell)
    end
  end

  # Returns the displayed text of +range+ with every BEL character ("\a")
  # replaced by " - ", matching what slow_extract_text does per character.
  def self.extract_text(range)
    range.Text.gsub("\a", " - ")
  end

  # Reads +range+ one character at a time so that characters formatted with
  # strikethrough can be left out; BEL characters become " - ".
  # Falls back to extract_text if the per-character walk raises.
  def self.slow_extract_text(range)
    text = ""
    (1..range.Characters.Count).each do |idx|
      char = range.Characters(idx, 1)
      glyph = char.Text
      if glyph == "\a"
        text << " - "
      else
        text << glyph unless char.Font.Strikethrough
      end
    end
    text
  rescue StandardError
    extract_text(range)
  end

  private

  # Prints a progress line for +row_num+ (1-based) every PROGRESS_INTERVAL rows.
  def report_progress(verb, row_num, total)
    return unless row_num.modulo(PROGRESS_INTERVAL) == 0
    percent = (row_num.fdiv(total) * 100).round(0)
    puts ">> #{verb} row #{row_num} (#{percent}%)"
  end

  # Resolves +worksheet+ to a worksheet object. An Integer is a zero-based
  # position; when it lies beyond the last sheet a new sheet is added and
  # returned. Any other value is passed to Worksheets.Item unchanged.
  def get_worksheet(worksheet)
    sheets = @workbook.Worksheets
    return sheets.Item(worksheet) unless worksheet.is_a?(Integer)
    position = worksheet + 1
    position > sheets.Count ? sheets.Add : sheets.Item(position)
  end

  # Converts +filename+ to an absolute path using Scripting.FileSystemObject.
  def get_file_path(filename)
    fso = WIN32OLE.new('Scripting.FileSystemObject')
    fso.GetAbsolutePathName(filename)
  end

end
|
174
|
+
|
175
|
+
end
|
data/lib/tables/table.rb
ADDED
@@ -0,0 +1,260 @@
|
|
1
|
+
#
|
2
|
+
# Author:: Saul Caganoff (mailto:scaganoff@gmail.com)
|
3
|
+
# Copyright:: Copyright (c) 2010, Saul Caganoff
|
4
|
+
# License:: Creative Commons Attribution 3.0 Australia License (http://creativecommons.org/licenses/by/3.0/au/)
|
5
|
+
#
|
6
|
+
|
7
|
+
module Tables
|
8
|
+
|
9
|
+
# An in-memory table held as an array of row arrays. Row 0 is the header; its
# cells name the columns. Rows can also be read and written as hashes keyed by
# column name, in which case the extra key :rownum carries the row's position.
class Table

  attr_reader :table, :colindex, :idcolumn
  attr_accessor :name

  # table:: optional enumerable of row arrays; the first row is the header
  # name::  optional table name
  def initialize(table=nil, name=nil)
    @table = []
    @colindex = {}
    @idcolumn = nil
    @rowindex = nil
    @rowindex_stale = false
    table.each { |row| @table << row } unless table.nil?
    build_column_index
    self.name = name unless name.nil?
  end

  # The header row, or nil when the table has no rows.
  def header
    @table[0]
  end

  # Hash mapping each id-column value to its row number, or nil when no id
  # column has been chosen. Rebuilt on demand after the table has changed.
  def rowindex
    build_row_index if @rowindex_stale
    @rowindex
  end

  # Chooses the column whose values identify rows for get_row.
  # Raises if +column_name+ is not a column of this table.
  def idcolumn=(column_name)
    raise "Unknown column '#{column_name}'" unless @colindex.has_key?(column_name)
    @idcolumn = column_name
    build_row_index
  end

  # Returns the cell in column +column_name+ of row +row_num+.
  # Raises if the column is unknown.
  def get_value(column_name, row_num)
    col_num = @colindex[column_name]
    raise "Unknown column name '#{column_name}'" if col_num.nil?
    @table[row_num][col_num]
  end

  # Returns a row as a hash of column name => value plus :rownum.
  #
  # arg:: an Integer row number, or otherwise a value of the id column
  #
  # Raises if no id column is set or the id / row number does not exist.
  def get_row(arg)
    return get_row_by_num(arg) if arg.is_a?(Integer)
    raise "No id column has been set" if @idcolumn.nil?
    row_num = rowindex[arg]
    raise "Unknown row id '#{arg}'" if row_num.nil?
    get_row_by_num(row_num)
  end

  # Appends a row given as an Array (must match the header width) or as a
  # Hash keyed by column name. A Hash added to an empty table also creates
  # the header from its keys. Other argument types are ignored.
  # Returns self so calls can be chained.
  def add_row(row)
    if row.is_a?(Array)
      add_row_array(row)
    elsif row.is_a?(Hash)
      if row_count > 0
        add_row_hash(row)
      else
        add_first_row_hash(row)
      end
    end
    invalidate_row_index
    self
  end

  # Returns the raw row array at position +idx+.
  def [](idx)
    @table[idx]
  end

  # Two tables are equal when their row data is equal; anything that is not
  # a Table is never equal.
  def ==(other)
    other.is_a?(Table) && table == other.table
  end

  # obsolescent
  def rows
    puts "WARNING: use 'row_count' instead of 'rows'"
    row_count
  end

  # Number of columns (0 for a table without a header).
  def column_count
    header.nil? ? 0 : header.count
  end

  # Number of rows, header included.
  def row_count
    @table.count
  end

  # True when +t2+ has the same header as this table.
  def similar?(t2)
    header == t2.header
  end

  # Appends every non-header row of +t2+. Rows that cannot be added are
  # reported on standard output and skipped. Raises if the headers differ.
  # Returns self, like the other bang methods.
  def merge!(t2)
    raise "tables are not similar" unless similar?(t2)
    (1..t2.row_count-1).each do |idx|
      begin
        self << t2[idx]
      rescue StandardError => e
        # StandardError only: interrupts and exit requests must not be swallowed.
        puts "ERROR: Error adding row #{idx} from '#{t2.name}' to '#{self.name}'"
        puts "ERROR: #{e.message}"
      end
    end
    self
  end

  # << is a synonym of add_row
  def <<(row)
    add_row(row)
  end

  # Yields every row array, header included. Without a block an enumerator
  # is returned.
  def each_row
    return enum_for(:each_row) unless block_given?
    @table.each { |row| yield(row) }
  end

  # Yields every row array together with its row number. Without a block an
  # enumerator is returned.
  def each_row_with_index
    return enum_for(:each_row_with_index) unless block_given?
    @table.each_with_index { |row, idx| yield(row, idx) }
  end

  # Yields every row as a hash (see get_row); the header row is skipped
  # unless +skip_header+ is false. Without a block an enumerator is returned.
  def get_each_row(skip_header=true)
    return enum_for(:get_each_row, skip_header) unless block_given?
    @table.each_with_index do |_row, idx|
      yield(get_row_by_num(idx)) unless idx == 0 && skip_header
    end
  end

  # Writes the values of a row hash back into the row named by its :rownum
  # entry. Keys that are not column names are ignored.
  # TODO: How can we do this within the get_row closure?
  def set_row(row)
    idx = row[:rownum]
    raise "Row hash has no :rownum entry" if idx.nil?
    target = @table[idx]
    raise "Row number #{idx} is out of range" if target.nil?
    row.each_pair do |colname, value|
      jdx = @colindex[colname]
      next if jdx.nil?
      # A changed id value makes the id => row number index out of date.
      invalidate_row_index if colname == @idcolumn && target[jdx] != value
      target[jdx] = value
    end
  end

  # Removes every row whose cells from +startcol+ onwards are all blank.
  def remove_blank_rows!(startcol=0)
    remove_matched_rows! { |row| (row[startcol..-1] || []).join.strip == "" }
  end

  # Removes rows that repeat the header's content (the header itself stays).
  def remove_repeat_headers!
    first = header
    remove_matched_rows! { |row| row.eql?(first) && !row.equal?(first) }
  end

  # Removes every row for which the block returns true. The block sees the
  # complete, unmodified table; removal happens afterwards by object identity.
  # Returns self.
  def remove_matched_rows!
    doomed = {}.compare_by_identity
    @table.each { |row| doomed[row] = true if yield(row) }
    @table.delete_if { |row| doomed.key?(row) }
    build_column_index
    invalidate_row_index
    self
  end

  # Folds each row whose cell in column +colnum+ is blank into the row above
  # it, joining the corresponding cells with a newline. Returns self.
  def demerge!(colnum=0)
    return self if @table.empty?
    new_table = [@table[0]]
    (1..@table.count-1).each do |idx|
      demerge_it(colnum, idx, new_table)
    end
    @table = new_table
    # The header may have absorbed the row below it, and row numbers shift.
    build_column_index
    invalidate_row_index
    self
  end

  # Renames a column. Raises if +old_name+ is not a column.
  def rename_column(old_name, new_name)
    colnum = @colindex[old_name]
    raise "Unknown column '#{old_name}'" if colnum.nil?
    # Delete before inserting so that renaming a column to itself keeps it.
    @colindex.delete(old_name)
    @colindex[new_name] = colnum
    @idcolumn = new_name if !@idcolumn.nil? && @idcolumn == old_name
    @table[0][colnum] = new_name
  end

  # Deletes a column from every row and returns its former position.
  # Raises if +name+ is not a column.
  def delete_column(name)
    colnum = @colindex[name]
    raise "Unknown column '#{name}'" if colnum.nil?
    each_row { |row| row.delete_at(colnum) }
    # Columns to the right have moved one place left, so re-index them all.
    build_column_index
    if !@idcolumn.nil? && name == @idcolumn
      if @colindex.has_key?(@idcolumn)
        invalidate_row_index
      else
        drop_row_index
      end
    end
    colnum
  end

  # Returns all cells of a column, header cell first.
  # Raises if +name+ is not a column.
  def get_column(name)
    colnum = @colindex[name]
    raise "Unknown column '#{name}'" if colnum.nil?
    @table.map { |row| row[colnum] }
  end

  # Adds every non-header row of +other_table+ to this table, matching
  # values to columns by name.
  def column_copy(other_table)
    other_table.get_each_row do |other_row|
      add_row(other_row)
    end
  end

  # The header cells joined by commas ("" for an empty table).
  def signature
    header.to_a.join(',')
  end

  private

  # Appends a row array after checking it against the header width.
  def add_row_array(row)
    raise "Argument must be an array" unless row.is_a? Array
    unless header.nil?
      n = header.count
      raise "Row '#{row[0]}' must have #{n} values...found only #{row.count}" unless row.count == n
    end
    @table << row
    build_column_index if @table.count == 1
  end

  # Appends a row built from a hash; columns missing from the hash are nil
  # and keys that are not columns are ignored.
  def add_row_hash(row)
    raise "Argument must be a hash table" unless row.is_a? Hash
    new_row = Array.new(column_count)
    row.each_pair do |k, v|
      idx = @colindex[k]
      new_row[idx] = v unless idx.nil?
    end
    @table << new_row
  end

  # Creates the header from the hash keys and adds the values as first row.
  def add_first_row_hash(row)
    raise "Argument must be a hash table" unless row.is_a? Hash
    new_header = []
    new_row = []
    row.each_pair do |k, v|
      # :rownum is the position marker added by get_row, not a column.
      next if k == :rownum
      new_header << k
      new_row << v
    end
    add_row_array(new_header)
    add_row_array(new_row)
  end

  # Rebuilds the id value => row number index from the id column.
  def build_row_index
    @rowindex = {}
    @rowindex_stale = false
    id_col = @colindex[@idcolumn]
    return @rowindex if id_col.nil?
    @table.each_with_index do |row, row_num|
      next if row_num == 0 # the header row has no id
      @rowindex[row[id_col]] = row_num
    end
    @rowindex
  end

  # Marks the row index for rebuilding (only relevant once an id column is set).
  def invalidate_row_index
    @rowindex_stale = !@idcolumn.nil?
  end

  # Forgets the id column and its index.
  def drop_row_index
    @idcolumn = nil
    @rowindex = nil
    @rowindex_stale = false
  end

  # Rebuilds the column name => position index from the header.
  def build_column_index
    @colindex = {}
    return @colindex if header.nil?
    header.each_with_index { |value, idx| @colindex[value] = idx }
    @colindex
  end

  # Builds the hash form of the row at +row_num+.
  def get_row_by_num(row_num)
    row = @table[row_num]
    raise "Row number #{row_num} is out of range" if row.nil?
    result = {}
    row.each_with_index { |v, idx| result[header[idx]] = v }
    result[:rownum] = row_num
    result
  end

  # Pushes row +idx+ onto +new_table+, or merges it into the last pushed row
  # when its cell in column +colnum+ is blank (nil counts as blank).
  def demerge_it(colnum, idx, new_table)
    next_row = @table[idx]
    if next_row[colnum].to_s.strip == ""
      new_table.push(demerge_two_rows(new_table.pop, next_row))
    else
      new_table.push(next_row)
    end
  end

  # Joins two rows cell by cell with a newline and strips each result.
  # Cells are converted to strings first, so nil cells count as empty.
  def demerge_two_rows(r1, r2)
    raise "Column number mismatch" if r1.count != r2.count
    r1.zip(r2).map { |upper, lower| "#{upper}\n#{lower}".strip }
  end

end
|
259
|
+
|
260
|
+
end
|