tb 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +60 -0
- data/bin/tb +1137 -0
- data/lib/tb.rb +35 -0
- data/lib/tb/basic.rb +1071 -0
- data/lib/tb/csv.rb +125 -0
- data/lib/tb/enumerable.rb +284 -0
- data/lib/tb/fieldset.rb +96 -0
- data/lib/tb/pathfinder.rb +569 -0
- data/lib/tb/qtsv.rb +93 -0
- data/lib/tb/reader.rb +213 -0
- data/lib/tb/record.rb +129 -0
- data/lib/tb/tsv.rb +93 -0
- data/sample/excel2csv +270 -0
- data/sample/poi-xls2csv.rb +397 -0
- data/sample/poi-xls2csv.sh +39 -0
- data/test-all.rb +7 -0
- data/test/test_basic.rb +290 -0
- data/test/test_csv.rb +78 -0
- data/test/test_enumerable.rb +122 -0
- data/test/test_record.rb +12 -0
- data/test/test_tsv.rb +41 -0
- metadata +73 -0
data/sample/excel2csv
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# sample/excel2csv - XLS to CSV converter using Microsoft Excel via WIN32OLE.
|
4
|
+
#
|
5
|
+
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
9
|
+
#
|
10
|
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
11
|
+
# list of conditions and the following disclaimer.
|
12
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
14
|
+
# and/or other materials provided with the distribution.
|
15
|
+
# 3. The name of the author may not be used to endorse or promote products
|
16
|
+
# derived from this software without specific prior written permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
19
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
21
|
+
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
23
|
+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
26
|
+
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
27
|
+
# OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
# works only on Windows with Microsoft Excel.
|
30
|
+
# (This script invokes Excel using win32ole.)
|
31
|
+
|
32
|
+
# usage:
|
33
|
+
# excel2csv [opts] filename.xls ...
|
34
|
+
#
|
35
|
+
# options:
|
36
|
+
# -h : show help message
|
37
|
+
# -a : convert all sheets (sheet name is prepended to all rows)
|
38
|
+
# -f : prepend filename for all rows
|
39
|
+
# --mergecells=fill : set the value for all cells in a merged area
|
40
|
+
# --mergecells=topleft : set the value only for the top left cell of a merged area
|
41
|
+
|
42
|
+
require 'tb'
|
43
|
+
|
44
|
+
require 'win32ole'
|
45
|
+
require 'optparse'
|
46
|
+
require 'find'
|
47
|
+
require 'pp'
|
48
|
+
|
49
|
+
# Turns +filename+ into an absolute path by asking the Windows
# Scripting.FileSystemObject COM object (GetAbsolutePathName).
#
# filename :: path as given on the command line.
# Returns whatever GetAbsolutePathName returns for that path.
def getAbsolutePath(filename)
  WIN32OLE.new('Scripting.FileSystemObject').GetAbsolutePathName(filename)
end
|
53
|
+
|
54
|
+
# Empty namespace; the main script fills it with Excel's constants
# (XlNone, XlEdgeLeft, ...) through WIN32OLE.const_load.
module Excel; end

# Command-line switches live in globals so the converter methods can read them.
$opt_all_sheets, $opt_prepend_filename, $opt_border = false, false, false
$opt_mergecells = 'fill'

op = OptionParser.new
op.define('-h', 'show help message') do
  puts op
  exit true
end
op.define('-a', 'convert all sheets (sheet name is prepended to all rows)') do
  $opt_all_sheets = true
end
op.define('-f', 'prepend filename for all rows') do
  $opt_prepend_filename = true
end
op.define('--mergecells=mode', '"fill" or "topleft"') do |mode|
  $opt_mergecells = mode
end
op.define('--border', 'extract borders') do
  $opt_border = true
end
# Consumes the recognised switches; the remaining ARGV entries are the input files.
op.parse!(ARGV)
|
69
|
+
|
70
|
+
# Returns the value to emit for one worksheet cell.
#
# cell :: a cell object answering MergeCells, Value, Row, Column and MergeArea
#         (presumably a single-cell WIN32OLE Range -- confirm against caller).
#
# * A cell that is not merged yields its own Value.
# * A merged cell yields the value of the top-left cell of its merge area.
#   When $opt_mergecells is 'topleft', only the top-left cell itself gets
#   that value and every other cell of the area yields nil.
def convert_cell(cell)
  return cell.Value unless cell.MergeCells

  # Fetch the merge area once; each property access is a COM round trip.
  mergearea = cell.MergeArea
  if $opt_mergecells == 'topleft' &&
     (cell.Row != mergearea.Row || cell.Column != mergearea.Column)
    return nil
  end
  mergearea.Item(1, 1).Value
end
|
94
|
+
|
95
|
+
# Decides whether a border line separates two neighbouring cells.
#
# cell_a, cell_b :: cell objects, or nil where there is no cell.
# edge_a         :: border index of the edge of cell_a that faces cell_b.
# edge_b         :: border index of the edge of cell_b that faces cell_a.
#
# Two cells whose merge areas start at the same row and column never have a
# line between them.  Otherwise a line exists when either facing edge has a
# linestyle other than Excel::XlNone.
def excel_border_between?(cell_a, cell_b, edge_a, edge_b)
  if cell_a && cell_b && cell_a.MergeCells && cell_b.MergeCells
    area_a = cell_a.MergeArea
    area_b = cell_b.MergeArea
    return false if area_a.Row == area_b.Row && area_a.Column == area_b.Column
  end
  return true if cell_a && cell_a.borders(edge_a).linestyle != Excel::XlNone
  return true if cell_b && cell_b.borders(edge_b).linestyle != Excel::XlNone
  false
end

# Builds the row of border marks that lies between row upper_y and row
# upper_y+1 of usedrange.
#
# sheet_xrange, sheet_yrange :: ranges of valid column / row numbers of the sheet.
# usedrange                  :: range object whose cells are addressed relative to it.
# upper_y                    :: row above the border line, base 1 (0 = line above the first row).
#
# Returns an array with 2*columns+1 entries.  Even positions are joints
# ('|', '-', '+' or nil) between four corner cells; odd positions are the
# horizontal border under/over one cell ('-' or nil).
def convert_horizontal_borders(sheet_xrange, sheet_yrange, usedrange, upper_y)
  # upper_y is base 1.
  lower_y = upper_y + 1
  xoff = usedrange.Column - 1
  yoff = usedrange.Row - 1
  upper_on_sheet = sheet_yrange.include?(yoff + upper_y)
  lower_on_sheet = sheet_yrange.include?(yoff + lower_y)
  # A corner cell exists only when both its row and its column are on the
  # sheet.  The column is checked against sheet_xrange for all four corners;
  # the upper-left corner used to be checked against sheet_yrange.
  corner = lambda {|row_on_sheet, y, x|
    row_on_sheet && sheet_xrange.include?(xoff + x) ? usedrange.Cells(y, x) : nil
  }
  ary = []
  0.upto(usedrange.Columns.Count * 2) {|i|
    if (i & 1) == 0
      # joint between the four cells around a grid corner
      left_x = i / 2
      right_x = left_x + 1
      upperleft_cell = corner.call(upper_on_sheet, upper_y, left_x)
      upperright_cell = corner.call(upper_on_sheet, upper_y, right_x)
      lowerleft_cell = corner.call(lower_on_sheet, lower_y, left_x)
      lowerright_cell = corner.call(lower_on_sheet, lower_y, right_x)
      # upper/lower: vertical line segments above/below the joint;
      # left/right: horizontal line segments to its left/right.
      upper_line = excel_border_between?(upperleft_cell, upperright_cell, Excel::XlEdgeRight, Excel::XlEdgeLeft)
      lower_line = excel_border_between?(lowerleft_cell, lowerright_cell, Excel::XlEdgeRight, Excel::XlEdgeLeft)
      left_line = excel_border_between?(upperleft_cell, lowerleft_cell, Excel::XlEdgeBottom, Excel::XlEdgeTop)
      right_line = excel_border_between?(upperright_cell, lowerright_cell, Excel::XlEdgeBottom, Excel::XlEdgeTop)
      if upper_line && lower_line && !left_line && !right_line
        joint = '|'
      elsif !upper_line && !lower_line && left_line && right_line
        joint = '-'
      elsif upper_line || lower_line || left_line || right_line
        joint = '+'
      else
        joint = nil
      end
      ary << joint
    else
      # horizontal border between the cell above and the cell below
      cell_x = (i + 1) / 2
      upper_cell = (0 < upper_y) ? usedrange.Cells(upper_y, cell_x) : nil
      lower_cell = (0 < lower_y) ? usedrange.Cells(lower_y, cell_x) : nil
      between = excel_border_between?(upper_cell, lower_cell, Excel::XlEdgeBottom, Excel::XlEdgeTop)
      ary << (between ? '-' : nil)
    end
  }
  ary
end
|
182
|
+
|
183
|
+
# Returns '|' when a vertical border separates column left_x from column
# left_x+1 in row y of usedrange, otherwise nil.
#
# A neighbour whose column falls outside sheet_xrange is treated as absent.
# Two cells whose merge areas start at the same row and column never have a
# border between them.  sheet_yrange is accepted but not consulted.
def convert_vertical_border(sheet_xrange, sheet_yrange, usedrange, y, left_x)
  column_offset = usedrange.Column - 1
  pick = lambda {|x| sheet_xrange.include?(column_offset + x) ? usedrange.Cells(y, x) : nil }
  lhs = pick.call(left_x)
  rhs = pick.call(left_x + 1)
  if lhs && rhs && lhs.MergeCells && rhs.MergeCells &&
     lhs.MergeArea.Row == rhs.MergeArea.Row &&
     lhs.MergeArea.Column == rhs.MergeArea.Column
    return nil
  end
  return '|' if lhs && lhs.borders(Excel::XlEdgeRight).linestyle != Excel::XlNone
  return '|' if rhs && rhs.borders(Excel::XlEdgeLeft).linestyle != Excel::XlNone
  nil
end
|
200
|
+
|
201
|
+
# Appends row to csvgen with its trailing nil entries dropped.
# The caller's array is left untouched; a fresh array is appended.
def generate_csv_row(csvgen, row)
  last_filled = row.rindex {|value| !value.nil? }
  csvgen << (last_filled ? row[0..last_filled] : [])
end
|
206
|
+
|
207
|
+
# Writes one worksheet to csvgen, one CSV row per sheet row.
#
# filename :: prepended to every row when $opt_prepend_filename is set.
# sheet    :: worksheet object (Name, Cells, UsedRange, Range are used).
# csvgen   :: row sink handed on to generate_csv_row.
#
# The converted area always starts at cell (1,1) and extends to the
# bottom-right corner of the sheet's UsedRange.  With $opt_border, border
# rows and border columns are interleaved with the data.
def convert_sheet(filename, sheet, csvgen)
  sheetname = sheet.Name
  whole_sheet = sheet.Cells
  sheet_xrange = 1..(whole_sheet.Columns.Count)
  sheet_yrange = 1..(whole_sheet.Rows.Count)
  used = sheet.UsedRange
  top_left = sheet.Cells(1,1)
  bottom_right = sheet.Cells(used.Row+used.Rows.Count-1, used.Column+used.Columns.Count-1)
  # Re-anchor the range at (1,1) so relative and absolute coordinates agree.
  usedrange = sheet.Range(top_left, bottom_right)
  prefix = []
  prefix.push(filename) if $opt_prepend_filename
  prefix.push(sheetname) if $opt_all_sheets
  if $opt_border
    generate_csv_row(csvgen, prefix + convert_horizontal_borders(sheet_xrange, sheet_yrange, usedrange, 0))
  end
  y = 0
  usedrange.Rows.each do |row|
    y += 1
    x = 0
    record = []
    record << convert_vertical_border(sheet_xrange, sheet_yrange, usedrange, y, 0) if $opt_border
    row.Columns.each do |cell|
      x += 1
      record << convert_cell(cell)
      record << convert_vertical_border(sheet_xrange, sheet_yrange, usedrange, y, x) if $opt_border
    end
    generate_csv_row(csvgen, prefix + record)
    if $opt_border
      generate_csv_row(csvgen, prefix + convert_horizontal_borders(sheet_xrange, sheet_yrange, usedrange, y))
    end
  end
end
|
237
|
+
|
238
|
+
# Main program: start Excel, convert every file named on the command line
# (directories are searched recursively for .xls files) and write the
# result as CSV to standard output.  Excel is always asked to quit.
excel = WIN32OLE.new('Excel.Application')
begin
  # Load the Xl* constants into the Excel module inside the protected
  # region, so that the Excel process is still shut down if loading fails.
  WIN32OLE.const_load(excel,Excel)
  excel.displayAlerts = false
  Tb.csv_stream_output(STDOUT) {|csvgen|
    ARGV.each {|excel_filename|
      if File.directory? excel_filename
        filenames = []
        # Match the extension case-insensitively: names such as REPORT.XLS
        # are common on Windows.
        Find.find(excel_filename) {|path| filenames << path if /\.xls\z/i =~ path }
      else
        filenames = [excel_filename]
      end
      filenames.each {|ifn0|
        # Workbooks.Open is given an absolute path; the name as typed is
        # what gets prepended to the rows.
        ifn = getAbsolutePath(ifn0)
        wb = excel.Workbooks.Open(ifn)
        begin
          if $opt_all_sheets
            wb.Worksheets.each {|sheet|
              convert_sheet(ifn0, sheet, csvgen)
            }
          else
            convert_sheet(ifn0, wb.Worksheets(1), csvgen)
          end
        ensure
          wb.Close
        end
      }
    }
  }
ensure
  excel.Quit
end
|
@@ -0,0 +1,397 @@
|
|
1
|
+
#!/usr/bin/jruby
|
2
|
+
|
3
|
+
# sample/poi-xls2csv.rb - XLS to CSV converter using Apache POI with JRuby.
|
4
|
+
#
|
5
|
+
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
9
|
+
#
|
10
|
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
11
|
+
# list of conditions and the following disclaimer.
|
12
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
14
|
+
# and/or other materials provided with the distribution.
|
15
|
+
# 3. The name of the author may not be used to endorse or promote products
|
16
|
+
# derived from this software without specific prior written permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
19
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
21
|
+
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
23
|
+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
26
|
+
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
27
|
+
# OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
$KCODE = 'u'
|
30
|
+
|
31
|
+
require 'tb'
|
32
|
+
require 'optparse'
|
33
|
+
|
34
|
+
require 'jakarta-poi.jar'
|
35
|
+
Java.include_class 'org.apache.poi.hssf.usermodel.HSSFWorkbook'
|
36
|
+
Java.include_class 'org.apache.poi.hssf.usermodel.HSSFDateUtil'
|
37
|
+
Java.include_class 'org.apache.poi.hssf.usermodel.HSSFCellStyle'
|
38
|
+
|
39
|
+
# Command-line switches live in globals so the converter methods can read them.
$opt_all_sheets, $opt_prepend_filename = false, false
$opt_mergecells = 'fill'
$opt_border, $opt_type = false, false

op = OptionParser.new
op.define('-h', 'show help message') do
  puts op
  exit true
end
op.define('-a', 'convert all sheets (sheet name is prepended to all rows)') do
  $opt_all_sheets = true
end
op.define('-f', 'prepend filename for all rows') do
  $opt_prepend_filename = true
end
op.define('--mergecells=mode', '"fill" or "topleft"') do |mode|
  $opt_mergecells = mode
end
op.define('--border', 'extract borders') do
  $opt_border = true
end
op.define('-t', '--type', 'add type suffix') do
  $opt_type = true
end
# Consumes the recognised switches; the remaining ARGV entries are the input files.
op.parse!(ARGV)
|
53
|
+
|
54
|
+
# http://sc.openoffice.org/excelfileformat.pdf 5.49 FORMAT
|
55
|
+
# Number-format indexes that convert_single_cell treats as date/time formats.
ExcelDateStyles =
  (15..21).to_a + (27..36).to_a + [45, 46, 47] + (49..58).to_a + [1432, 2232]
# Same indexes as hash keys (value true) for constant-time membership tests.
ExcelDateStylesHash = {}
ExcelDateStyles.each {|format_index| ExcelDateStylesHash.store(format_index, true) }
|
63
|
+
|
64
|
+
# Two lookup tables built from every BORDER_* constant of HSSFCellStyle:
# name without the "BORDER_" prefix -> numeric value, and the reverse.
BorderName2Num = {}
BorderNum2Name = {}
Java::HSSFCellStyle::constants.each {|const_name|
  label = const_name.to_s
  next unless label =~ /\ABORDER_/
  suffix = label.sub(/\ABORDER_/, '')
  number = Java::HSSFCellStyle::const_get(const_name)
  BorderNum2Name[number] = suffix
  BorderName2Num[suffix] = number
}
|
72
|
+
|
73
|
+
# Converts one POI cell into the value written to the CSV.
#
# cell :: cell object, or nil for a missing cell (which yields nil).
#
# Numeric cells that are date formatted (per HSSFDateUtil or the
# ExcelDateStylesHash table) become "YYYY-M-DD hh:mm:ss" style strings;
# other numeric cells yield the number, strings the text, formulas the
# formula text, booleans the boolean, error cells "#ERR<code>" and blank
# cells nil.  With $opt_type every non-blank value becomes a string with a
# ":<type>" suffix.  Raises RuntimeError for an unknown cell type.
def convert_single_cell(cell)
  return nil if !cell
  cell_types = Java::OrgApachePoiHssfUsermodel::HSSFCell
  style = cell.getCellStyle
  case cell.getCellType
  when cell_types::CELL_TYPE_NUMERIC
    if Java::HSSFDateUtil.isCellDateFormatted(cell) || ExcelDateStylesHash[style.getDataFormat]
      d = cell.getDateCellValue
      # getYear counts from 1900 and getMonth from 0, hence the offsets.
      stamp = format("%d-%02d-%02d %02d:%02d:%02d",
                     d.getYear+1900, d.getMonth+1, d.getDate, d.getHours, d.getMinutes, d.getSeconds)
      $opt_type ? stamp + ":date" : stamp
    else
      number = cell.getNumericCellValue
      $opt_type ? number.to_s + ":numeric" : number
    end
  when cell_types::CELL_TYPE_STRING
    text = cell.getRichStringCellValue.getString
    $opt_type ? text + ":string" : text
  when cell_types::CELL_TYPE_FORMULA
    formula = cell.getCellFormula
    $opt_type ? formula.to_s + ":formula" : formula
  when cell_types::CELL_TYPE_BLANK
    nil
  when cell_types::CELL_TYPE_BOOLEAN
    flag = cell.getBooleanCellValue
    $opt_type ? flag.to_s + ":boolean" : flag
  when cell_types::CELL_TYPE_ERROR
    message = "\#ERR#{cell.getErrorCellValue}"
    $opt_type ? message + ":error" : message
  else
    raise "unexpected cell type: #{cell.getCellType.inspect}"
  end
end
|
111
|
+
|
112
|
+
# Converts the cell at column x, row y, taking merged regions into account.
#
# sheet  :: sheet object; used to reach the top-left cell of a merged region.
# merged :: hash mapping [x, y] to [x1, y1, x2, y2] for every merged cell.
# row    :: row object for row y.
# x, y   :: zero-based column and row index of the cell.
#
# A cell outside any merged region is converted directly.  A cell inside a
# region takes the value of the region's top-left cell; when $opt_mergecells
# is 'topleft' only the top-left cell itself gets it and the others get nil.
# With $opt_type a ":mergedarea(WxH,col,row)" suffix (1-based position) is
# appended, also to the nil cells.
def convert_cell(sheet, merged, row, x, y)
  region = merged[[x, y]]
  return convert_single_cell(row.getCell(x)) unless region

  x1, y1, x2, y2 = region
  if $opt_mergecells == 'topleft' && (x != x1 || y != y1)
    val = nil
  else
    # The row holding the region's top-left cell may be absent from the
    # sheet; treat that like a missing cell instead of calling getCell on nil.
    anchor_row = sheet.getRow(y1)
    val = convert_single_cell(anchor_row && anchor_row.getCell(x1))
  end
  val = val.to_s + ":mergedarea(#{x2-x1+1}x#{y2-y1+1},#{x1+1},#{y1+1})" if $opt_type
  val
end
|
133
|
+
|
134
|
+
# Indexes the merged regions of sheet by cell position.
#
# Returns a hash that maps [x, y] (column, row) of every cell lying inside a
# merged region to that region's bounds [x1, y1, x2, y2].  All cells of one
# region share the same bounds array.
def get_merged_regions(sheet)
  lookup = {}
  (0...sheet.getNumMergedRegions).each {|index|
    region = sheet.getMergedRegionAt(index)
    bounds = [region.getColumnFrom, region.getRowFrom, region.getColumnTo, region.getRowTo]
    left, top, right, bottom = bounds
    (top..bottom).each {|y|
      (left..right).each {|x| lookup[[x, y]] = bounds }
    }
  }
  lookup
end
|
151
|
+
|
152
|
+
# Names a border style value for the type suffix.
#
# border :: border style value as returned by the cell style getters.
# Returns "border(<name>)" with the lower-cased name of the matching
# HSSFCellStyle BORDER_* constant, or "border(<value>)" when none matches.
def bordertype(border)
  styles = Java::HSSFCellStyle
  # Checked in this order; constants are looked up one at a time until one matches.
  known = %w[
    NONE THIN MEDIUM DASHED DOTTED THICK DOUBLE HAIR MEDIUM_DASHED
    DASH_DOT MEDIUM_DASH_DOT DASH_DOT_DOT MEDIUM_DASH_DOT_DOT SLANTED_DASH_DOT
  ]
  hit = known.find {|suffix| styles.const_get("BORDER_" + suffix) === border }
  "border(#{hit ? hit.downcase : border})"
end
|
172
|
+
|
173
|
+
# Looks up one border of the cell at column x of row.
#
# Yields the cell's style and expects the block to return the border value
# of interest.  Returns that value when it is not BORDER_NONE; returns nil
# when the row is missing, x is outside cellrange, the cell is missing, or
# the border is BORDER_NONE.
def poi_border_value(row, cellrange, x)
  return nil unless row && cellrange.include?(x)
  cell = row.getCell(x)
  return nil unless cell
  border = yield cell.getCellStyle
  border != Java::HSSFCellStyle::BORDER_NONE ? border : nil
end

# Builds the row of border marks lying between sheet rows upper_y and upper_y+1.
#
# sheet        :: sheet object.
# merged       :: hash from get_merged_regions ([x, y] -> region bounds).
# upper_y      :: row index above the line (may be one less than the first row).
# min_firstcol :: column index where the output starts.
#
# Even positions of the result are joints ('|', '-', '+' or nil) between the
# four cells around a grid corner; odd positions are the horizontal border
# of one column ('-' or nil).  With $opt_type, joints get a ":joint" suffix
# and borders a ":border(...)" suffix.
def convert_horizontal_borders(sheet, merged, upper_y, min_firstcol)
  rownums = sheet.getFirstRowNum..sheet.getLastRowNum
  lower_y = upper_y+1
  upper_row = upper_cols = lower_row = lower_cols = nil
  last_index = 0
  # A row contributes to the width only when it exists and reports valid
  # first/last cell numbers; an existing row is still probed for borders below.
  if rownums.include?(upper_y) && (upper_row = sheet.getRow(upper_y))
    upper_cols = upper_row.getFirstCellNum...upper_row.getLastCellNum
    if upper_cols.begin != -1 && upper_cols.end != -1
      last_index = [last_index, (upper_cols.end-min_firstcol)*2].max
    end
  end
  if rownums.include?(lower_y) && (lower_row = sheet.getRow(lower_y))
    lower_cols = lower_row.getFirstCellNum...lower_row.getLastCellNum
    if lower_cols.begin != -1 && lower_cols.end != -1
      last_index = [last_index, (lower_cols.end-min_firstcol)*2].max
    end
  end
  # True when both positions belong to the same merged region.
  joined = lambda {|ax, ay, bx, by|
    a = merged[[ax, ay]]
    b = merged[[bx, by]]
    a && b && a == b
  }
  ary = []
  0.upto(last_index) {|i|
    if (i & 1) == 0
      # joint between the four cells around a grid corner
      right_x = min_firstcol + i / 2
      left_x = right_x - 1
      split_upper = !joined.call(left_x, upper_y, right_x, upper_y)
      split_lower = !joined.call(left_x, lower_y, right_x, lower_y)
      split_left = !joined.call(left_x, upper_y, left_x, lower_y)
      split_right = !joined.call(right_x, upper_y, right_x, lower_y)
      upper_line = !!(split_upper &&
        (poi_border_value(upper_row, upper_cols, left_x) {|s| s.getBorderRight } ||
         poi_border_value(upper_row, upper_cols, right_x) {|s| s.getBorderLeft }))
      lower_line = !!(split_lower &&
        (poi_border_value(lower_row, lower_cols, left_x) {|s| s.getBorderRight } ||
         poi_border_value(lower_row, lower_cols, right_x) {|s| s.getBorderLeft }))
      left_line = !!(split_left &&
        (poi_border_value(upper_row, upper_cols, left_x) {|s| s.getBorderBottom } ||
         poi_border_value(lower_row, lower_cols, left_x) {|s| s.getBorderTop }))
      right_line = !!(split_right &&
        (poi_border_value(upper_row, upper_cols, right_x) {|s| s.getBorderBottom } ||
         poi_border_value(lower_row, lower_cols, right_x) {|s| s.getBorderTop }))
      if upper_line && lower_line && !left_line && !right_line
        joint = '|'
      elsif !upper_line && !lower_line && left_line && right_line
        joint = '-'
      elsif upper_line || lower_line || left_line || right_line
        joint = '+'
      else
        joint = nil
      end
      joint = joint + ":joint" if joint && $opt_type
      ary << joint
    else
      # horizontal border between the cell above and the cell below
      hborder = nil
      cell_x = min_firstcol + i / 2
      if !joined.call(cell_x, upper_y, cell_x, lower_y)
        # The bottom border of the upper cell takes precedence over the top
        # border of the lower cell.
        drawn = poi_border_value(upper_row, upper_cols, cell_x) {|s| s.getBorderBottom } ||
                poi_border_value(lower_row, lower_cols, cell_x) {|s| s.getBorderTop }
        hborder = $opt_type ? "-:#{bordertype(drawn)}" : '-' if drawn
      end
      ary << hborder
    end
  }
  ary
end
|
302
|
+
|
303
|
+
def convert_vertical_border(sheet, merged, cell_y, left_x)
|
304
|
+
right_x = left_x+1
|
305
|
+
row = sheet.getRow(cell_y)
|
306
|
+
return nil if !row
|
307
|
+
cellrange = row.getFirstCellNum...row.getLastCellNum
|
308
|
+
return nil if cellrange.begin == -1 || cellrange.end == -1
|
309
|
+
vborder = nil
|
310
|
+
if !merged[[left_x, cell_y]] || !merged[[right_x, cell_y]] ||
|
311
|
+
merged[[left_x, cell_y]] != merged[[right_x, cell_y]]
|
312
|
+
if !vborder && cellrange.include?(left_x) &&
|
313
|
+
(left_cell = row.getCell(left_x)) &&
|
314
|
+
left_cell.getCellStyle.getBorderRight != Java::HSSFCellStyle::BORDER_NONE
|
315
|
+
vborder = '|'
|
316
|
+
vborder = vborder + ":#{bordertype(left_cell.getCellStyle.getBorderRight)}" if $opt_type
|
317
|
+
end
|
318
|
+
if !vborder && cellrange.include?(right_x) &&
|
319
|
+
(right_cell = row.getCell(right_x)) &&
|
320
|
+
right_cell.getCellStyle.getBorderLeft != Java::HSSFCellStyle::BORDER_NONE
|
321
|
+
vborder = '|'
|
322
|
+
vborder = vborder + ":#{bordertype(right_cell.getCellStyle.getBorderLeft)}" if $opt_type
|
323
|
+
end
|
324
|
+
end
|
325
|
+
#vborder ||= ' '
|
326
|
+
vborder
|
327
|
+
end
|
328
|
+
|
329
|
+
# Writes sheet number i of book to csvgen, one CSV row per sheet row.
#
# filename :: prepended to every row when $opt_prepend_filename is set.
# book     :: workbook object (getSheetAt and getSheetName are used).
# i        :: zero-based sheet index.
# csvgen   :: row sink; rows are appended with <<.
#
# Rows 0..getLastRowNum are emitted; a missing or empty row yields a row
# holding only the header columns.  Columns always start at 0, and cells
# left of a row's first cell are emitted as nil.  With $opt_border, border
# rows and columns are interleaved; with $opt_type the header columns get
# ":filename" / ":sheetname" suffixes.
def convert_sheet(filename, book, i, csvgen)
  sheet = book.getSheetAt(i)
  sheetname = book.getSheetName(i)
  merged = get_merged_regions(sheet)
  rownums = 0..sheet.getLastRowNum
  min_firstcol = 0
  sheet_header = []
  if $opt_prepend_filename
    # += builds a new string, so the caller's filename is not modified.
    filename += ":filename" if $opt_type
    sheet_header << filename
  end
  if $opt_all_sheets
    sheetname += ":sheetname" if $opt_type
    sheet_header << sheetname
  end
  # Border line above the first row.
  csvgen << (sheet_header + convert_horizontal_borders(sheet, merged, rownums.first-1, min_firstcol)) if $opt_border
  rownums.each {|y|
    record = []
    row = sheet.getRow(y)
    if row
      row_cellrange = row.getFirstCellNum...row.getLastCellNum
      # -1 means the row reports no cells.
      if row_cellrange.begin != -1 && row_cellrange.end != -1
        record << convert_vertical_border(sheet, merged, y, min_firstcol-1) if $opt_border
        min_firstcol.upto(row_cellrange.end-1) {|x|
          val = row_cellrange.include?(x) ? convert_cell(sheet, merged, row, x, y) : nil
          record << val
          record << convert_vertical_border(sheet, merged, y, x) if $opt_border
        }
      end
    end
    csvgen << (sheet_header + record)
    csvgen << (sheet_header + convert_horizontal_borders(sheet, merged, y, min_firstcol)) if $opt_border
  }
end
|
373
|
+
|
374
|
+
# Reads a workbook from input and converts its first sheet, or every sheet
# when $opt_all_sheets is set.
#
# filename :: name handed on to convert_sheet.
# input    :: stream passed to HSSFWorkbook.new.
# csvgen   :: row sink handed on to convert_sheet.
def convert_book(filename, input, csvgen)
  book = Java::HSSFWorkbook.new(input)
  return convert_sheet(filename, book, 0, csvgen) unless $opt_all_sheets

  0.upto(book.getNumberOfSheets-1) {|sheet_index|
    convert_sheet(filename, book, sheet_index, csvgen)
  }
end
|
384
|
+
|
385
|
+
# Main program: convert every file named on the command line (or standard
# input when none is given, or for the name "-") and write CSV to standard
# output.
Tb.csv_stream_output(STDOUT) {|csvgen|
  argv = ARGV.empty? ? ['-'] : ARGV
  argv.each {|filename|
    if filename == '-'
      convert_book(filename, java.lang.System.in, csvgen)
    else
      input = java.io.FileInputStream.new(filename)
      begin
        convert_book(filename, input, csvgen)
      ensure
        # Release the file handle even when conversion fails; otherwise one
        # stream per input file stays open until the process exits.
        input.close
      end
    end
  }
}

exit true
|