tb 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +60 -0
- data/bin/tb +1137 -0
- data/lib/tb.rb +35 -0
- data/lib/tb/basic.rb +1071 -0
- data/lib/tb/csv.rb +125 -0
- data/lib/tb/enumerable.rb +284 -0
- data/lib/tb/fieldset.rb +96 -0
- data/lib/tb/pathfinder.rb +569 -0
- data/lib/tb/qtsv.rb +93 -0
- data/lib/tb/reader.rb +213 -0
- data/lib/tb/record.rb +129 -0
- data/lib/tb/tsv.rb +93 -0
- data/sample/excel2csv +270 -0
- data/sample/poi-xls2csv.rb +397 -0
- data/sample/poi-xls2csv.sh +39 -0
- data/test-all.rb +7 -0
- data/test/test_basic.rb +290 -0
- data/test/test_csv.rb +78 -0
- data/test/test_enumerable.rb +122 -0
- data/test/test_record.rb +12 -0
- data/test/test_tsv.rb +41 -0
- metadata +73 -0
data/sample/excel2csv
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# sample/excel2csv - XLS to CSV converter using Microsoft Excel via WIN32OLE.
|
4
|
+
#
|
5
|
+
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
9
|
+
#
|
10
|
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
11
|
+
# list of conditions and the following disclaimer.
|
12
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
14
|
+
# and/or other materials provided with the distribution.
|
15
|
+
# 3. The name of the author may not be used to endorse or promote products
|
16
|
+
# derived from this software without specific prior written permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
19
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
21
|
+
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
23
|
+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
26
|
+
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
27
|
+
# OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
# works only on Windows with Microsoft Excel.
|
30
|
+
# (This script invokes Excel using win32ole.)
|
31
|
+
|
32
|
+
# usage:
|
33
|
+
# excel2csv [opts] filename.xls ...
|
34
|
+
#
|
35
|
+
# options:
|
36
|
+
# -h : show help message
|
37
|
+
# -a : convert all sheets (sheet name is prepended to all rows)
|
38
|
+
# -f : prepend filename for all rows
|
39
|
+
# --mergecells=fill : set the value in every cell of a merged area
|
40
|
+
# --mergecells=topleft : set the value only in the top-left cell of a merged area
|
41
|
+
|
42
|
+
require 'tb'
|
43
|
+
|
44
|
+
require 'win32ole'
|
45
|
+
require 'optparse'
|
46
|
+
require 'find'
|
47
|
+
require 'pp'
|
48
|
+
|
49
|
+
# Turn +filename+ into an absolute path by delegating to the
# GetAbsolutePathName method of a Scripting.FileSystemObject OLE object.
def getAbsolutePath(filename)
  WIN32OLE.new('Scripting.FileSystemObject').GetAbsolutePathName(filename)
end
|
53
|
+
|
54
|
+
# Empty namespace; the script later passes it to WIN32OLE.const_load so the
# Excel constants (Excel::XlNone, Excel::XlEdgeLeft, ...) end up in here.
module Excel
end

# Command line settings.  They are globals because the converter methods
# read them directly.
$opt_all_sheets = false
$opt_prepend_filename = false
$opt_mergecells = 'fill'
$opt_border = false

op = OptionParser.new do |parser|
  parser.on('-h', 'show help message') do
    puts parser
    exit true
  end
  parser.on('-a', 'convert all sheets (sheet name is prepended to all rows)') do
    $opt_all_sheets = true
  end
  parser.on('-f', 'prepend filename for all rows') do
    $opt_prepend_filename = true
  end
  parser.on('--mergecells=mode', '"fill" or "topleft"') do |mode|
    $opt_mergecells = mode
  end
  parser.on('--border', 'extract borders') do
    $opt_border = true
  end
end
# Options are removed from ARGV; the remaining entries are the input files.
op.parse!(ARGV)
|
69
|
+
|
70
|
+
# Return the value to emit for +cell+ (an object answering the Excel Range
# properties MergeCells, Value, Row, Column and MergeArea).
#
# * A cell that is not part of a merged area yields its own Value.
# * For a cell inside a merged area the value of the area's first cell
#   (Item(1, 1)) is used.  With $opt_mergecells == 'topleft' only the cell
#   whose Row/Column equal the area's Row/Column reports that value and all
#   other cells of the area yield nil; with any other mode every cell of the
#   area reports it.
#
# The area is fetched once and its value is only read when it is actually
# returned, which avoids needless OLE property calls per cell.
def convert_cell(cell)
  return cell.Value unless cell.MergeCells

  area = cell.MergeArea
  if $opt_mergecells == 'topleft'
    is_anchor = cell.Row == area.Row && cell.Column == area.Column
    return nil unless is_anchor
  end
  area.Item(1, 1).Value
end
|
94
|
+
|
95
|
+
# True when a line has to be drawn between the neighbouring cells +a+ and
# +b+ (either may be nil when it lies outside the sheet).
#
# No line is drawn when both cells belong to the same merged area (same
# MergeArea.Row and MergeArea.Column).  Otherwise a line is drawn when the
# +edge_of_a+ border of +a+ or the +edge_of_b+ border of +b+ has a
# linestyle other than Excel::XlNone.
def excel_border_between?(a, b, edge_of_a, edge_of_b)
  same_area = a && b && a.MergeCells && b.MergeCells &&
              a.MergeArea.Row == b.MergeArea.Row &&
              a.MergeArea.Column == b.MergeArea.Column
  return false if same_area

  a_has_line = a && a.borders(edge_of_a).linestyle != Excel::XlNone
  b_has_line = b && b.borders(edge_of_b).linestyle != Excel::XlNone
  (a_has_line || b_has_line) ? true : false
end

# Build the CSV fields describing the horizontal border line that lies
# between row +upper_y+ and row +upper_y+ + 1 of +usedrange+.
#
# upper_y is base 1; the caller passes 0 for the line above the first row.
# sheet_xrange / sheet_yrange are the valid column / row numbers of the
# whole sheet and are used to avoid asking for cells outside the sheet.
#
# The result has usedrange.Columns.Count * 2 + 1 entries.  Even positions
# are joints between four cells ('|', '-', '+' or nil), odd positions are
# the border segment directly above/below one cell ('-' or nil).
def convert_horizontal_borders(sheet_xrange, sheet_yrange, usedrange, upper_y)
  lower_y = upper_y + 1
  usedrange_xoff = usedrange.Column - 1
  usedrange_yoff = usedrange.Row - 1
  upper_on_sheet = sheet_yrange.include?(usedrange_yoff + upper_y)
  lower_on_sheet = sheet_yrange.include?(usedrange_yoff + lower_y)

  # Cell at (y, x) of usedrange, or nil when its row or column is outside
  # the sheet.  The column is checked against sheet_xrange for all four
  # corner cells (the upper-left corner used to be checked against
  # sheet_yrange by mistake).
  corner_cell = lambda { |row_on_sheet, y, x|
    (row_on_sheet && sheet_xrange.include?(usedrange_xoff + x)) ? usedrange.Cells(y, x) : nil
  }

  ary = []
  0.upto(usedrange.Columns.Count * 2) { |i|
    if (i & 1) == 0
      # joint between the columns left_x and right_x
      left_x = i / 2
      right_x = left_x + 1
      upperleft_cell = corner_cell.call(upper_on_sheet, upper_y, left_x)
      upperright_cell = corner_cell.call(upper_on_sheet, upper_y, right_x)
      lowerleft_cell = corner_cell.call(lower_on_sheet, lower_y, left_x)
      lowerright_cell = corner_cell.call(lower_on_sheet, lower_y, right_x)

      # Each *_line tells whether a line leaves the joint in that direction.
      upper_line = excel_border_between?(upperleft_cell, upperright_cell, Excel::XlEdgeRight, Excel::XlEdgeLeft)
      lower_line = excel_border_between?(lowerleft_cell, lowerright_cell, Excel::XlEdgeRight, Excel::XlEdgeLeft)
      left_line = excel_border_between?(upperleft_cell, lowerleft_cell, Excel::XlEdgeBottom, Excel::XlEdgeTop)
      right_line = excel_border_between?(upperright_cell, lowerright_cell, Excel::XlEdgeBottom, Excel::XlEdgeTop)

      if upper_line && lower_line && !left_line && !right_line
        joint = '|'
      elsif !upper_line && !lower_line && left_line && right_line
        joint = '-'
      elsif upper_line || lower_line || left_line || right_line
        joint = '+'
      else
        joint = nil
      end
      ary << joint
    else
      # border segment between the cell above and the cell below
      cell_x = (i + 1) / 2
      upper_cell = (0 < upper_y) ? usedrange.Cells(upper_y, cell_x) : nil
      lower_cell = (0 < lower_y) ? usedrange.Cells(lower_y, cell_x) : nil
      if excel_border_between?(upper_cell, lower_cell, Excel::XlEdgeBottom, Excel::XlEdgeTop)
        ary << '-'
      else
        ary << nil
      end
    end
  }
  ary
end
|
182
|
+
|
183
|
+
# Decide whether a vertical border line separates column +left_x+ from
# column +left_x+ + 1 in row +y+ of +usedrange+.
#
# Returns '|' when there is a line and nil otherwise.  A neighbour whose
# column (shifted by the used range's first column) is not in sheet_xrange
# is treated as absent.  sheet_yrange is accepted but not consulted.
def convert_vertical_border(sheet_xrange, sheet_yrange, usedrange, y, left_x)
  column_offset = usedrange.Column - 1
  right_x = left_x + 1
  left_cell = usedrange.Cells(y, left_x) if sheet_xrange.include?(column_offset + left_x)
  right_cell = usedrange.Cells(y, right_x) if sheet_xrange.include?(column_offset + right_x)

  # Two cells of one merged area never have a line between them.
  same_area = left_cell && right_cell &&
              left_cell.MergeCells && right_cell.MergeCells &&
              left_cell.MergeArea.Row == right_cell.MergeArea.Row &&
              left_cell.MergeArea.Column == right_cell.MergeArea.Column
  return nil if same_area

  has_line = (left_cell && left_cell.borders(Excel::XlEdgeRight).linestyle != Excel::XlNone) ||
             (right_cell && right_cell.borders(Excel::XlEdgeLeft).linestyle != Excel::XlNone)
  has_line ? '|' : nil
end
|
200
|
+
|
201
|
+
# Append +row+ to +csvgen+ with its trailing nil fields removed.
# The caller's array is left untouched; an all-nil row is written as [].
def generate_csv_row(csvgen, row)
  last_used = row.rindex { |field| !field.nil? }
  csvgen << (last_used ? row[0..last_used] : [])
end
|
206
|
+
|
207
|
+
# Write one worksheet to +csvgen+, one CSV row per sheet row.
#
# The converted area runs from cell (1,1) to the bottom-right corner of
# sheet.UsedRange.  Every row is prefixed with +filename+ when
# $opt_prepend_filename is set and with the sheet name when $opt_all_sheets
# is set.  With $opt_border, border rows/fields are interleaved with the
# cell values.
def convert_sheet(filename, sheet, csvgen)
  sheetname = sheet.Name
  allrange = sheet.Cells
  sheet_xrange = 1..(allrange.Columns.Count)
  sheet_yrange = 1..(allrange.Rows.Count)

  # Re-anchor the used range at (1,1) so that base-1 positions inside it
  # coincide with sheet coordinates.
  used = sheet.UsedRange
  top_left = sheet.Cells(1, 1)
  bottom_right = sheet.Cells(used.Row + used.Rows.Count - 1, used.Column + used.Columns.Count - 1)
  usedrange = sheet.Range(top_left, bottom_right)

  prefix = []
  prefix.push(filename) if $opt_prepend_filename
  prefix.push(sheetname) if $opt_all_sheets

  if $opt_border
    generate_csv_row(csvgen, prefix + convert_horizontal_borders(sheet_xrange, sheet_yrange, usedrange, 0))
  end

  y = 0
  usedrange.Rows.each { |row|
    y += 1
    record = []
    record << convert_vertical_border(sheet_xrange, sheet_yrange, usedrange, y, 0) if $opt_border
    x = 0
    row.Columns.each { |cell|
      x += 1
      record << convert_cell(cell)
      record << convert_vertical_border(sheet_xrange, sheet_yrange, usedrange, y, x) if $opt_border
    }
    generate_csv_row(csvgen, prefix + record)
    if $opt_border
      generate_csv_row(csvgen, prefix + convert_horizontal_borders(sheet_xrange, sheet_yrange, usedrange, y))
    end
  }
end
|
237
|
+
|
238
|
+
# Script body: start Excel through OLE, convert every input named on the
# command line and stream the result to STDOUT as CSV.  A directory argument
# is searched recursively for files whose name ends in ".xls".
excel = WIN32OLE.new('Excel.Application')
begin
  # Loading the constants happens inside the protected region so that a
  # failure here still reaches the ensure clause and Excel is told to quit.
  WIN32OLE.const_load(excel, Excel)
  excel.displayAlerts = false
  Tb.csv_stream_output(STDOUT) {|csvgen|
    ARGV.each {|excel_filename|
      if File.directory? excel_filename
        filenames = []
        Find.find(excel_filename) {|path| filenames << path if /\.xls\z/ =~ path }
      else
        filenames = [excel_filename]
      end
      filenames.each {|ifn0|
        # The workbook is opened by absolute path (presumably because Excel
        # does not share this process' working directory - confirm); the
        # name as given is what convert_sheet prepends to rows.
        ifn = getAbsolutePath(ifn0)
        wb = excel.Workbooks.Open(ifn)
        begin
          if $opt_all_sheets
            wb.Worksheets.each {|sheet|
              convert_sheet(ifn0, sheet, csvgen)
            }
          else
            convert_sheet(ifn0, wb.Worksheets(1), csvgen)
          end
        ensure
          # Always release the workbook, even when conversion fails.
          wb.Close
        end
      }
    }
  }
ensure
  excel.Quit
end
|
@@ -0,0 +1,397 @@
|
|
1
|
+
#!/usr/bin/jruby
|
2
|
+
|
3
|
+
# sample/poi-xls2csv.rb - XLS to CSV converter using Apache POI with JRuby.
|
4
|
+
#
|
5
|
+
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
9
|
+
#
|
10
|
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
11
|
+
# list of conditions and the following disclaimer.
|
12
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
14
|
+
# and/or other materials provided with the distribution.
|
15
|
+
# 3. The name of the author may not be used to endorse or promote products
|
16
|
+
# derived from this software without specific prior written permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
19
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
21
|
+
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
23
|
+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
26
|
+
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
27
|
+
# OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
# Ruby 1.8-style character encoding switch ('u' selects UTF-8).
# NOTE(review): Ruby 1.9 and later ignore $KCODE and print a warning -
# confirm which JRuby compatibility mode this script targets.
$KCODE = 'u'
|
30
|
+
|
31
|
+
require 'tb'
|
32
|
+
require 'optparse'
|
33
|
+
|
34
|
+
require 'jakarta-poi.jar'
|
35
|
+
Java.include_class 'org.apache.poi.hssf.usermodel.HSSFWorkbook'
|
36
|
+
Java.include_class 'org.apache.poi.hssf.usermodel.HSSFDateUtil'
|
37
|
+
Java.include_class 'org.apache.poi.hssf.usermodel.HSSFCellStyle'
|
38
|
+
|
39
|
+
# Command line settings.  They are globals because the converter methods
# read them directly.
$opt_all_sheets = false
$opt_prepend_filename = false
$opt_mergecells = 'fill'
$opt_border = false
$opt_type = false

op = OptionParser.new do |parser|
  parser.on('-h', 'show help message') do
    puts parser
    exit true
  end
  parser.on('-a', 'convert all sheets (sheet name is prepended to all rows)') do
    $opt_all_sheets = true
  end
  parser.on('-f', 'prepend filename for all rows') do
    $opt_prepend_filename = true
  end
  parser.on('--mergecells=mode', '"fill" or "topleft"') do |mode|
    $opt_mergecells = mode
  end
  parser.on('--border', 'extract borders') do
    $opt_border = true
  end
  parser.on('-t', '--type', 'add type suffix') do
    $opt_type = true
  end
end
# Options are removed from ARGV; the remaining entries are the input files.
op.parse!(ARGV)
|
53
|
+
|
54
|
+
# Number format indexes that mark a numeric cell as a date/time value.
# Reference: http://sc.openoffice.org/excelfileformat.pdf 5.49 FORMAT
# NOTE(review): 14 and 22 are not listed while 49 is; in that table 49
# appears to be the text format - confirm the list against the document.
ExcelDateStyles =
  (15..21).to_a +
  (27..36).to_a +
  [45, 46, 47, 49] +
  (50..58).to_a +
  [1432, 2232]

# The same indexes as hash keys (value true) for direct membership lookup.
ExcelDateStylesHash = ExcelDateStyles.inject({}) {|table, index|
  table[index] = true
  table
}
|
63
|
+
|
64
|
+
# Two-way mapping between the BORDER_* constants of HSSFCellStyle and their
# names without the "BORDER_" prefix.
BorderName2Num = {}
BorderNum2Name = {}
Java::HSSFCellStyle.constants.each do |const_name|
  next unless /\ABORDER_/ =~ const_name
  short_name = $~.post_match
  number = Java::HSSFCellStyle.const_get(const_name)
  BorderNum2Name[number] = short_name
  BorderName2Num[short_name] = number
end
|
72
|
+
|
73
|
+
# Convert one POI cell (or nil) to the value written to the CSV.
#
# nil and blank cells give nil.  Numeric cells give the number, or a
# "Y-MM-DD HH:MM:SS" string when the cell is date formatted according to
# HSSFDateUtil or its data format index is in ExcelDateStylesHash.  String,
# formula, boolean and error cells give their text, formula text, boolean
# and "#ERR<code>" respectively.  With $opt_type the value is turned into a
# string and gets a ":<type>" suffix.  Any other cell type raises.
def convert_single_cell(cell)
  return nil unless cell

  style = cell.getCellStyle
  kinds = Java::OrgApachePoiHssfUsermodel::HSSFCell
  case cell.getCellType
  when kinds::CELL_TYPE_NUMERIC
    if Java::HSSFDateUtil.isCellDateFormatted(cell) || ExcelDateStylesHash[style.getDataFormat]
      date = cell.getDateCellValue
      # java.util.Date counts years from 1900 and months from 0.
      stamp = format("%d-%02d-%02d %02d:%02d:%02d",
                     date.getYear + 1900, date.getMonth + 1, date.getDate,
                     date.getHours, date.getMinutes, date.getSeconds)
      $opt_type ? stamp + ":date" : stamp
    else
      number = cell.getNumericCellValue
      $opt_type ? number.to_s + ":numeric" : number
    end
  when kinds::CELL_TYPE_STRING
    text = cell.getRichStringCellValue.getString
    $opt_type ? text + ":string" : text
  when kinds::CELL_TYPE_FORMULA
    formula = cell.getCellFormula
    $opt_type ? formula.to_s + ":formula" : formula
  when kinds::CELL_TYPE_BLANK
    nil
  when kinds::CELL_TYPE_BOOLEAN
    flag = cell.getBooleanCellValue
    $opt_type ? flag.to_s + ":boolean" : flag
  when kinds::CELL_TYPE_ERROR
    error = "\#ERR#{cell.getErrorCellValue}"
    $opt_type ? error + ":error" : error
  else
    raise "unexpected cell type: #{cell.getCellType.inspect}"
  end
end
|
111
|
+
|
112
|
+
# Value for the cell at column +x+ / row +y+ (+row+ is the POI row for y).
#
# +merged+ maps [x, y] to the [x1, y1, x2, y2] bounds of the merged region
# covering that position.  Outside any region the cell itself is converted.
# Inside a region the region's top-left cell supplies the value; with
# $opt_mergecells == 'topleft' only the top-left position reports it and
# the others give nil.  With $opt_type a
# ":mergedarea(<width>x<height>,<x1+1>,<y1+1>)" suffix is appended.
def convert_cell(sheet, merged, row, x, y)
  region = merged[[x, y]]
  return convert_single_cell(row.getCell(x)) unless region

  left, top, right, bottom = region
  anchor = sheet.getRow(top).getCell(left)
  hidden = $opt_mergecells == 'topleft' && !(x == left && y == top)
  value = hidden ? nil : convert_single_cell(anchor)
  if $opt_type
    value = value.to_s + ":mergedarea(#{right-left+1}x#{bottom-top+1},#{left+1},#{top+1})"
  end
  value
end
|
133
|
+
|
134
|
+
# Index the merged regions of +sheet+ by position.
#
# Returns a hash that maps every [x, y] inside a merged region to that
# region's bounds [first column, first row, last column, last row]; all
# positions of one region share the same bounds array.
def get_merged_regions(sheet)
  lookup = {}
  (0...sheet.getNumMergedRegions).each do |index|
    region = sheet.getMergedRegionAt(index)
    bounds = [region.getColumnFrom, region.getRowFrom, region.getColumnTo, region.getRowTo]
    left, top, right, bottom = bounds
    (top..bottom).each do |y|
      (left..right).each do |x|
        lookup[[x, y]] = bounds
      end
    end
  end
  lookup
end
|
151
|
+
|
152
|
+
# Describe a border style number as "border(<name>)", where <name> is the
# lower-cased suffix of the matching HSSFCellStyle::BORDER_* constant.
# A number matching none of the listed constants is printed as is.
def bordertype(border)
  suffixes = %w[
    NONE THIN MEDIUM DASHED DOTTED THICK DOUBLE HAIR MEDIUM_DASHED DASH_DOT
    MEDIUM_DASH_DOT DASH_DOT_DOT MEDIUM_DASH_DOT_DOT SLANTED_DASH_DOT
  ]
  # Constants are resolved one at a time, in this order, until one matches.
  match = suffixes.find {|suffix| Java::HSSFCellStyle.const_get("BORDER_#{suffix}") === border }
  "border(#{match ? match.downcase : border})"
end
|
172
|
+
|
173
|
+
# Build the CSV fields describing the horizontal border line between row
# +upper_y+ and the row below it.  Rows outside the sheet's first..last row
# numbers, or missing rows, simply contribute no borders.
#
# +merged+ maps [x, y] to the bounds of the merged region at that position;
# no line is drawn between two positions of the same region.
# +min_firstcol+ is the column that the first cell field corresponds to.
#
# Even result positions are joints between four cells ('|', '-', '+' or
# nil), odd positions are the segment above/below one cell ('-' or nil).
# With $opt_type joints get a ":joint" suffix and segments a
# ":border(...)" suffix.
def convert_horizontal_borders(sheet, merged, upper_y, min_firstcol)
  lower_y = upper_y + 1
  sheet_rows = sheet.getFirstRowNum..sheet.getLastRowNum

  # Locate both rows and find how far the result has to extend.
  upper_row = upper_cols = lower_row = lower_cols = nil
  last_index = 0
  if sheet_rows.include?(upper_y) && (upper_row = sheet.getRow(upper_y))
    upper_cols = upper_row.getFirstCellNum...upper_row.getLastCellNum
    if upper_cols.begin != -1 && upper_cols.end != -1
      last_index = [last_index, (upper_cols.end - min_firstcol) * 2].max
    end
  end
  if sheet_rows.include?(lower_y) && (lower_row = sheet.getRow(lower_y))
    lower_cols = lower_row.getFirstCellNum...lower_row.getLastCellNum
    if lower_cols.begin != -1 && lower_cols.end != -1
      last_index = [last_index, (lower_cols.end - min_firstcol) * 2].max
    end
  end

  # True unless both positions lie in one and the same merged region.
  divided = lambda {|xa, ya, xb, yb|
    a = merged[[xa, ya]]
    b = merged[[xb, yb]]
    !a || !b || a != b
  }
  # Border style number of one side of a cell.
  border_of = lambda {|cell, side|
    style = cell.getCellStyle
    case side
    when :left then style.getBorderLeft
    when :right then style.getBorderRight
    when :top then style.getBorderTop
    else style.getBorderBottom
    end
  }
  # True when the row has a cell at column x whose given side has a border.
  drawn = lambda {|row, cols, x, side|
    cell = cols.include?(x) ? row.getCell(x) : nil
    cell ? border_of.call(cell, side) != Java::HSSFCellStyle::BORDER_NONE : false
  }

  (0..last_index).map {|i|
    if i % 2 == 0
      # joint between the columns left_x and right_x
      right_x = min_firstcol + i / 2
      left_x = right_x - 1
      upper_line = lower_line = left_line = right_line = false
      if upper_row
        if divided.call(left_x, upper_y, right_x, upper_y)
          upper_line = drawn.call(upper_row, upper_cols, left_x, :right) ||
                       drawn.call(upper_row, upper_cols, right_x, :left)
        end
        if divided.call(left_x, upper_y, left_x, lower_y)
          left_line = drawn.call(upper_row, upper_cols, left_x, :bottom)
        end
        if divided.call(right_x, upper_y, right_x, lower_y)
          right_line = drawn.call(upper_row, upper_cols, right_x, :bottom)
        end
      end
      if lower_row
        if divided.call(left_x, lower_y, right_x, lower_y)
          lower_line = drawn.call(lower_row, lower_cols, left_x, :right) ||
                       drawn.call(lower_row, lower_cols, right_x, :left)
        end
        if divided.call(left_x, upper_y, left_x, lower_y)
          left_line ||= drawn.call(lower_row, lower_cols, left_x, :top)
        end
        if divided.call(right_x, upper_y, right_x, lower_y)
          right_line ||= drawn.call(lower_row, lower_cols, right_x, :top)
        end
      end
      joint =
        if upper_line && lower_line && !left_line && !right_line then '|'
        elsif !upper_line && !lower_line && left_line && right_line then '-'
        elsif upper_line || lower_line || left_line || right_line then '+'
        end
      (joint && $opt_type) ? joint + ":joint" : joint
    else
      # segment between the cell above and the cell below
      hborder = nil
      cell_x = min_firstcol + i / 2
      if divided.call(cell_x, upper_y, cell_x, lower_y)
        # The upper cell's bottom edge wins over the lower cell's top edge.
        [[upper_row, upper_cols, :bottom], [lower_row, lower_cols, :top]].each {|row, cols, side|
          next unless hborder.nil? && row && drawn.call(row, cols, cell_x, side)
          hborder = '-'
          hborder += ":#{bordertype(border_of.call(row.getCell(cell_x), side))}" if $opt_type
        }
      end
      hborder
    end
  }
end
|
302
|
+
|
303
|
+
# Decide whether a vertical border line separates column +left_x+ from
# column +left_x+ + 1 in row +cell_y+.
#
# Returns nil when the row is missing or has no cells, when both positions
# belong to the same merged region, or when neither neighbouring cell has a
# border on the shared edge.  Otherwise returns '|', extended by
# ":border(...)" when $opt_type is set.
def convert_vertical_border(sheet, merged, cell_y, left_x)
  row = sheet.getRow(cell_y)
  return nil unless row

  first_col = row.getFirstCellNum
  last_col = row.getLastCellNum
  return nil if first_col == -1 || last_col == -1
  columns = first_col...last_col

  right_x = left_x + 1
  left_region = merged[[left_x, cell_y]]
  right_region = merged[[right_x, cell_y]]
  return nil if left_region && right_region && left_region == right_region

  # The left cell's right edge is consulted first, then the right cell's
  # left edge; the first edge that has a border decides the result.
  [[left_x, :right], [right_x, :left]].each do |x, side|
    next unless columns.include?(x)
    cell = row.getCell(x)
    next unless cell
    style = cell.getCellStyle
    line = (side == :right) ? style.getBorderRight : style.getBorderLeft
    next if line == Java::HSSFCellStyle::BORDER_NONE
    return $opt_type ? "|:#{bordertype(line)}" : '|'
  end
  nil
end
|
328
|
+
|
329
|
+
# Write sheet number +i+ of +book+ to +csvgen+, one CSV row per sheet row
# from row 0 to the sheet's last row number.
#
# Every row is prefixed with +filename+ when $opt_prepend_filename is set
# and with the sheet name when $opt_all_sheets is set ($opt_type adds
# ":filename" / ":sheetname" suffixes to them).  Missing rows and rows
# without cells produce a row holding only the prefix.  With $opt_border,
# border rows/fields are interleaved with the cell values.
#
# The former max_lastcol computation was dropped: its result was never
# used and it cost an extra pass over all rows of the sheet.
def convert_sheet(filename, book, i, csvgen)
  sheet = book.getSheetAt(i)
  sheetname = book.getSheetName(i)
  merged = get_merged_regions(sheet)
  rownums = 0..sheet.getLastRowNum
  min_firstcol = 0
  sheet_header = []
  if $opt_prepend_filename
    filename += ":filename" if $opt_type
    sheet_header << filename
  end
  if $opt_all_sheets
    sheetname += ":sheetname" if $opt_type
    sheet_header << sheetname
  end
  # border line above the first row
  csvgen << (sheet_header + convert_horizontal_borders(sheet, merged, rownums.first-1, min_firstcol)) if $opt_border
  rownums.each {|y|
    record = []
    row = sheet.getRow(y)
    if row
      row_cellrange = row.getFirstCellNum...row.getLastCellNum
      # POI reports -1 for both bounds when the row has no cells.
      if row_cellrange.begin != -1 && row_cellrange.end != -1
        record << convert_vertical_border(sheet, merged, y, min_firstcol-1) if $opt_border
        min_firstcol.upto(row_cellrange.end-1) {|x|
          # Columns before the row's first cell are emitted as empty fields.
          val = row_cellrange.include?(x) ? convert_cell(sheet, merged, row, x, y) : nil
          record << val
          record << convert_vertical_border(sheet, merged, y, x) if $opt_border
        }
      end
    end
    csvgen << (sheet_header + record)
    csvgen << (sheet_header + convert_horizontal_borders(sheet, merged, y, min_firstcol)) if $opt_border
  }
end
|
373
|
+
|
374
|
+
# Read a workbook from the Java input stream +input+ and write it to
# +csvgen+: only sheet 0 by default, every sheet when $opt_all_sheets is
# set.  +filename+ is handed on to convert_sheet unchanged.
def convert_book(filename, input, csvgen)
  book = Java::HSSFWorkbook.new(input)
  unless $opt_all_sheets
    return convert_sheet(filename, book, 0, csvgen)
  end
  last_index = book.getNumberOfSheets - 1
  0.upto(last_index) {|index| convert_sheet(filename, book, index, csvgen) }
end
|
384
|
+
|
385
|
+
# Script body: convert every file named on the command line ('-' or no
# argument at all means standard input) and stream the CSV to STDOUT.
Tb.csv_stream_output(STDOUT) {|csvgen|
  argv = ARGV.empty? ? ['-'] : ARGV
  argv.each {|filename|
    if filename == '-'
      convert_book(filename, java.lang.System.in, csvgen)
    else
      input = java.io.FileInputStream.new(filename)
      begin
        convert_book(filename, input, csvgen)
      ensure
        # Release the file handle even when conversion fails; standard
        # input above is deliberately left open.
        input.close
      end
    end
  }
}

exit true
|