xls_to_csv-paperclip-processor 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/xls_to_csv-paperclip-processor.rb +3 -3
- data/xls_to_csv-paperclip-processor.gemspec +2 -7
- metadata +4 -10
- data/bin/xls2csv.rb +0 -6
- data/bin/xlsx2csv.rb +0 -6
- data/libexec/xls2csv +0 -0
- data/libexec/xlsx2csv +0 -446
data/Rakefile
CHANGED
@@ -21,7 +21,6 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{If you want to convert .xls to .csv simply and unwittingly, then this gem is for you!}
|
22
22
|
gem.email = "igor.alexandrov@gmail.com"
|
23
23
|
gem.authors = ["Igor Alexandrov"]
|
24
|
-
gem.executables = [ 'xls2csv.rb', 'xlsx2csv.rb']
|
25
24
|
# dependencies defined in Gemfile
|
26
25
|
end
|
27
26
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.3
|
1
|
+
0.4.4
|
data/lib/xls_to_csv-paperclip-processor.rb
CHANGED
@@ -29,9 +29,9 @@ protected
|
|
29
29
|
def command
|
30
30
|
case @current_format
|
31
31
|
when '.xls'
|
32
|
-
'xls2csv.rb'
|
32
|
+
'xls2csv'
|
33
33
|
when '.xlsx'
|
34
|
-
'xlsx2csv.rb'
|
34
|
+
'xlsx2csv'
|
35
35
|
else
|
36
36
|
'cp'
|
37
37
|
end
|
@@ -40,7 +40,7 @@ protected
|
|
40
40
|
def parameters(src, dst)
|
41
41
|
p = []
|
42
42
|
|
43
|
-
if self.command == 'xls2csv.rb'
|
43
|
+
if self.command == 'xls2csv'
|
44
44
|
p << [@params, "#{File.expand_path(src.path)}", "> #{File.expand_path(dst.path)}"]
|
45
45
|
else
|
46
46
|
p << ["#{File.expand_path(src.path)}", "#{File.expand_path(dst.path)}"]
|
data/xls_to_csv-paperclip-processor.gemspec
CHANGED
@@ -5,14 +5,13 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "xls_to_csv-paperclip-processor"
|
8
|
-
s.version = "0.4.3"
|
8
|
+
s.version = "0.4.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Igor Alexandrov"]
|
12
|
-
s.date = "2012-11-
|
12
|
+
s.date = "2012-11-28"
|
13
13
|
s.description = "If you want to convert .xls to .csv simply and unwittingly, then this gem is for you!"
|
14
14
|
s.email = "igor.alexandrov@gmail.com"
|
15
|
-
s.executables = ["xls2csv.rb", "xlsx2csv.rb"]
|
16
15
|
s.extra_rdoc_files = [
|
17
16
|
"README.md"
|
18
17
|
]
|
@@ -21,11 +20,7 @@ Gem::Specification.new do |s|
|
|
21
20
|
"README.md",
|
22
21
|
"Rakefile",
|
23
22
|
"VERSION",
|
24
|
-
"bin/xls2csv.rb",
|
25
|
-
"bin/xlsx2csv.rb",
|
26
23
|
"lib/xls_to_csv-paperclip-processor.rb",
|
27
|
-
"libexec/xls2csv",
|
28
|
-
"libexec/xlsx2csv",
|
29
24
|
"xls_to_csv-paperclip-processor.gemspec"
|
30
25
|
]
|
31
26
|
s.homepage = "http://github.com/igor-alexandrov/xls_to_csv-paperclip-processor"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xls_to_csv-paperclip-processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.4.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: paperclip
|
@@ -62,9 +62,7 @@ dependencies:
|
|
62
62
|
description: If you want to convert .xls to .csv simply and unwittingly, then this
|
63
63
|
gem is for you!
|
64
64
|
email: igor.alexandrov@gmail.com
|
65
|
-
executables:
|
66
|
-
- xls2csv.rb
|
67
|
-
- xlsx2csv.rb
|
65
|
+
executables: []
|
68
66
|
extensions: []
|
69
67
|
extra_rdoc_files:
|
70
68
|
- README.md
|
@@ -73,11 +71,7 @@ files:
|
|
73
71
|
- README.md
|
74
72
|
- Rakefile
|
75
73
|
- VERSION
|
76
|
-
- bin/xls2csv.rb
|
77
|
-
- bin/xlsx2csv.rb
|
78
74
|
- lib/xls_to_csv-paperclip-processor.rb
|
79
|
-
- libexec/xls2csv
|
80
|
-
- libexec/xlsx2csv
|
81
75
|
- xls_to_csv-paperclip-processor.gemspec
|
82
76
|
homepage: http://github.com/igor-alexandrov/xls_to_csv-paperclip-processor
|
83
77
|
licenses:
|
@@ -94,7 +88,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
88
|
version: '0'
|
95
89
|
segments:
|
96
90
|
- 0
|
97
|
-
hash:
|
91
|
+
hash: 4423377577935453605
|
98
92
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
93
|
none: false
|
100
94
|
requirements:
|
data/bin/xls2csv.rb
DELETED
data/bin/xlsx2csv.rb
DELETED
data/libexec/xls2csv
DELETED
Binary file
|
data/libexec/xlsx2csv
DELETED
@@ -1,446 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
#
|
3
|
-
# Copyright information
|
4
|
-
#
|
5
|
-
# Copyright (C) 2010-2012 Dilshod Temirkhodjaev <tdilshod@gmail.com>
|
6
|
-
#
|
7
|
-
# License
|
8
|
-
#
|
9
|
-
# This program is free software; you can redistribute it and/or modify
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation; either version 2 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# This program is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
21
|
-
|
22
|
-
__author__ = "Dilshod Temirkhodjaev <tdilshod@gmail.com>"
|
23
|
-
__license__ = "GPL-2+"
|
24
|
-
|
25
|
-
import csv, datetime, zipfile, sys, os
|
26
|
-
import xml.parsers.expat
|
27
|
-
from xml.dom import minidom
|
28
|
-
from optparse import OptionParser
|
29
|
-
|
30
|
-
# see also ruby-roo lib at: http://github.com/hmcgowan/roo
|
31
|
-
FORMATS = {
|
32
|
-
'general' : 'float',
|
33
|
-
'0' : 'float',
|
34
|
-
'0.00' : 'float',
|
35
|
-
'#,##0' : 'float',
|
36
|
-
'#,##0.00' : 'float',
|
37
|
-
'0%' : 'percentage',
|
38
|
-
'0.00%' : 'percentage',
|
39
|
-
'0.00e+00' : 'float',
|
40
|
-
'mm-dd-yy' : 'date',
|
41
|
-
'd-mmm-yy' : 'date',
|
42
|
-
'd-mmm' : 'date',
|
43
|
-
'mmm-yy' : 'date',
|
44
|
-
'h:mm am/pm' : 'date',
|
45
|
-
'h:mm:ss am/pm' : 'date',
|
46
|
-
'h:mm' : 'time',
|
47
|
-
'h:mm:ss' : 'time',
|
48
|
-
'm/d/yy h:mm' : 'date',
|
49
|
-
'#,##0 ;(#,##0)' : 'float',
|
50
|
-
'#,##0 ;[red](#,##0)' : 'float',
|
51
|
-
'#,##0.00;(#,##0.00)' : 'float',
|
52
|
-
'#,##0.00;[red](#,##0.00)' : 'float',
|
53
|
-
'mm:ss' : 'time',
|
54
|
-
'[h]:mm:ss' : 'time',
|
55
|
-
'mmss.0' : 'time',
|
56
|
-
'##0.0e+0' : 'float',
|
57
|
-
'@' : 'float',
|
58
|
-
'yyyy\\-mm\\-dd' : 'date',
|
59
|
-
'dd/mm/yy' : 'date',
|
60
|
-
'hh:mm:ss' : 'time',
|
61
|
-
"dd/mm/yy\\ hh:mm" : 'date',
|
62
|
-
'dd/mm/yyyy hh:mm:ss' : 'date',
|
63
|
-
'yy-mm-dd' : 'date',
|
64
|
-
'd-mmm-yyyy' : 'date',
|
65
|
-
'm/d/yy' : 'date',
|
66
|
-
'm/d/yyyy' : 'date',
|
67
|
-
'dd-mmm-yyyy' : 'date',
|
68
|
-
'dd/mm/yyyy' : 'date',
|
69
|
-
'mm/dd/yy hh:mm am/pm' : 'date',
|
70
|
-
'mm/dd/yyyy hh:mm:ss' : 'date',
|
71
|
-
'yyyy-mm-dd hh:mm:ss' : 'date',
|
72
|
-
}
|
73
|
-
STANDARD_FORMATS = {
|
74
|
-
0 : 'general',
|
75
|
-
1 : '0',
|
76
|
-
2 : '0.00',
|
77
|
-
3 : '#,##0',
|
78
|
-
4 : '#,##0.00',
|
79
|
-
9 : '0%',
|
80
|
-
10 : '0.00%',
|
81
|
-
11 : '0.00e+00',
|
82
|
-
12 : '# ?/?',
|
83
|
-
13 : '# ??/??',
|
84
|
-
14 : 'mm-dd-yy',
|
85
|
-
15 : 'd-mmm-yy',
|
86
|
-
16 : 'd-mmm',
|
87
|
-
17 : 'mmm-yy',
|
88
|
-
18 : 'h:mm am/pm',
|
89
|
-
19 : 'h:mm:ss am/pm',
|
90
|
-
20 : 'h:mm',
|
91
|
-
21 : 'h:mm:ss',
|
92
|
-
22 : 'm/d/yy h:mm',
|
93
|
-
37 : '#,##0 ;(#,##0)',
|
94
|
-
38 : '#,##0 ;[red](#,##0)',
|
95
|
-
39 : '#,##0.00;(#,##0.00)',
|
96
|
-
40 : '#,##0.00;[red](#,##0.00)',
|
97
|
-
45 : 'mm:ss',
|
98
|
-
46 : '[h]:mm:ss',
|
99
|
-
47 : 'mmss.0',
|
100
|
-
48 : '##0.0e+0',
|
101
|
-
49 : '@',
|
102
|
-
}
|
103
|
-
|
104
|
-
#
|
105
|
-
# usage: xlsx2csv("test.xslx", open("test.csv", "w+"))
|
106
|
-
# parameters:
|
107
|
-
# sheetid - sheet no to convert (0 for all sheets)
|
108
|
-
# dateformat - override date/time format
|
109
|
-
# delimiter - csv columns delimiter symbol
|
110
|
-
# sheet_delimiter - sheets delimiter used when processing all sheets
|
111
|
-
# skip_empty_lines - skip empty lines
|
112
|
-
#
|
113
|
-
def xlsx2csv(infilepath, outfile, sheetid=1, dateformat=None, delimiter=",", sheetdelimiter="--------", skip_empty_lines=False):
|
114
|
-
writer = csv.writer(outfile, quoting=csv.QUOTE_MINIMAL, delimiter=delimiter)
|
115
|
-
ziphandle = zipfile.ZipFile(infilepath)
|
116
|
-
try:
|
117
|
-
shared_strings = parse(ziphandle, SharedStrings, "xl/sharedStrings.xml")
|
118
|
-
styles = parse(ziphandle, Styles, "xl/styles.xml")
|
119
|
-
workbook = parse(ziphandle, Workbook, "xl/workbook.xml")
|
120
|
-
|
121
|
-
if sheetid > 0:
|
122
|
-
sheet = None
|
123
|
-
for s in workbook.sheets:
|
124
|
-
if s['id'] == sheetid:
|
125
|
-
sheet = Sheet(workbook, shared_strings, styles, ziphandle.read("xl/worksheets/sheet%i.xml" %s['id']))
|
126
|
-
break
|
127
|
-
if not sheet:
|
128
|
-
raise Exception("Sheet %i Not Found" %sheetid)
|
129
|
-
sheet.set_dateformat(dateformat)
|
130
|
-
sheet.set_skip_empty_lines(skip_empty_lines)
|
131
|
-
sheet.to_csv(writer)
|
132
|
-
else:
|
133
|
-
for s in workbook.sheets:
|
134
|
-
if sheetdelimiter != "":
|
135
|
-
outfile.write(sheetdelimiter + " " + str(s['id']) + " - " + s['name'].encode('utf-8') + "\r\n")
|
136
|
-
sheet = Sheet(workbook, shared_strings, styles, ziphandle.read("xl/worksheets/sheet%i.xml" %s['id']))
|
137
|
-
sheet.set_dateformat(dateformat)
|
138
|
-
sheet.set_skip_empty_lines(skip_empty_lines)
|
139
|
-
sheet.to_csv(writer)
|
140
|
-
finally:
|
141
|
-
ziphandle.close()
|
142
|
-
|
143
|
-
def parse(ziphandle, klass, filename):
|
144
|
-
instance = klass()
|
145
|
-
if filename in ziphandle.namelist():
|
146
|
-
instance.parse(ziphandle.read(filename))
|
147
|
-
return instance
|
148
|
-
|
149
|
-
class Workbook:
|
150
|
-
def __init__(self):
|
151
|
-
self.sheets = []
|
152
|
-
self.date1904 = False
|
153
|
-
|
154
|
-
def parse(self, data):
|
155
|
-
workbookDoc = minidom.parseString(data)
|
156
|
-
if len(workbookDoc.firstChild.getElementsByTagName("fileVersion")) == 0:
|
157
|
-
self.appName = 'unknown'
|
158
|
-
else:
|
159
|
-
self.appName = workbookDoc.firstChild.getElementsByTagName("fileVersion")[0]._attrs['appName'].value
|
160
|
-
try:
|
161
|
-
self.date1904 = workbookDoc.firstChild.getElementsByTagName("workbookPr")[0]._attrs['date1904'].value.lower().strip() != "false"
|
162
|
-
except:
|
163
|
-
pass
|
164
|
-
|
165
|
-
sheets = workbookDoc.firstChild.getElementsByTagName("sheets")[0]
|
166
|
-
for sheetNode in sheets.getElementsByTagName("sheet"):
|
167
|
-
attrs = sheetNode._attrs
|
168
|
-
name = attrs["name"].value
|
169
|
-
if self.appName == 'xl':
|
170
|
-
if attrs.has_key('r:id'): id = int(attrs["r:id"].value[3:])
|
171
|
-
else: id = int(attrs['sheetId'].value)
|
172
|
-
else:
|
173
|
-
if attrs.has_key('sheetId'): id = int(attrs["sheetId"].value)
|
174
|
-
else: id = int(attrs['r:id'].value[3:])
|
175
|
-
self.sheets.append({'name': name, 'id': id})
|
176
|
-
|
177
|
-
class Styles:
|
178
|
-
def __init__(self):
|
179
|
-
self.numFmts = {}
|
180
|
-
self.cellXfs = []
|
181
|
-
|
182
|
-
def parse(self, data):
|
183
|
-
styles = minidom.parseString(data).firstChild
|
184
|
-
# numFmts
|
185
|
-
numFmtsElement = styles.getElementsByTagName("numFmts")
|
186
|
-
if len(numFmtsElement) == 1:
|
187
|
-
for numFmt in numFmtsElement[0].childNodes:
|
188
|
-
numFmtId = int(numFmt._attrs['numFmtId'].value)
|
189
|
-
formatCode = numFmt._attrs['formatCode'].value.lower().replace('\\', '')
|
190
|
-
self.numFmts[numFmtId] = formatCode
|
191
|
-
# cellXfs
|
192
|
-
cellXfsElement = styles.getElementsByTagName("cellXfs")
|
193
|
-
if len(cellXfsElement) == 1:
|
194
|
-
for cellXfs in cellXfsElement[0].childNodes:
|
195
|
-
if (cellXfs.nodeName != "xf"):
|
196
|
-
continue
|
197
|
-
numFmtId = int(cellXfs._attrs['numFmtId'].value)
|
198
|
-
self.cellXfs.append(numFmtId)
|
199
|
-
|
200
|
-
class SharedStrings:
|
201
|
-
def __init__(self):
|
202
|
-
self.parser = None
|
203
|
-
self.strings = []
|
204
|
-
self.si = False
|
205
|
-
self.t = False
|
206
|
-
self.rPh = False
|
207
|
-
self.value = ""
|
208
|
-
|
209
|
-
def parse(self, data):
|
210
|
-
self.parser = xml.parsers.expat.ParserCreate()
|
211
|
-
self.parser.CharacterDataHandler = self.handleCharData
|
212
|
-
self.parser.StartElementHandler = self.handleStartElement
|
213
|
-
self.parser.EndElementHandler = self.handleEndElement
|
214
|
-
self.parser.Parse(data)
|
215
|
-
|
216
|
-
def handleCharData(self, data):
|
217
|
-
if self.t:
|
218
|
-
self.value+= data
|
219
|
-
|
220
|
-
def handleStartElement(self, name, attrs):
|
221
|
-
if name == 'si':
|
222
|
-
self.si = True
|
223
|
-
self.value = ""
|
224
|
-
elif name == 't' and self.rPh:
|
225
|
-
self.t = False
|
226
|
-
elif name == 't' and self.si:
|
227
|
-
self.t = True
|
228
|
-
elif name == 'rPh':
|
229
|
-
self.rPh = True
|
230
|
-
|
231
|
-
def handleEndElement(self, name):
|
232
|
-
if name == 'si':
|
233
|
-
self.si = False
|
234
|
-
self.strings.append(self.value)
|
235
|
-
elif name == 't':
|
236
|
-
self.t = False
|
237
|
-
elif name == 'rPh':
|
238
|
-
self.rPh = False
|
239
|
-
|
240
|
-
class Sheet:
|
241
|
-
def __init__(self, workbook, sharedString, styles, data):
|
242
|
-
self.parser = None
|
243
|
-
self.writer = None
|
244
|
-
self.sharedString = None
|
245
|
-
self.styles = None
|
246
|
-
|
247
|
-
self.in_sheet = False
|
248
|
-
self.in_row = False
|
249
|
-
self.in_cell = False
|
250
|
-
self.in_cell_value = False
|
251
|
-
self.in_cell_formula = False
|
252
|
-
|
253
|
-
self.columns = {}
|
254
|
-
self.rowNum = None
|
255
|
-
self.colType = None
|
256
|
-
self.s_attr = None
|
257
|
-
self.data = None
|
258
|
-
|
259
|
-
self.dateformat = None
|
260
|
-
self.skip_empty_lines = False
|
261
|
-
|
262
|
-
self.data = data
|
263
|
-
self.workbook = workbook
|
264
|
-
self.sharedStrings = sharedString.strings
|
265
|
-
self.styles = styles
|
266
|
-
|
267
|
-
def set_dateformat(self, dateformat):
|
268
|
-
self.dateformat = dateformat
|
269
|
-
|
270
|
-
def set_skip_empty_lines(self, skip):
|
271
|
-
self.skip_empty_lines = skip
|
272
|
-
|
273
|
-
def to_csv(self, writer):
|
274
|
-
self.writer = writer
|
275
|
-
self.parser = xml.parsers.expat.ParserCreate()
|
276
|
-
self.parser.CharacterDataHandler = self.handleCharData
|
277
|
-
self.parser.StartElementHandler = self.handleStartElement
|
278
|
-
self.parser.EndElementHandler = self.handleEndElement
|
279
|
-
self.parser.Parse(self.data)
|
280
|
-
|
281
|
-
def handleCharData(self, data):
|
282
|
-
if self.in_cell_value:
|
283
|
-
self.data = data # default value
|
284
|
-
if self.colType == "s": # shared string
|
285
|
-
self.data = self.sharedStrings[int(data)]
|
286
|
-
elif self.colType == "b": # boolean
|
287
|
-
self.data = (int(data) == 1 and "TRUE") or (int(data) == 0 and "FALSE") or data
|
288
|
-
elif self.s_attr:
|
289
|
-
s = int(self.s_attr)
|
290
|
-
|
291
|
-
# get cell format
|
292
|
-
format = None
|
293
|
-
xfs_numfmt = self.styles.cellXfs[s]
|
294
|
-
if self.styles.numFmts.has_key(xfs_numfmt):
|
295
|
-
format = self.styles.numFmts[xfs_numfmt]
|
296
|
-
elif STANDARD_FORMATS.has_key(xfs_numfmt):
|
297
|
-
format = STANDARD_FORMATS[xfs_numfmt]
|
298
|
-
# get format type
|
299
|
-
if format and FORMATS.has_key(format):
|
300
|
-
format_type = FORMATS[format]
|
301
|
-
|
302
|
-
if format_type == 'date': # date/time
|
303
|
-
try:
|
304
|
-
if self.workbook.date1904:
|
305
|
-
date = datetime.datetime(1904, 01, 01) + datetime.timedelta(float(data))
|
306
|
-
else:
|
307
|
-
date = datetime.datetime(1899, 12, 30) + datetime.timedelta(float(data))
|
308
|
-
if self.dateformat:
|
309
|
-
# str(dateformat) - python2.5 bug, see: http://bugs.python.org/issue2782
|
310
|
-
self.data = date.strftime(str(self.dateformat))
|
311
|
-
else:
|
312
|
-
dateformat = format.replace("yyyy", "%Y").replace("yy", "%y"). \
|
313
|
-
replace("hh:mm", "%H:%M").replace("h", "%H").replace("%H%H", "%H").replace("ss", "%S"). \
|
314
|
-
replace("d", "%e").replace("%e%e", "%d"). \
|
315
|
-
replace("mmmm", "%B").replace("mmm", "%b").replace(":mm", ":%M").replace("m", "%m").replace("%m%m", "%m"). \
|
316
|
-
replace("am/pm", "%p")
|
317
|
-
self.data = date.strftime(str(dateformat)).strip()
|
318
|
-
except (ValueError, OverflowError):
|
319
|
-
# invalid date format
|
320
|
-
self.data = data
|
321
|
-
elif format_type == 'time': # time
|
322
|
-
self.data = str(float(data) * 24*60*60)
|
323
|
-
# does not support it
|
324
|
-
#elif self.in_cell_formula:
|
325
|
-
# self.formula = data
|
326
|
-
|
327
|
-
def handleStartElement(self, name, attrs):
|
328
|
-
if self.in_row and name == 'c':
|
329
|
-
self.colType = attrs.get("t")
|
330
|
-
self.s_attr = attrs.get("s")
|
331
|
-
cellId = attrs.get("r")
|
332
|
-
if cellId:
|
333
|
-
self.colNum = cellId[:len(cellId)-len(self.rowNum)]
|
334
|
-
self.colIndex = 0
|
335
|
-
else:
|
336
|
-
self.colIndex+= 1
|
337
|
-
#self.formula = None
|
338
|
-
self.data = ""
|
339
|
-
self.in_cell = True
|
340
|
-
elif self.in_cell and name == 'v':
|
341
|
-
self.in_cell_value = True
|
342
|
-
#elif self.in_cell and name == 'f':
|
343
|
-
# self.in_cell_formula = True
|
344
|
-
elif self.in_sheet and name == 'row' and attrs.has_key('r'):
|
345
|
-
self.rowNum = attrs['r']
|
346
|
-
self.in_row = True
|
347
|
-
self.columns = {}
|
348
|
-
self.spans = None
|
349
|
-
if attrs.has_key('spans'):
|
350
|
-
self.spans = [int(i) for i in attrs['spans'].split(":")]
|
351
|
-
elif name == 'sheetData':
|
352
|
-
self.in_sheet = True
|
353
|
-
|
354
|
-
def handleEndElement(self, name):
|
355
|
-
if self.in_cell and name == 'v':
|
356
|
-
self.in_cell_value = False
|
357
|
-
#elif self.in_cell and name == 'f':
|
358
|
-
# self.in_cell_formula = False
|
359
|
-
elif self.in_cell and name == 'c':
|
360
|
-
t = 0
|
361
|
-
for i in self.colNum: t = t*26 + ord(i) - 64
|
362
|
-
self.columns[t - 1 + self.colIndex] = self.data
|
363
|
-
self.in_cell = False
|
364
|
-
if self.in_row and name == 'row':
|
365
|
-
if len(self.columns.keys()) > 0:
|
366
|
-
d = [""] * (max(self.columns.keys()) + 1)
|
367
|
-
for k in self.columns.keys():
|
368
|
-
d[k] = self.columns[k].encode("utf-8")
|
369
|
-
if self.spans:
|
370
|
-
l = self.spans[0] + self.spans[1] - 1
|
371
|
-
if len(d) < l:
|
372
|
-
d+= (l - len(d)) * ['']
|
373
|
-
# write line to csv
|
374
|
-
if not self.skip_empty_lines or d.count('') != len(d):
|
375
|
-
self.writer.writerow(d)
|
376
|
-
self.in_row = False
|
377
|
-
elif self.in_sheet and name == 'sheetData':
|
378
|
-
self.in_sheet = False
|
379
|
-
|
380
|
-
def convert_recursive(path, kwargs):
|
381
|
-
for name in os.listdir(path):
|
382
|
-
fullpath = os.path.join(path, name)
|
383
|
-
if os.path.isdir(fullpath):
|
384
|
-
convert_recursive(fullpath, kwargs)
|
385
|
-
else:
|
386
|
-
if fullpath.lower().endswith(".xlsx"):
|
387
|
-
outfilepath = fullpath[:-4] + 'csv'
|
388
|
-
print("Converting %s to %s" %(fullpath, outfilepath))
|
389
|
-
f = open(outfilepath, 'w+b')
|
390
|
-
try:
|
391
|
-
xlsx2csv(fullpath, f, **kwargs)
|
392
|
-
except zipfile.BadZipfile:
|
393
|
-
print("File is not a zip file")
|
394
|
-
f.close()
|
395
|
-
|
396
|
-
if __name__ == "__main__":
|
397
|
-
parser = OptionParser(usage = "%prog [options] infile [outfile]", version="0.11")
|
398
|
-
parser.add_option("-d", "--delimiter", dest="delimiter", default=",",
|
399
|
-
help="delimiter - csv columns delimiter, 'tab' or 'x09' for tab (comma is default)")
|
400
|
-
parser.add_option("-f", "--dateformat", dest="dateformat",
|
401
|
-
help="override date/time format (ex. %Y/%m/%d)")
|
402
|
-
parser.add_option("-i", "--ignoreempty", dest="skip_empty_lines", default=False, action="store_true",
|
403
|
-
help="skip empty lines")
|
404
|
-
parser.add_option("-p", "--sheetdelimiter", dest="sheetdelimiter", default="--------",
|
405
|
-
help="sheets delimiter used to separate sheets, pass '' if you don't want delimiters (default '--------')")
|
406
|
-
parser.add_option("-r", "--recursive", dest="recursive", default=False, action="store_true",
|
407
|
-
help="convert recursively")
|
408
|
-
parser.add_option("-s", "--sheet", dest="sheetid", default=1, type="int",
|
409
|
-
help="sheet no to convert (0 for all sheets)")
|
410
|
-
|
411
|
-
(options, args) = parser.parse_args()
|
412
|
-
|
413
|
-
if len(options.delimiter) == 1:
|
414
|
-
delimiter = options.delimiter
|
415
|
-
elif options.delimiter == 'tab':
|
416
|
-
delimiter = '\t'
|
417
|
-
elif options.delimiter == 'comma':
|
418
|
-
delimiter = ','
|
419
|
-
elif options.delimiter[0] == 'x':
|
420
|
-
delimiter = chr(int(options.delimiter[1:]))
|
421
|
-
else:
|
422
|
-
raise Exception("Invalid delimiter")
|
423
|
-
|
424
|
-
kwargs = {
|
425
|
-
'sheetid' : options.sheetid,
|
426
|
-
'delimiter' : delimiter,
|
427
|
-
'sheetdelimiter' : options.sheetdelimiter,
|
428
|
-
'dateformat' : options.dateformat,
|
429
|
-
'skip_empty_lines' : options.skip_empty_lines
|
430
|
-
}
|
431
|
-
|
432
|
-
if options.recursive:
|
433
|
-
if len(args) == 1:
|
434
|
-
convert_recursive(args[0], kwargs)
|
435
|
-
else:
|
436
|
-
parser.print_help()
|
437
|
-
else:
|
438
|
-
if len(args) < 1:
|
439
|
-
parser.print_help()
|
440
|
-
else:
|
441
|
-
if len(args) > 1:
|
442
|
-
outfile = open(args[1], 'w+b')
|
443
|
-
xlsx2csv(args[0], outfile, **kwargs)
|
444
|
-
outfile.close()
|
445
|
-
else:
|
446
|
-
xlsx2csv(args[0], sys.stdout, **kwargs)
|