poi2csv 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/build.xml +70 -0
- data/classes/ToCSV$ExcelFilenameFilter.class +0 -0
- data/classes/ToCSV.class +0 -0
- data/lib/.DS_Store +0 -0
- data/lib/commons-codec-1.5.jar +0 -0
- data/lib/commons-logging-1.1.jar +0 -0
- data/lib/dom4j-1.6.1.jar +0 -0
- data/lib/junit-3.8.1.jar +0 -0
- data/lib/log4j-1.2.13.jar +0 -0
- data/lib/poi-3.9-20121203.jar +0 -0
- data/lib/poi-examples-3.9-20121203.jar +0 -0
- data/lib/poi-excelant-3.9-20121203.jar +0 -0
- data/lib/poi-ooxml-3.9-20121203.jar +0 -0
- data/lib/poi-ooxml-schemas-3.9-20121203.jar +0 -0
- data/lib/poi-scratchpad-3.9-20121203.jar +0 -0
- data/lib/poi2csv/version.rb +3 -0
- data/lib/poi2csv.rb +12 -0
- data/lib/stax-api-1.0.1.jar +0 -0
- data/lib/xmlbeans-2.3.0.jar +0 -0
- data/poi2csv.gemspec +23 -0
- data/src/ToCSV.java +758 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3f419a351e1981e983019c5d6c94be41e8aa77d7
|
4
|
+
data.tar.gz: d469b9326fae229ffd4d460f5b9eff199e2455d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c8ee1be9dce6a77176aab26eae45577a93378fe6cb80b00ecb1fec2de915969dba906c0c267a8628350d52f59141b405d11babcb482a775c7c9a02feac86971b
|
7
|
+
data.tar.gz: adcd33315bd73bca8543afcb523c3547b206a914412847e18912e4ee1581a5cb98c094092924a7cbb6b1f6efb360b602fd4dab09b86397aa931998185bcfaf2c
|
data/.DS_Store
ADDED
Binary file
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Douglas English
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Poi2csv
|
2
|
+
|
3
|
+
This gem provides a wrapper around the Apache POI library (http://poi.apache.org/) for converting Excel (.xls and .xlsx) files to CSV.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'poi2csv'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install poi2csv
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Poi2csv::to_csv(input_file_path, output_folder_path, separator=nil, formating_convention=nil)
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/build.xml
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Ant build for the Java side of poi2csv: compiles src/ into classes/ against the jars in lib/. -->
<project name="poi2csv" default="build" basedir=".">
    <!-- Expose environment variables to the build as env.* properties. -->
    <property environment="env"/>
    <!-- NOTE(review): machine-specific relative path; only used by the init-eclipse-compiler target. -->
    <property name="ECLIPSE_HOME" value="../../../../Applications/eclipse"/>
    <property name="debuglevel" value="source,lines,vars"/>
    <!-- Java source/target level passed to javac. -->
    <property name="target" value="1.6"/>
    <property name="source" value="1.6"/>

    <!-- Compile-time and run-time classpath: compiled classes plus every bundled jar. -->
    <path id="poi2csv.classpath">
        <pathelement location="classes"/>
        <pathelement location="lib/commons-codec-1.5.jar"/>
        <pathelement location="lib/commons-logging-1.1.jar"/>
        <pathelement location="lib/dom4j-1.6.1.jar"/>
        <pathelement location="lib/junit-3.8.1.jar"/>
        <pathelement location="lib/log4j-1.2.13.jar"/>
        <pathelement location="lib/poi-3.9-20121203.jar"/>
        <pathelement location="lib/poi-examples-3.9-20121203.jar"/>
        <pathelement location="lib/poi-excelant-3.9-20121203.jar"/>
        <pathelement location="lib/poi-ooxml-3.9-20121203.jar"/>
        <pathelement location="lib/poi-ooxml-schemas-3.9-20121203.jar"/>
        <pathelement location="lib/poi-scratchpad-3.9-20121203.jar"/>
        <pathelement location="lib/stax-api-1.0.1.jar"/>
        <pathelement location="lib/xmlbeans-2.3.0.jar"/>
    </path>

    <!-- Create classes/ and copy every non-source resource from src/ into it. -->
    <target name="init">
        <mkdir dir="classes"/>
        <copy todir="classes" includeemptydirs="false">
            <fileset dir="src">
                <exclude name="**/*.launch"/>
                <exclude name="**/*.java"/>
            </fileset>
        </copy>
    </target>

    <!-- Remove all build output. -->
    <target name="clean">
        <delete dir="classes"/>
    </target>
    <target name="cleanall" depends="clean"/>

    <!-- Default target: build-subprojects is an empty placeholder, build-project does the work. -->
    <target name="build" depends="build-subprojects,build-project"/>
    <target name="build-subprojects"/>

    <!-- Compile everything under src/ into classes/. -->
    <target name="build-project" depends="init">
        <echo message="${ant.project.name}: ${ant.file}"/>
        <javac destdir="classes" source="${source}" target="${target}" debug="true" debuglevel="${debuglevel}" includeantruntime="false">
            <src path="src"/>
            <classpath refid="poi2csv.classpath"/>
        </javac>
    </target>

    <target name="build-refprojects" description="Build all projects which reference this project. Useful to propagate changes."/>

    <!-- Copies the Eclipse JDT compiler jar into Ant's lib directory and unpacks its compiler adapter. -->
    <target name="init-eclipse-compiler" description="copy Eclipse compiler jars to ant lib directory">
        <copy todir="${ant.library.dir}">
            <fileset dir="${ECLIPSE_HOME}/plugins" includes="org.eclipse.jdt.core_*.jar"/>
        </copy>
        <unzip dest="${ant.library.dir}">
            <patternset includes="jdtCompilerAdapter.jar"/>
            <fileset dir="${ECLIPSE_HOME}/plugins" includes="org.eclipse.jdt.core_*.jar"/>
        </unzip>
    </target>

    <!-- Re-run the normal build with the Eclipse compiler adapter selected. -->
    <target name="build-eclipse-compiler" description="compile project with Eclipse compiler">
        <property name="build.compiler" value="org.eclipse.jdt.core.JDTCompilerAdapter"/>
        <antcall target="build"/>
    </target>

    <!-- NOTE(review): the only Java source listed for this package is src/ToCSV.java; confirm that an
         ExtractExcelToCSV class actually exists, otherwise this target can never run successfully. -->
    <target name="ExtractExcelToCSV">
        <java classname="ExtractExcelToCSV" fork="yes" failonerror="true">
            <classpath refid="poi2csv.classpath"/>
        </java>
    </target>

    <!-- Run the ToCSV converter class in a forked JVM; the build fails if the JVM exits with an error. -->
    <target name="ToCSV">
        <java classname="ToCSV" fork="yes" failonerror="true">
            <classpath refid="poi2csv.classpath"/>
        </java>
    </target>
</project>
|
Binary file
|
data/classes/ToCSV.class
ADDED
Binary file
|
data/lib/.DS_Store
ADDED
Binary file
|
Binary file
|
Binary file
|
data/lib/dom4j-1.6.1.jar
ADDED
Binary file
|
data/lib/junit-3.8.1.jar
ADDED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/poi2csv.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require "poi2csv/version"
|
2
|
+
|
3
|
+
# Thin Ruby wrapper that launches the bundled Java `ToCSV` class to convert
# Excel workbooks (.xls / .xlsx) into CSV files.
module Poi2csv

  # Separator passed to the converter when the caller asks for a formatting
  # convention without naming a separator. Mirrors DEFAULT_SEPARATOR in
  # src/ToCSV.java.
  DEFAULT_SEPARATOR = ","

  # Runs the Java converter and returns whatever it printed on standard output.
  #
  # input_file_path      - path to an Excel workbook (or a folder of workbooks).
  # output_folder_path   - existing folder that receives the CSV file(s).
  # separator            - optional field separator; omitted when nil or empty.
  # formating_convention - optional escaping convention understood by ToCSV
  #                        (0 = Excel style, 1 = UNIX style per the constants
  #                        in src/ToCSV.java); omitted when nil or empty.
  #
  # The command is executed as an argument vector, never through a shell, so
  # paths containing spaces and separators such as ";" or "|" reach the JVM
  # verbatim and cannot be interpreted as shell syntax.
  #
  # Raises Errno::ENOENT when no `java` executable can be found.
  def self.to_csv(input_file_path, output_folder_path, separator=nil, formating_convention=nil)
    IO.popen(java_command(input_file_path, output_folder_path, separator, formating_convention), &:read)
  end

  # Java classpath for the converter: every jar beside this file (the "*"
  # wildcard is expanded by the JVM itself) plus the compiled classes folder.
  # Computed once and memoized.
  def self.classpath
    @_classpath ||= [
      File.expand_path(File.join(File.dirname(__FILE__), '*')),
      File.expand_path(File.join(File.dirname(__FILE__), '..', 'classes'))
    ].join(File::PATH_SEPARATOR)
  end

  # Builds the argument vector for the `java` invocation.
  #
  # NOTE(review): assumes ToCSV's main method reads its arguments positionally
  # as source, destination, separator, convention -- confirm against
  # src/ToCSV.java. Under that assumption a convention supplied without a
  # separator must be preceded by an explicit separator, otherwise it would be
  # read as the separator itself.
  def self.java_command(input_file_path, output_folder_path, separator, formating_convention)
    missing = lambda { |value| value.nil? || value.to_s.empty? }

    optional =
      if missing.call(formating_convention)
        missing.call(separator) ? [] : [separator]
      else
        [missing.call(separator) ? DEFAULT_SEPARATOR : separator, formating_convention]
      end

    ["java", "-cp", classpath, "ToCSV", input_file_path, output_folder_path, *optional].map(&:to_s)
  end
  private_class_method :java_command
end
|
Binary file
|
Binary file
|
data/poi2csv.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Make lib/ loadable so the version constant can be read while building the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'poi2csv/version'

Gem::Specification.new do |spec|
  spec.name          = "poi2csv"
  spec.version       = Poi2csv::VERSION
  spec.authors       = ["Douglas English"]
  spec.email         = ["douglas.english@gmail.com"]
  # RubyGems treats `summary` as the short one-liner and `description` as the
  # longer text, so the short sentence belongs in `summary`.
  spec.summary       = %q{Converts Excel .xls and .xlsx files to CSV.}
  spec.description   = %q{This GEM provides a wrapper to the http://poi.apache.org/ library for converting Excel (.xls and .xlsx) files to CSV.}
  spec.homepage      = "https://github.com/denglish/poi2csv"
  spec.license       = "MIT"

  # Package everything tracked by git except macOS Finder metadata, which has
  # no place in a released gem.
  spec.files         = `git ls-files`.split($/).reject { |f| File.basename(f) == ".DS_Store" }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
|
data/src/ToCSV.java
ADDED
@@ -0,0 +1,758 @@
|
|
1
|
+
/* ====================================================================
|
2
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
3
|
+
contributor license agreements. See the NOTICE file distributed with
|
4
|
+
this work for additional information regarding copyright ownership.
|
5
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
6
|
+
(the "License"); you may not use this file except in compliance with
|
7
|
+
the License. You may obtain a copy of the License at
|
8
|
+
|
9
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
See the License for the specific language governing permissions and
|
15
|
+
limitations under the License.
|
16
|
+
==================================================================== */
|
17
|
+
|
18
|
+
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
19
|
+
import org.apache.poi.ss.usermodel.Workbook;
|
20
|
+
import org.apache.poi.ss.usermodel.Sheet;
|
21
|
+
import org.apache.poi.ss.usermodel.Row;
|
22
|
+
import org.apache.poi.ss.usermodel.Cell;
|
23
|
+
import org.apache.poi.ss.usermodel.DataFormatter;
|
24
|
+
import org.apache.poi.ss.usermodel.FormulaEvaluator;
|
25
|
+
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
26
|
+
|
27
|
+
import java.io.File;
|
28
|
+
import java.io.FileInputStream;
|
29
|
+
import java.io.FileWriter;
|
30
|
+
import java.io.BufferedWriter;
|
31
|
+
import java.io.FilenameFilter;
|
32
|
+
import java.io.IOException;
|
33
|
+
import java.io.FileNotFoundException;
|
34
|
+
import java.util.ArrayList;
|
35
|
+
|
36
|
+
/**
|
37
|
+
* Demonstrates <em>one</em> way to convert an Excel spreadsheet into a CSV
|
38
|
+
* file. This class makes the following assumptions;
|
39
|
+
* <list>
|
40
|
+
* <li>1. Where the Excel workbook contains more than one worksheet, then a single
|
41
|
+
* CSV file will contain the data from all of the worksheets.</li>
|
42
|
+
* <li>2. The data matrix contained in the CSV file will be square. This means that
|
43
|
+
* the number of fields in each record of the CSV file will match the number
|
44
|
+
* of cells in the longest row found in the Excel workbook. Any short records
|
45
|
+
* will be 'padded' with empty fields - an empty field is represented in the
|
46
|
+
* CSV file in this way - ,,.</li>
|
47
|
+
* <li>3. Empty fields will represent missing cells.</li>
|
48
|
+
* <li>4. A record consisting of empty fields will be used to represent an empty row
|
49
|
+
* in the Excel workbook.</li>
|
50
|
+
* </list>
|
51
|
+
* Therefore, if the worksheet looked like this;
|
52
|
+
*
|
53
|
+
* <pre>
|
54
|
+
* ___________________________________________
|
55
|
+
* | | | | | |
|
56
|
+
* | A | B | C | D | E |
|
57
|
+
* ___|_______|_______|_______|_______|_______|
|
58
|
+
* | | | | | |
|
59
|
+
* 1 | 1 | 2 | 3 | 4 | 5 |
|
60
|
+
* ___|_______|_______|_______|_______|_______|
|
61
|
+
* | | | | | |
|
62
|
+
* 2 | | | | | |
|
63
|
+
* ___|_______|_______|_______|_______|_______|
|
64
|
+
* | | | | | |
|
65
|
+
* 3 | | A | | B | |
|
66
|
+
* ___|_______|_______|_______|_______|_______|
|
67
|
+
* | | | | | |
|
68
|
+
* 4 | | | | | Z |
|
69
|
+
* ___|_______|_______|_______|_______|_______|
|
70
|
+
* | | | | | |
|
71
|
+
* 5 | 1,400 | | 250 | | |
|
72
|
+
* ___|_______|_______|_______|_______|_______|
|
73
|
+
*
|
74
|
+
* </pre>
|
75
|
+
*
|
76
|
+
* Then, the resulting CSV file will contain the following lines (records);
|
77
|
+
* <pre>
|
78
|
+
* 1,2,3,4,5
|
79
|
+
* ,,,,
|
80
|
+
* ,A,,B,
|
81
|
+
* ,,,,Z
|
82
|
+
* "1,400",,250,,
|
83
|
+
* </pre><p>
|
84
|
+
* Typically, the comma is used to separate each of the fields that, together,
|
85
|
+
* constitute a single record or line within the CSV file. This is not however
|
86
|
+
* a hard and fast rule and so this class allows the user to determine which
|
87
|
+
* character is used as the field separator and assumes the comma if none other
|
88
|
+
* is specified.
|
89
|
+
* </p><p>
|
90
|
+
* If a field contains the separator then it will be escaped. If the file should
|
91
|
+
* obey Excel's CSV formatting rules, then the field will be surrounded with
|
92
|
+
* speech marks whilst if it should obey UNIX conventions, each occurrence of
|
93
|
+
* the separator will be preceded by the backslash character.
|
94
|
+
* </p><p>
|
95
|
+
* If a field contains an end of line (EOL) character then it too will be
|
96
|
+
* escaped. If the file should obey Excel's CSV formatting rules then the field
|
97
|
+
* will again be surrounded by speech marks. On the other hand, if the file
|
98
|
+
* should follow UNIX conventions then a single backslash will precede the
|
99
|
+
* EOL character. There is no single applicable standard for UNIX and some
|
100
|
+
* applications replace the CR with \r and the LF with \n but this class will
|
101
|
+
* not do so.
|
102
|
+
* </p><p>
|
103
|
+
* If the field contains double quotes then that character will be escaped. It
|
104
|
+
* seems as though UNIX does not define a standard for this whilst Excel does.
|
105
|
+
* Should the CSV file have to obey Excel's formatting rules then the speech
|
106
|
+
* mark character will be escaped with a second set of speech marks. Finally, an
|
107
|
+
* enclosing set of speech marks will also surround the entire field. Thus, if
|
108
|
+
* the following line of text appeared in a cell - "Hello" he said - it would
|
109
|
+
* look like this when converted into a field within a CSV file - """Hello"" he
|
110
|
+
* said".
|
111
|
+
* </p><p>
|
112
|
+
* Finally, it is worth noting that talk of CSV 'standards' is really slightly
|
113
|
+
* misleading as there is no such thing. It may well be that the code in this
|
114
|
+
* class has to be modified to produce files to suit a specific application
|
115
|
+
* or requirement.
|
116
|
+
* </p>
|
117
|
+
* @author Mark B
|
118
|
+
* @version 1.00 9th April 2010
|
119
|
+
* 1.10 13th April 2010 - Added support for processing all Excel
|
120
|
+
* workbooks in a folder along with the ability
|
121
|
+
* to specify a field separator character.
|
122
|
+
* 2.00 14th April 2010 - Added support for embedded characters; the
|
123
|
+
* field separator, EOL and double quotes or
|
124
|
+
* speech marks. In addition, gave the client
|
125
|
+
* the ability to select how these are handled,
|
126
|
+
* either obeying Excel's or UNIX formatting
|
127
|
+
* conventions.
|
128
|
+
*/
|
129
|
+
public class ToCSV {
|
130
|
+
|
131
|
+
private Workbook workbook = null;
|
132
|
+
private ArrayList<ArrayList<String>> csvData = null;
|
133
|
+
private int maxRowWidth = 0;
|
134
|
+
private int formattingConvention = 0;
|
135
|
+
private DataFormatter formatter = null;
|
136
|
+
private FormulaEvaluator evaluator = null;
|
137
|
+
private String separator = null;
|
138
|
+
|
139
|
+
private static final String CSV_FILE_EXTENSION = ".csv";
|
140
|
+
private static final String DEFAULT_SEPARATOR = ",";
|
141
|
+
|
142
|
+
/**
|
143
|
+
* Identifies that the CSV file should obey Excel's formatting conventions
|
144
|
+
* with regard to escaping certain embedded characters - the field separator,
|
145
|
+
* speech mark and end of line (EOL) character
|
146
|
+
*/
|
147
|
+
public static final int EXCEL_STYLE_ESCAPING = 0;
|
148
|
+
|
149
|
+
/**
|
150
|
+
* Identifies that the CSV file should obey UNIX formatting conventions
|
151
|
+
* with regard to escaping certain embedded characters - the field separator
|
152
|
+
* and end of line (EOL) character
|
153
|
+
*/
|
154
|
+
public static final int UNIX_STYLE_ESCAPING = 1;
|
155
|
+
|
156
|
+
/**
|
157
|
+
* Process the contents of a folder, convert the contents of each Excel
|
158
|
+
* workbook into CSV format and save the resulting file to the specified
|
159
|
+
* folder using the same name as the original workbook with the .xls or
|
160
|
+
* .xlsx extension replaced by .csv. This method will ensure that the
|
161
|
+
* CSV file created contains the comma field separator and that embedded
|
162
|
+
* characters such as the field separator, the EOL and double quotes are
|
163
|
+
* escaped in accordance with Excel's convention.
|
164
|
+
*
|
165
|
+
* @param strSource An instance of the String class that encapsulates the
|
166
|
+
* name of and path to either a folder containing those Excel
|
167
|
+
* workbook(s) or the name of and path to an individual Excel workbook
|
168
|
+
* that is/are to be converted.
|
169
|
+
* @param strDestination An instance of the String class encapsulating the
|
170
|
+
* name of and path to a folder that will contain the resulting CSV
|
171
|
+
* files.
|
172
|
+
* @throws java.io.FileNotFoundException Thrown if any file cannot be located
|
173
|
+
* on the file system during processing.
|
174
|
+
* @throws java.io.IOException Thrown if the file system encounters any
|
175
|
+
* problems during processing.
|
176
|
+
* @throws java.lang.IllegalArgumentException Thrown if the value passed
|
177
|
+
* to the strSource parameter refers to a file or folder that does not
|
178
|
+
* exist or if the value passed to the strDestination parameter refers
|
179
|
+
* to a folder that does not exist or simply does not refer to a
|
180
|
+
* folder.
|
181
|
+
* @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown
|
182
|
+
* if the xml markup encountered whilst parsing a SpreadsheetML
|
183
|
+
* file (.xlsx) is invalid.
|
184
|
+
*/
|
185
|
+
public void convertExcelToCSV(String strSource, String strDestination)
|
186
|
+
throws FileNotFoundException, IOException,
|
187
|
+
IllegalArgumentException, InvalidFormatException {
|
188
|
+
|
189
|
+
// Simply chain the call to the overloaded convertExcelToCSV(String,
|
190
|
+
// String, String, int) method, pass the default separator and ensure
|
191
|
+
// that certain embedded characters are escaped in accordance with
|
192
|
+
// Excel's formatting conventions
|
193
|
+
this.convertExcelToCSV(strSource, strDestination,
|
194
|
+
ToCSV.DEFAULT_SEPARATOR, ToCSV.EXCEL_STYLE_ESCAPING);
|
195
|
+
}
|
196
|
+
|
197
|
+
/**
|
198
|
+
* Process the contents of a folder, convert the contents of each Excel
|
199
|
+
* workbook into CSV format and save the resulting file to the specified
|
200
|
+
* folder using the same name as the original workbook with the .xls or
|
201
|
+
* .xlsx extension replaced by .csv. This method allows the client to
|
202
|
+
* define the field separator but will ensure that embedded characters such
|
203
|
+
* as the field separator, the EOL and double quotes are escaped in
|
204
|
+
* accordance with Excel's convention.
|
205
|
+
*
|
206
|
+
* @param strSource An instance of the String class that encapsulates the
|
207
|
+
* name of and path to either a folder containing those Excel
|
208
|
+
* workbook(s) or the name of and path to an individual Excel workbook
|
209
|
+
* that is/are to be converted.
|
210
|
+
* @param strDestination An instance of the String class encapsulating the
|
211
|
+
* name of and path to a folder that will contain the resulting CSV
|
212
|
+
* files.
|
213
|
+
* @param separator An instance of the String class that encapsulates the
|
214
|
+
* character or characters the client wishes to use as the field
|
215
|
+
* separator.
|
216
|
+
* @throws java.io.FileNotFoundException Thrown if any file cannot be located
|
217
|
+
* on the file system during processing.
|
218
|
+
* @throws java.io.IOException Thrown if the file system encounters any
|
219
|
+
* problems during processing.
|
220
|
+
* @throws java.lang.IllegalArgumentException Thrown if the value passed
|
221
|
+
* to the strSource parameter refers to a file or folder that does not
|
222
|
+
* exist or if the value passed to the strDestination parameter refers
|
223
|
+
* to a folder that does not exist or simply does not refer to a
|
224
|
+
* folder.
|
225
|
+
* @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown
|
226
|
+
* if the xml markup encountered whilst parsing a SpreadsheetML
|
227
|
+
* file (.xlsx) is invalid.
|
228
|
+
*/
|
229
|
+
public void convertExcelToCSV(String strSource, String strDestination,
|
230
|
+
String separator)
|
231
|
+
throws FileNotFoundException, IOException,
|
232
|
+
IllegalArgumentException, InvalidFormatException {
|
233
|
+
|
234
|
+
// Simply chain the call to the overloaded convertExcelToCSV(String,
|
235
|
+
// String, String, int) method and ensure that certain embedded
|
236
|
+
// characters are escaped in accordance with Excel's formatting
|
237
|
+
// conventions
|
238
|
+
this.convertExcelToCSV(strSource, strDestination,
|
239
|
+
separator, ToCSV.EXCEL_STYLE_ESCAPING);
|
240
|
+
}
|
241
|
+
|
242
|
+
/**
|
243
|
+
* Process the contents of a folder, convert the contents of each Excel
|
244
|
+
* workbook into CSV format and save the resulting file to the specified
|
245
|
+
* folder using the same name as the original workbook with the .xls or
|
246
|
+
* .xlsx extension replaced by .csv
|
247
|
+
*
|
248
|
+
* @param strSource An instance of the String class that encapsulates the
|
249
|
+
* name of and path to either a folder containing those Excel
|
250
|
+
* workbook(s) or the name of and path to an individual Excel workbook
|
251
|
+
* that is/are to be converted.
|
252
|
+
* @param strDestination An instance of the String class encapsulating the name
|
253
|
+
* of and path to a folder that will contain the resulting CSV files.
|
254
|
+
* @param formattingConvention A primitive int whose value will determine
|
255
|
+
* whether certain embedded characters should be escaped in accordance
|
256
|
+
* with Excel's or UNIX formatting conventions. Two constants are
|
257
|
+
* defined to support this option; ToCSV.EXCEL_STYLE_ESCAPING and
|
258
|
+
* ToCSV.UNIX_STYLE_ESCAPING
|
259
|
+
* @param separator An instance of the String class encapsulating the
|
260
|
+
* character or characters that should be used to separate items
|
261
|
+
* on a line within the CSV file.
|
262
|
+
* @throws java.io.FileNotFoundException Thrown if any file cannot be located
|
263
|
+
* on the file system during processing.
|
264
|
+
* @throws java.io.IOException Thrown if the file system encounters any
|
265
|
+
* problems during processing.
|
266
|
+
* @throws java.lang.IllegalArgumentException Thrown if the value passed
|
267
|
+
* to the strSource parameter refers to a file or folder that does not
|
268
|
+
* exist, if the value passed to the strDestination parameter refers
|
269
|
+
* to a folder that does not exist, if the value passed to the
|
270
|
+
* strDestination parameter does not refer to a folder or if the
|
271
|
+
* value passed to the formattingConvention parameter is other than
|
272
|
+
* one of the values defined by the constants ToCSV.EXCEL_STYLE_ESCAPING
|
273
|
+
* and ToCSV.UNIX_STYLE_ESCAPING.
|
274
|
+
* @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown
|
275
|
+
* if the xml markup encountered whilst parsing a SpreadsheetML
|
276
|
+
* file (.xlsx) is invalid.
|
277
|
+
*/
|
278
|
+
public void convertExcelToCSV(String strSource, String strDestination,
|
279
|
+
String separator, int formattingConvention)
|
280
|
+
throws FileNotFoundException, IOException,
|
281
|
+
IllegalArgumentException, InvalidFormatException {
|
282
|
+
File source = new File(strSource);
|
283
|
+
File destination = new File(strDestination);
|
284
|
+
File[] filesList = null;
|
285
|
+
String destinationFilename = null;
|
286
|
+
|
287
|
+
// Check that the source file/folder exists.
|
288
|
+
if(!source.exists()) {
|
289
|
+
throw new IllegalArgumentException("The source for the Excel " +
|
290
|
+
"file(s) cannot be found.");
|
291
|
+
}
|
292
|
+
|
293
|
+
// Ensure that the folder the user has chosen to save the CSV files
|
294
|
+
// away into firstly exists and secondly is a folder rather than, for
|
295
|
+
// instance, a data file.
|
296
|
+
if(!destination.exists()) {
|
297
|
+
throw new IllegalArgumentException("The folder/directory for the " +
|
298
|
+
"converted CSV file(s) does not exist.");
|
299
|
+
}
|
300
|
+
if(!destination.isDirectory()) {
|
301
|
+
throw new IllegalArgumentException("The destination for the CSV " +
|
302
|
+
"file(s) is not a directory/folder.");
|
303
|
+
}
|
304
|
+
|
305
|
+
// Ensure the value passed to the formattingConvention parameter is
|
306
|
+
// within range.
|
307
|
+
if(formattingConvention != ToCSV.EXCEL_STYLE_ESCAPING &&
|
308
|
+
formattingConvention != ToCSV.UNIX_STYLE_ESCAPING) {
|
309
|
+
throw new IllegalArgumentException("The value passed to the " +
|
310
|
+
"formattingConvention parameter is out of range.");
|
311
|
+
}
|
312
|
+
|
313
|
+
// Copy the separator character and formatting convention into local
|
314
|
+
// variables for use in other methods.
|
315
|
+
this.separator = separator;
|
316
|
+
this.formattingConvention = formattingConvention;
|
317
|
+
|
318
|
+
// Check to see if the sourceFolder variable holds a reference to
|
319
|
+
// a file or a folder full of files.
|
320
|
+
if(source.isDirectory()) {
|
321
|
+
// Get a list of all of the Excel spreadsheet files (workbooks) in
|
322
|
+
// the source folder/directory
|
323
|
+
filesList = source.listFiles(new ExcelFilenameFilter());
|
324
|
+
}
|
325
|
+
else {
|
326
|
+
// Assume that it must be a file handle - although there are other
|
327
|
+
// options the code should perhaps check - and store the reference
|
328
|
+
// into the filesList variable.
|
329
|
+
filesList = new File[]{source};
|
330
|
+
}
|
331
|
+
|
332
|
+
// Step through each of the files in the source folder and for each
|
333
|
+
// open the workbook, convert it's contents to CSV format and then
|
334
|
+
// save the resulting file away into the folder specified by the
|
335
|
+
// contents of the destination variable. Note that the name of the
|
336
|
+
// csv file will be created by taking the name of the Excel file,
|
337
|
+
// removing the extension and replacing it with .csv. Note that there
|
338
|
+
// is one drawback with this approach; if the folder holding the files
|
339
|
+
// contains two workbooks whose names match but one is a binary file
|
340
|
+
// (.xls) and the other a SpreadsheetML file (.xlsx), then the names
|
341
|
+
// for both CSV files will be identical and one CSV file will,
|
342
|
+
// therefore, over-write the other.
|
343
|
+
for(File excelFile : filesList) {
|
344
|
+
// Open the workbook
|
345
|
+
this.openWorkbook(excelFile);
|
346
|
+
|
347
|
+
// Convert it's contents into a CSV file
|
348
|
+
this.convertToCSV();
|
349
|
+
|
350
|
+
// Build the name of the csv folder from that of the Excel workbook.
|
351
|
+
// Simply replace the .xls or .xlsx file extension with .csv
|
352
|
+
destinationFilename = excelFile.getName();
|
353
|
+
destinationFilename = destinationFilename.substring(
|
354
|
+
0, destinationFilename.lastIndexOf(".")) +
|
355
|
+
ToCSV.CSV_FILE_EXTENSION;
|
356
|
+
|
357
|
+
// Save the CSV file away using the newly constructed file name
|
358
|
+
// and to the specified directory.
|
359
|
+
this.saveCSVFile(new File(destination, destinationFilename));
|
360
|
+
}
|
361
|
+
}
|
362
|
+
|
363
|
+
/**
|
364
|
+
* Open an Excel workbook ready for conversion.
|
365
|
+
*
|
366
|
+
* @param file An instance of the File class that encapsulates a handle
|
367
|
+
* to a valid Excel workbook. Note that the workbook can be in
|
368
|
+
* either binary (.xls) or SpreadsheetML (.xlsx) format.
|
369
|
+
* @throws java.io.FileNotFoundException Thrown if the file cannot be located.
|
370
|
+
* @throws java.io.IOException Thrown if a problem occurs in the file system.
|
371
|
+
* @throws org.apache.poi.openxml4j.exceptions.InvalidFormatException Thrown
|
372
|
+
* if invalid xml is found whilst parsing an input SpreadsheetML
|
373
|
+
* file.
|
374
|
+
*/
|
375
|
+
private void openWorkbook(File file) throws FileNotFoundException,
|
376
|
+
IOException, InvalidFormatException {
|
377
|
+
FileInputStream fis = null;
|
378
|
+
try {
|
379
|
+
System.out.println("Opening workbook [" + file.getName() + "]");
|
380
|
+
|
381
|
+
fis = new FileInputStream(file);
|
382
|
+
|
383
|
+
// Open the workbook and then create the FormulaEvaluator and
|
384
|
+
// DataFormatter instances that will be needed to, respectively,
|
385
|
+
// force evaluation of formulae found in cells and create a
|
386
|
+
// formatted String encapsulating the cells contents.
|
387
|
+
this.workbook = WorkbookFactory.create(fis);
|
388
|
+
this.evaluator = this.workbook.getCreationHelper().createFormulaEvaluator();
|
389
|
+
this.formatter = new DataFormatter(true);
|
390
|
+
}
|
391
|
+
finally {
|
392
|
+
if(fis != null) {
|
393
|
+
fis.close();
|
394
|
+
}
|
395
|
+
}
|
396
|
+
}
|
397
|
+
|
398
|
+
/**
|
399
|
+
* Called to convert the contents of the currently opened workbook into
|
400
|
+
* a CSV file.
|
401
|
+
*/
|
402
|
+
private void convertToCSV() {
|
403
|
+
Sheet sheet = null;
|
404
|
+
Row row = null;
|
405
|
+
int lastRowNum = 0;
|
406
|
+
this.csvData = new ArrayList<ArrayList<String>>();
|
407
|
+
|
408
|
+
System.out.println("Converting files contents to CSV format.");
|
409
|
+
|
410
|
+
// Discover how many sheets there are in the workbook....
|
411
|
+
int numSheets = this.workbook.getNumberOfSheets();
|
412
|
+
|
413
|
+
// and then iterate through them.
|
414
|
+
for(int i = 0; i < numSheets; i++) {
|
415
|
+
|
416
|
+
// Get a reference to a sheet and check to see if it contains
|
417
|
+
// any rows.
|
418
|
+
sheet = this.workbook.getSheetAt(i);
|
419
|
+
if(sheet.getPhysicalNumberOfRows() > 0) {
|
420
|
+
|
421
|
+
// Note down the index number of the bottom-most row and
|
422
|
+
// then iterate through all of the rows on the sheet starting
|
423
|
+
// from the very first row - number 1 - even if it is missing.
|
424
|
+
// Recover a reference to the row and then call another method
|
425
|
+
// which will strip the data from the cells and build lines
|
426
|
+
// for inclusion in the resulting CSV file.
|
427
|
+
lastRowNum = sheet.getLastRowNum();
|
428
|
+
for(int j = 0; j <= lastRowNum; j++) {
|
429
|
+
row = sheet.getRow(j);
|
430
|
+
this.rowToCSV(row);
|
431
|
+
}
|
432
|
+
}
|
433
|
+
}
|
434
|
+
}
|
435
|
+
|
436
|
+
/**
|
437
|
+
* Called to actually save the data recovered from the Excel workbook
|
438
|
+
* as a CSV file.
|
439
|
+
*
|
440
|
+
* @param file An instance of the File class that encapsulates a handle
|
441
|
+
* referring to the CSV file.
|
442
|
+
* @throws java.io.FileNotFoundException Thrown if the file cannot be found.
|
443
|
+
* @throws java.io.IOException Thrown to indicate and error occurred in the
|
444
|
+
* underlying file system.
|
445
|
+
*/
|
446
|
+
private void saveCSVFile(File file)
|
447
|
+
throws FileNotFoundException, IOException {
|
448
|
+
FileWriter fw = null;
|
449
|
+
BufferedWriter bw = null;
|
450
|
+
ArrayList<String> line = null;
|
451
|
+
StringBuffer buffer = null;
|
452
|
+
String csvLineElement = null;
|
453
|
+
try {
|
454
|
+
|
455
|
+
System.out.println("Saving the CSV file [" + file.getName() + "]");
|
456
|
+
|
457
|
+
// Open a writer onto the CSV file.
|
458
|
+
fw = new FileWriter(file);
|
459
|
+
bw = new BufferedWriter(fw);
|
460
|
+
|
461
|
+
// Step through the elements of the ArrayList that was used to hold
|
462
|
+
// all of the data recovered from the Excel workbooks' sheets, rows
|
463
|
+
// and cells.
|
464
|
+
for(int i = 0; i < this.csvData.size(); i++) {
|
465
|
+
buffer = new StringBuffer();
|
466
|
+
|
467
|
+
// Get an element from the ArrayList that contains the data for
|
468
|
+
// the workbook. This element will itself be an ArrayList
|
469
|
+
// containing Strings and each String will hold the data recovered
|
470
|
+
// from a single cell. The for() loop is used to recover elements
|
471
|
+
// from this 'row' ArrayList one at a time and to write the Strings
|
472
|
+
// away to a StringBuffer thus assembling a single line for inclusion
|
473
|
+
// in the CSV file. If a row was empty or if it was short, then
|
474
|
+
// the ArrayList that contains it's data will also be shorter than
|
475
|
+
// some of the others. Therefore, it is necessary to check within
|
476
|
+
// the for loop to ensure that the ArrayList contains data to be
|
477
|
+
// processed. If it does, then an element will be recovered and
|
478
|
+
// appended to the StringBuffer.
|
479
|
+
line = this.csvData.get(i);
|
480
|
+
for(int j = 0; j < this.maxRowWidth; j++) {
|
481
|
+
if(line.size() > j) {
|
482
|
+
csvLineElement = line.get(j);
|
483
|
+
if(csvLineElement != null) {
|
484
|
+
buffer.append(this.escapeEmbeddedCharacters(
|
485
|
+
csvLineElement));
|
486
|
+
}
|
487
|
+
}
|
488
|
+
if(j < (this.maxRowWidth - 1)) {
|
489
|
+
buffer.append(this.separator);
|
490
|
+
}
|
491
|
+
}
|
492
|
+
|
493
|
+
// Once the line is built, write it away to the CSV file.
|
494
|
+
bw.write(buffer.toString().trim());
|
495
|
+
|
496
|
+
// Condition the inclusion of new line characters so as to
|
497
|
+
// avoid an additional, superfluous, new line at the end of
|
498
|
+
// the file.
|
499
|
+
if(i < (this.csvData.size() - 1)) {
|
500
|
+
bw.newLine();
|
501
|
+
}
|
502
|
+
}
|
503
|
+
}
|
504
|
+
finally {
|
505
|
+
if(bw != null) {
|
506
|
+
bw.flush();
|
507
|
+
bw.close();
|
508
|
+
}
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
/**
|
513
|
+
* Called to convert a row of cells into a line of data that can later be
|
514
|
+
* output to the CSV file.
|
515
|
+
*
|
516
|
+
* @param row An instance of either the HSSFRow or XSSFRow classes that
|
517
|
+
* encapsulates information about a row of cells recovered from
|
518
|
+
* an Excel workbook.
|
519
|
+
*/
|
520
|
+
private void rowToCSV(Row row) {
|
521
|
+
Cell cell = null;
|
522
|
+
int lastCellNum = 0;
|
523
|
+
ArrayList<String> csvLine = new ArrayList<String>();
|
524
|
+
|
525
|
+
// Check to ensure that a row was recovered from the sheet as it is
|
526
|
+
// possible that one or more rows between other populated rows could be
|
527
|
+
// missing - blank. If the row does contain cells then...
|
528
|
+
if(row != null) {
|
529
|
+
|
530
|
+
// Get the index for the right most cell on the row and then
|
531
|
+
// step along the row from left to right recovering the contents
|
532
|
+
// of each cell, converting that into a formatted String and
|
533
|
+
// then storing the String into the csvLine ArrayList.
|
534
|
+
lastCellNum = row.getLastCellNum();
|
535
|
+
for(int i = 0; i <= lastCellNum; i++) {
|
536
|
+
cell = row.getCell(i);
|
537
|
+
if(cell == null) {
|
538
|
+
csvLine.add("");
|
539
|
+
}
|
540
|
+
else {
|
541
|
+
if(cell.getCellType() != Cell.CELL_TYPE_FORMULA) {
|
542
|
+
csvLine.add(this.formatter.formatCellValue(cell));
|
543
|
+
}
|
544
|
+
else {
|
545
|
+
csvLine.add(this.formatter.formatCellValue(cell, this.evaluator));
|
546
|
+
}
|
547
|
+
}
|
548
|
+
}
|
549
|
+
// Make a note of the index number of the right most cell. This value
|
550
|
+
// will later be used to ensure that the matrix of data in the CSV file
|
551
|
+
// is square.
|
552
|
+
if(lastCellNum > this.maxRowWidth) {
|
553
|
+
this.maxRowWidth = lastCellNum;
|
554
|
+
}
|
555
|
+
}
|
556
|
+
this.csvData.add(csvLine);
|
557
|
+
}
|
558
|
+
|
559
|
+
/**
|
560
|
+
* Checks to see whether the field - which consists of the formatted
|
561
|
+
* contents of an Excel worksheet cell encapsulated within a String - contains
|
562
|
+
* any embedded characters that must be escaped. The method is able to
|
563
|
+
* comply with either Excel's or UNIX formatting conventions in the
|
564
|
+
* following manner;
|
565
|
+
*
|
566
|
+
* With regard to UNIX conventions, if the field contains any embedded
|
567
|
+
* field separator or EOL characters they will each be escaped by prefixing
|
568
|
+
* a leading backspace character. These are the only changes that have yet
|
569
|
+
* emerged following some research as being required.
|
570
|
+
*
|
571
|
+
* Excel has other embedded character escaping requirements, some that emerged
|
572
|
+
* from empirical testing, other through research. Firstly, with regards to
|
573
|
+
* any embedded speech marks ("), each occurrence should be escaped with
|
574
|
+
* another speech mark and the whole field then surrounded with speech marks.
|
575
|
+
* Thus if a field holds <em>"Hello" he said</em> then it should be modified
|
576
|
+
* to appear as <em>"""Hello"" he said"</em>. Furthermore, if the field
|
577
|
+
* contains either embedded separator or EOL characters, it should also
|
578
|
+
* be surrounded with speech marks. As a result <em>1,400</em> would become
|
579
|
+
* <em>"1,400"</em> assuming that the comma is the required field separator.
|
580
|
+
* This has one consequence in, if a field contains embedded speech marks
|
581
|
+
* and embedded separator characters, checks for both are not required as the
|
582
|
+
* additional set of speech marks that should be placed around any field
|
583
|
+
* containing embedded speech marks will also account for the embedded
|
584
|
+
* separator.
|
585
|
+
*
|
586
|
+
* It is worth making one further note with regard to embedded EOL
|
587
|
+
* characters. If the data in a worksheet is exported as a CSV file using
|
588
|
+
* Excel itself, then the field will be surrounded with speech marks. If the
|
589
|
+
* resulting CSV file is then re-imports into another worksheet, the EOL
|
590
|
+
* character will result in the original single field occupying more than
|
591
|
+
* one cell. This same 'feature' is replicated in this classes behaviour.
|
592
|
+
*
|
593
|
+
* @param field An instance of the String class encapsulating the formatted
|
594
|
+
* contents of a cell on an Excel worksheet.
|
595
|
+
* @return A String that encapsulates the formatted contents of that
|
596
|
+
* Excel worksheet cell but with any embedded separator, EOL or
|
597
|
+
* speech mark characters correctly escaped.
|
598
|
+
*/
|
599
|
+
private String escapeEmbeddedCharacters(String field) {
|
600
|
+
StringBuffer buffer = null;
|
601
|
+
|
602
|
+
// If the fields contents should be formatted to confirm with Excel's
|
603
|
+
// convention....
|
604
|
+
if(this.formattingConvention == ToCSV.EXCEL_STYLE_ESCAPING) {
|
605
|
+
|
606
|
+
// Firstly, check if there are any speech marks (") in the field;
|
607
|
+
// each occurrence must be escaped with another set of speech marks
|
608
|
+
// and then the entire field should be enclosed within another
|
609
|
+
// set of speech marks. Thus, "Yes" he said would become
|
610
|
+
// """Yes"" he said"
|
611
|
+
if(field.contains("\"")) {
|
612
|
+
buffer = new StringBuffer(field.replaceAll("\"", "\\\"\\\""));
|
613
|
+
buffer.insert(0, "\"");
|
614
|
+
buffer.append("\"");
|
615
|
+
}
|
616
|
+
else {
|
617
|
+
// If the field contains either embedded separator or EOL
|
618
|
+
// characters, then escape the whole field by surrounding it
|
619
|
+
// with speech marks.
|
620
|
+
buffer = new StringBuffer(field);
|
621
|
+
if((buffer.indexOf(this.separator)) > -1 ||
|
622
|
+
(buffer.indexOf("\n")) > -1) {
|
623
|
+
buffer.insert(0, "\"");
|
624
|
+
buffer.append("\"");
|
625
|
+
}
|
626
|
+
}
|
627
|
+
return(buffer.toString().trim());
|
628
|
+
}
|
629
|
+
// The only other formatting convention this class obeys is the UNIX one
|
630
|
+
// where any occurrence of the field separator or EOL character will
|
631
|
+
// be escaped by preceding it with a backslash.
|
632
|
+
else {
|
633
|
+
if(field.contains(this.separator)) {
|
634
|
+
field = field.replaceAll(this.separator, ("\\\\" + this.separator));
|
635
|
+
}
|
636
|
+
if(field.contains("\n")) {
|
637
|
+
field = field.replaceAll("\n", "\\\\\n");
|
638
|
+
}
|
639
|
+
return(field);
|
640
|
+
}
|
641
|
+
}
|
642
|
+
|
643
|
+
/**
|
644
|
+
* The main() method contains code that demonstrates how to use the class.
|
645
|
+
*
|
646
|
+
* @param args An array containing zero, one or more elements all of type
|
647
|
+
* String. Each element will encapsulate an argument specified by the
|
648
|
+
* user when running the program from the command prompt.
|
649
|
+
*/
|
650
|
+
public static void main(String[] args) {
|
651
|
+
// Check the number of arguments passed to the main method. There
|
652
|
+
// must be two, three or four; the name of and path to either the folder
|
653
|
+
// containing the Excel files or an individual Excel workbook that is/are
|
654
|
+
// to be converted, the name of and path to the folder to which the CSV
|
655
|
+
// files should be written, - optionally - the separator character
|
656
|
+
// that should be used to separate individual items (fields) on the
|
657
|
+
// lines (records) of the CSV file and - again optionally - an integer
|
658
|
+
// that indicates whether the CSV file ought to obey Excel's or UNIX
|
659
|
+
// conventions with regard to formatting fields that contain embedded
|
660
|
+
// separator, Speech mark or EOL character(s).
|
661
|
+
//
|
662
|
+
// Note that the names of the CSV files will be derived from those
|
663
|
+
// of the Excel file(s). Put simply the .xls or .xlsx extension will be
|
664
|
+
// replaced with .csv. Therefore, if the source folder contains files
|
665
|
+
// with matching names but different extensions - Test.xls and Test.xlsx
|
666
|
+
// for example - then the CSV file generated from one will overwrite
|
667
|
+
// that generated from the other.
|
668
|
+
ToCSV converter = null;
|
669
|
+
try {
|
670
|
+
converter = new ToCSV();
|
671
|
+
if(args.length == 2) {
|
672
|
+
// Just the Source File/Folder and Destination Folder were
|
673
|
+
// passed to the main method.
|
674
|
+
converter.convertExcelToCSV(args[0], args[1]);
|
675
|
+
}
|
676
|
+
else if(args.length == 3){
|
677
|
+
// The Source File/Folder, Destination Folder and Separator
|
678
|
+
// were passed to the main method.
|
679
|
+
converter.convertExcelToCSV(args[0], args[1], args[2]);
|
680
|
+
}
|
681
|
+
else if(args.length == 4) {
|
682
|
+
// The Source File/Folder, Destination Folder, Separator and
|
683
|
+
// Formatting Convention were passed to the main method.
|
684
|
+
converter.convertExcelToCSV(args[0], args[1],
|
685
|
+
args[2], Integer.parseInt(args[3]));
|
686
|
+
}
|
687
|
+
else {
|
688
|
+
// None or more than four parameters were passed so display
|
689
|
+
//a Usage message.
|
690
|
+
System.out.println("Usage: java ToCSV [Source File/Folder] " +
|
691
|
+
"[Destination Folder] [Separator] [Formatting Convention]\n" +
|
692
|
+
"\tSource File/Folder\tThis argument should contain the name of and\n" +
|
693
|
+
"\t\t\t\tpath to either a single Excel workbook or a\n" +
|
694
|
+
"\t\t\t\tfolder containing one or more Excel workbooks.\n" +
|
695
|
+
"\tDestination Folder\tThe name of and path to the folder that the\n" +
|
696
|
+
"\t\t\t\tCSV files should be written out into. The\n" +
|
697
|
+
"\t\t\t\tfolder must exist before running the ToCSV\n" +
|
698
|
+
"\t\t\t\tcode as it will not check for or create it.\n" +
|
699
|
+
"\tSeparator\t\tOptional. The character or characters that\n" +
|
700
|
+
"\t\t\t\tshould be used to separate fields in the CSV\n" +
|
701
|
+
"\t\t\t\trecord. If no value is passed then the comma\n" +
|
702
|
+
"\t\t\t\twill be assumed.\n" +
|
703
|
+
"\tFormatting Convention\tOptional. This argument can take one of two\n" +
|
704
|
+
"\t\t\t\tvalues. Passing 0 (zero) will result in a CSV\n" +
|
705
|
+
"\t\t\t\tfile that obeys Excel's formatting conventions\n" +
|
706
|
+
"\t\t\t\twhilst passing 1 (one) will result in a file\n" +
|
707
|
+
"\t\t\t\tthat obeys UNIX formatting conventions. If no\n" +
|
708
|
+
"\t\t\t\tvalue is passed, then the CSV file produced\n" +
|
709
|
+
"\t\t\t\twill obey Excel's formatting conventions.");
|
710
|
+
}
|
711
|
+
}
|
712
|
+
// It is not wise to have such a wide catch clause - Exception is very
|
713
|
+
// close to being at the top of the inheritance hierarchy - though it
|
714
|
+
// will suffice for this example as it is really not possible to recover
|
715
|
+
// easily from an exceptional set of circumstances at this point in the
|
716
|
+
// program. It should however, ideally be replaced with one or more
|
717
|
+
// catch clauses optimised to handle more specific problems.
|
718
|
+
catch(Exception ex) {
|
719
|
+
System.out.println("Caught an: " + ex.getClass().getName());
|
720
|
+
System.out.println("Message: " + ex.getMessage());
|
721
|
+
System.out.println("Stacktrace follows:.....");
|
722
|
+
ex.printStackTrace(System.out);
|
723
|
+
}
|
724
|
+
}
|
725
|
+
|
726
|
+
/**
|
727
|
+
* An instance of this class can be used to control the files returned
|
728
|
+
* be a call to the listFiles() method when made on an instance of the
|
729
|
+
* File class and that object refers to a folder/directory
|
730
|
+
*/
|
731
|
+
class ExcelFilenameFilter implements FilenameFilter {
|
732
|
+
|
733
|
+
/**
|
734
|
+
* Determine those files that will be returned by a call to the
|
735
|
+
* listFiles() method. In this case, the name of the file must end with
|
736
|
+
* either of the following two extension; '.xls' or '.xlsx'. For the
|
737
|
+
* future, it is very possible to parameterise this and allow the
|
738
|
+
* containing class to pass, for example, an array of Strings to this
|
739
|
+
* class on instantiation. Each element in that array could encapsulate
|
740
|
+
* a valid file extension - '.xls', '.xlsx', '.xlt', '.xlst', etc. These
|
741
|
+
* could then be used to control which files were returned by the call
|
742
|
+
* to the listFiles() method.
|
743
|
+
*
|
744
|
+
* @param file An instance of the File class that encapsulates a handle
|
745
|
+
* referring to the folder/directory that contains the file.
|
746
|
+
* @param name An instance of the String class that encapsulates the
|
747
|
+
* name of the file.
|
748
|
+
* @return A boolean value that indicates whether the file should be
|
749
|
+
* included in the array returned by the call to the listFiles()
|
750
|
+
* method. In this case true will be returned if the name of the
|
751
|
+
* file ends with either '.xls' or '.xlsx' and false will be
|
752
|
+
* returned in all other instances.
|
753
|
+
*/
|
754
|
+
public boolean accept(File file, String name) {
|
755
|
+
return(name.endsWith(".xls") || name.endsWith(".xlsx"));
|
756
|
+
}
|
757
|
+
}
|
758
|
+
}
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: poi2csv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Douglas English
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Converts Excel .xls and .xlsx files to CSV.
|
42
|
+
email:
|
43
|
+
- douglas.english@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .DS_Store
|
49
|
+
- .gitignore
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- build.xml
|
55
|
+
- classes/ToCSV$ExcelFilenameFilter.class
|
56
|
+
- classes/ToCSV.class
|
57
|
+
- lib/.DS_Store
|
58
|
+
- lib/commons-codec-1.5.jar
|
59
|
+
- lib/commons-logging-1.1.jar
|
60
|
+
- lib/dom4j-1.6.1.jar
|
61
|
+
- lib/junit-3.8.1.jar
|
62
|
+
- lib/log4j-1.2.13.jar
|
63
|
+
- lib/poi-3.9-20121203.jar
|
64
|
+
- lib/poi-examples-3.9-20121203.jar
|
65
|
+
- lib/poi-excelant-3.9-20121203.jar
|
66
|
+
- lib/poi-ooxml-3.9-20121203.jar
|
67
|
+
- lib/poi-ooxml-schemas-3.9-20121203.jar
|
68
|
+
- lib/poi-scratchpad-3.9-20121203.jar
|
69
|
+
- lib/poi2csv.rb
|
70
|
+
- lib/poi2csv/version.rb
|
71
|
+
- lib/stax-api-1.0.1.jar
|
72
|
+
- lib/xmlbeans-2.3.0.jar
|
73
|
+
- poi2csv.gemspec
|
74
|
+
- src/ToCSV.java
|
75
|
+
homepage: https://github.com/denglish/poi2csv
|
76
|
+
licenses:
|
77
|
+
- MIT
|
78
|
+
metadata: {}
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options: []
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 2.0.3
|
96
|
+
signing_key:
|
97
|
+
specification_version: 4
|
98
|
+
summary: This GEM provides a wrapper to the http://poi.apache.org/ library for converting
|
99
|
+
Excel (.xls and .xlsx) files to CSV.
|
100
|
+
test_files: []
|