exceltocsv 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +22 -0
- data/README.md +66 -0
- data/exceltocsv.gemspec +26 -0
- data/lib/exceltocsv.rb +30 -0
- data/lib/exceltocsv/cross_platform_excel.rb +32 -0
- data/lib/exceltocsv/excel_app_wrapper.rb +28 -0
- data/lib/exceltocsv/excel_file.rb +278 -0
- data/lib/exceltocsv/os.rb +27 -0
- data/lib/exceltocsv/version.rb +16 -0
- data/lib/exceltocsv/win_excel.rb +42 -0
- data/rakefile.rb +48 -0
- data/spec/data/commastrings.xls +0 -0
- data/spec/data/decimals.xls +0 -0
- data/spec/data/normaldate.xls +0 -0
- data/spec/data/test1.xls +0 -0
- data/spec/data/textdate.xls +0 -0
- data/spec/excel_file_spec.rb +88 -0
- data/spec/spec_helper.rb +74 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: aec25d93d88e6a500e0cea8ef82c0b893c91b217
|
4
|
+
data.tar.gz: 1a79d4af66f0a6b1b1c8c442b4ac10f0fa14dae2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 71832e1729fec64b422478a6852435ea069f98b4e0ad1f973d33f06d6b4f3c4dff4e8129b7dbcd96468354dc91275b0c94904353044f3626509ae818c60b11df
|
7
|
+
data.tar.gz: 6f56205f61d7abc823b27566babe3969aa15260d9292ee639840cc585bcca0a22aed674d7fb5fca6451095abbce8ed4b2c828be2e1b5c4f7a957349fa6a91e1e
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
exceltocsv (0.2.0)
|
5
|
+
spreadsheet
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
rake (10.3.2)
|
11
|
+
ruby-ole (1.2.11.7)
|
12
|
+
spreadsheet (0.9.7)
|
13
|
+
ruby-ole (>= 1.0)
|
14
|
+
|
15
|
+
PLATFORMS
|
16
|
+
ruby
|
17
|
+
x86-mingw32
|
18
|
+
|
19
|
+
DEPENDENCIES
|
20
|
+
bundler (~> 1.3)
|
21
|
+
exceltocsv!
|
22
|
+
rake
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Jeff McAffee
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# ExcelToCsv::ExcelFile
|
2
|
+
|
3
|
+
ExcelFile is a file converter to convert Excel spreadsheets to CSV files.
|
4
|
+
It is specifically designed for the criteria required to generate properly
|
5
|
+
formatted CSV files for use with [GDLC](https://github.com/jmcaffee/gdlc).
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
Quick example:
|
10
|
+
|
11
|
+
require 'exceltocsv'
|
12
|
+
|
13
|
+
converter = ExcelToCsv::ExcelFile.new
|
14
|
+
converter.xl_to_csv( 'path/to/input.xls', 'path/to/output.csv' )
|
15
|
+
|
16
|
+
Example rake task that updates (converts xls files) csvs based on last modified
|
17
|
+
date of each file within a directory structure.
|
18
|
+
|
19
|
+
###### plk.rake
|
20
|
+
|
21
|
+
require 'exceltocsv'
|
22
|
+
|
23
|
+
desc "Update CSV files from XLS source"
|
24
|
+
task :update do
|
25
|
+
plks = FileList['plk/xls/**/*.xls']
|
26
|
+
|
27
|
+
# Pathmap string maps to csv dir with csv target file
|
28
|
+
pm = "%{^plk/xls,plk/csv;.xls$,.csv;.xlsx$,.csv}p"
|
29
|
+
# Remove any source files when the dest file exists and is newer.
|
30
|
+
plks.delete_if do |s|
|
31
|
+
# Downcase the path,
|
32
|
+
d = s.pathmap( pm ).downcase
|
33
|
+
# and snakecase the target filename.
|
34
|
+
d = snakecase_filename(d)
|
35
|
+
File.exists?(d) && File.stat(s).mtime <= File.stat(d).mtime
|
36
|
+
end
|
37
|
+
|
38
|
+
target_csvs = plks.pathmap( pm )
|
39
|
+
|
40
|
+
# I want the target filenames normalized to lower case.
|
41
|
+
target_csvs.each { |p| p.downcase! }
|
42
|
+
|
43
|
+
# Create all target dirs
|
44
|
+
target_dirs = target_csvs.pathmap("%d")
|
45
|
+
target_dirs.uniq!
|
46
|
+
mkdir_p target_dirs
|
47
|
+
|
48
|
+
# Convert all newer XL files to CSVs.
|
49
|
+
# Note that this method only converts the first sheet in the workbook.
|
50
|
+
converter = ExcelToCsv::ExcelFile.new
|
51
|
+
plks.each do |x|
|
52
|
+
converter.xl_to_csv(x, snakecase_filename(x.pathmap(pm).downcase))
|
53
|
+
end
|
54
|
+
|
55
|
+
puts "All target files are up to date" if plks.empty?
|
56
|
+
end
|
57
|
+
|
58
|
+
def snakecase_filename(filepath)
|
59
|
+
snake_file_path = File.join(filepath.pathmap("%d"), filepath.pathmap("%n").snakecase + filepath.pathmap("%x"))
|
60
|
+
end
|
61
|
+
|
62
|
+
## License
|
63
|
+
|
64
|
+
See [LICENSE](https://github.com/jmcaffee/exceltocsv/blob/master/LICENSE).
|
65
|
+
Website: [http://ktechsystems.com](http://ktechsystems.com)
|
66
|
+
|
data/exceltocsv.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'exceltocsv/version'
|
5
|
+
require 'exceltocsv/os'
|
6
|
+
|
7
|
+
Gem::Specification.new do |spec|
|
8
|
+
spec.name = "exceltocsv"
|
9
|
+
spec.version = ExcelToCsv::VERSION
|
10
|
+
spec.authors = ["Jeff McAffee"]
|
11
|
+
spec.email = ["jeff@ktechsystems.com"]
|
12
|
+
spec.description = %q{ExcelToCsv is a utility library for converting Excel files to CSV format.}
|
13
|
+
spec.summary = %q{Utility for converting Excel files to CSV format}
|
14
|
+
spec.homepage = "https://github.com/jmcaffee/exceltocsv"
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files`.split($/)
|
18
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
19
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
23
|
+
spec.add_development_dependency "rake"
|
24
|
+
|
25
|
+
spec.add_runtime_dependency "spreadsheet"
|
26
|
+
end
|
data/lib/exceltocsv.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: exceltocsv.rb
|
3
|
+
# Purpose:: Include file for ExcelToCsv library
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 04/17/2013
|
6
|
+
# Copyright:: Copyright (c) 2013, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
|
10
|
+
require 'find'
require 'logger'

# Library-wide logger writing to STDERR. Only errors are reported unless the
# DEBUG environment variable is set to '1', in which case debug-level logging
# is switched on.
$LOG = Logger.new(STDERR)
$LOG.level = Logger::ERROR

if ENV["DEBUG"] == '1'
  puts "LOGGING: ON due to DEBUG=1"
  $LOG.level = Logger::DEBUG
end

# Load the library files relative to this file, in the same way the other
# files of the library load each other.
require_relative 'exceltocsv/version'

$LOG.info "**********************************************************************"
$LOG.info "Logging started for ExcelToCsv library."
$LOG.info "**********************************************************************"

require_relative 'exceltocsv/excel_file'
|
30
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: cross_platform_excel.rb
|
3
|
+
# Purpose:: Cross platform Excel binary implementation
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
require_relative 'excel_app_wrapper'
|
10
|
+
require 'spreadsheet'
|
11
|
+
|
12
|
+
module ExcelToCsv
  # ExcelAppWrapper implementation that reads workbooks through the
  # `spreadsheet` library (Spreadsheet.open) instead of driving the Excel
  # application itself.
  class CrossPlatformExcel < ExcelAppWrapper
    # Opens the workbook at +filepath+ and keeps it for the other calls.
    def open_workbook(filepath)
      @wb = Spreadsheet.open(filepath)
    end

    # Returns the names of all worksheets in the open workbook.
    def worksheet_names
      @wb.worksheets.map(&:name)
    end

    # Nothing to release for this implementation.
    def close_workbook
      # NOP
    end

    # Returns the rows of the worksheet named +worksheet_name+.
    def worksheet_data(worksheet_name)
      @wb.worksheet(worksheet_name).rows
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: excel_app_wrapper.rb
|
3
|
+
# Purpose:: Excel Application Wrapper base class
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
|
10
|
+
module ExcelToCsv
  # Abstract base class describing the operations a workbook reader must
  # provide. Every method raises a RuntimeError until a subclass overrides it.
  class ExcelAppWrapper
    # Open the workbook located at +filepath+.
    def open_workbook(filepath)
      raise "abstract ##{__method__} method must be overridden"
    end

    # Return the names of the worksheets in the open workbook.
    def worksheet_names
      raise "abstract ##{__method__} method must be overridden"
    end

    # Close the open workbook.
    def close_workbook
      raise "abstract ##{__method__} method must be overridden"
    end

    # Return the row data of the worksheet named +worksheet_name+.
    def worksheet_data(worksheet_name)
      raise "abstract ##{__method__} method must be overridden"
    end
  end
end
|
@@ -0,0 +1,278 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: excel_file.rb
|
3
|
+
# Purpose:: Convert Excel files to CSV format accounting for formatting
|
4
|
+
# This file was originally located in the gdlrakeutils gem
|
5
|
+
# as converttocsv.rb
|
6
|
+
#
|
7
|
+
# Author:: Jeff McAffee 04/17/2013
|
8
|
+
# Copyright:: Copyright (c) 2013, kTech Systems LLC. All rights reserved.
|
9
|
+
# Website:: http://ktechsystems.com
|
10
|
+
##############################################################################
|
11
|
+
|
12
|
+
require_relative 'os'
require 'time'
require 'csv'
require 'bigdecimal'
require 'fileutils'
|
16
|
+
|
17
|
+
|
18
|
+
module ExcelToCsv
  # Converts one worksheet of an Excel workbook into a CSV file.
  #
  # Only worksheets whose name contains "CQDS" or "PLK" are considered, and
  # only the first of those is written. Cell values are normalised on the way
  # out: numbers are limited to 3 decimal places, surrounding whitespace is
  # stripped, recognised dates are rewritten as M/D/YYYY, and values containing
  # a comma or a double quote are quoted.
  class ExcelFile

    # If FORCE_WIN_OLE is Y or 1, use the actual Excel application.
    # NOTE: This will only work on a windows OS!
    # The constant is defined only when the environment variable asks for it;
    # #xl_app merely checks whether it is defined.
    FORCE_WIN_OLE = 1 if %w[1 y].include?(ENV['FORCE_WIN_OLE'].to_s.downcase)

    def initialize
      # Patterns used by #format_date to decide whether a cell looks like a date.
      @date_RE = %r{\d{4}/\d{2}/\d{2}}
      @date_with_dashes_RE = /\d{4}-\d{2}-\d{2}/
      @date_with_time_RE = /\d{2}:\d{2}:\d{2}/
    end

    # Returns the workbook reader, creating it on first use.
    # WinExcel is chosen on Windows when FORCE_WIN_OLE is defined; otherwise
    # CrossPlatformExcel is used. The implementation file is required lazily
    # so that only the chosen one is loaded.
    def xl_app
      return @xl_app unless @xl_app.nil?

      if OS.windows? && defined?(FORCE_WIN_OLE)
        require_relative 'win_excel'
        @xl_app = WinExcel.new
      else
        # CrossPlatformExcel is faster (like, by 30x).
        require_relative 'cross_platform_excel'
        @xl_app = CrossPlatformExcel.new
      end
      @xl_app
    end

    # Turns on verbose console output when +flg+ is "-v"; any other value is
    # ignored.
    def set_flag(flg)
      @verbose = true if flg == "-v"
    end

    # True when verbose output has been enabled through #set_flag.
    def verbose?
      @verbose ||= false
    end

    # Convert the first "CQDS"/"PLK" sheet in an xls(x) file to a csv file.
    #
    # infile::  path of the workbook to read
    # outfile:: path of the CSV file to write
    #
    # Prints a message and returns without writing anything when +infile+ does
    # not exist or no worksheet name matches. Always returns nil. The workbook
    # is closed even when the conversion raises.
    def xl_to_csv(infile, outfile)
      filepath = File.expand_path(infile)
      puts "xl_to_csv: #{infile} => #{outfile}" if verbose?

      unless File.exist?(filepath)
        puts "Unable to find file."
        puts " #{filepath}"
        return
      end

      # Open an Excel file
      xl_app.open_workbook filepath

      begin
        # Build a list of work sheets to dump to file.
        sheets_in_file = xl_app.worksheet_names.select do |sheetname|
          sheetname =~ /CQDS/ || sheetname =~ /PLK/
        end
        sheets_in_file.each do |sheetname|
          puts "Converting sheet #{sheetname}" if verbose?
        end

        if sheets_in_file.empty?
          puts "*** No sheets labeled 'PLK' or 'CQDS' ***"
          puts "Verify #{infile} is formatted correctly."
          return
        end

        # Write sheet data to file (only the first matching sheet).
        write_sheet(sheets_in_file.first, outfile)

        # Strip empty data from end of lines
        clean_csv(outfile)
        nil
      ensure
        # Close Excel
        xl_app.close_workbook
      end
    end

    # Rewrites the CSV file +filename+ in place:
    # * every row is cut (or nil-padded) to the width of the widest row,
    #   measured up to its last non-empty field;
    # * output stops at the second of two consecutive empty rows.
    # The result is built in "<filename>.tmp.csv", which then replaces the
    # original file.
    def clean_csv(filename)
      tmp_file = "#{filename}.tmp.csv"

      # Index of the right-most non-empty field found in any row.
      max_row_length = 0
      CSV.foreach(filename) do |row|
        last_used = row.rindex { |item| !item.nil? && !item.empty? } || 0
        max_row_length = last_used if last_used > max_row_length
      end

      puts "Max row length: #{max_row_length}" if verbose?

      CSV.open(tmp_file, "wb") do |tmp_csv|
        # Used to track empty lines
        empty_found = false

        CSV.foreach(filename) do |row|
          clean_row = (0..max_row_length).map { |i| row[i] }
          row_is_empty = empty_row?(clean_row)

          # We need to stop output on 2nd empty row
          break if row_is_empty && empty_found

          empty_found = row_is_empty
          tmp_csv << clean_row
        end # CSV read
      end # CSV write

      # Replace original file with tmpfile.
      FileUtils.rm filename
      FileUtils.mv tmp_file, filename
    ensure
      # Do not leave the temporary file behind when something failed part-way.
      FileUtils.rm_f tmp_file
    end

    # Return true if row contains no data (every item is nil or empty).
    def empty_row?(row)
      row.none? { |item| item && !item.empty? }
    end

    # Converts a raw cell value into the text written to the CSV file.
    # Always returns a String.
    def process_cell_value(a_cell)
      # Limit the number to 3 decimal places if numeric.
      a_cell = truncate_decimal(a_cell)

      # Remove leading and trailing spaces.
      a_cell = a_cell.to_s.strip

      # If the result is n.000... Remove the unnecessary zeros.
      a_cell = clean_int_value(a_cell)

      # If the result is a date, remove time.
      a_cell = format_date(a_cell)

      # Surround the cell value with quotes when it contains a comma or a
      # double quote. Embedded quotes are doubled so the line stays parseable
      # by the CSV pass in #clean_csv.
      if a_cell.include?(',') || a_cell.include?('"')
        a_cell = '"' + a_cell.gsub('"', '""') + '"'
      end

      a_cell
    end

    # Formats a numeric cell with 3 decimal places ("%.3f", which rounds) and
    # then drops trailing zeros, keeping at least one decimal place.
    #   eg. 12.10 => "12.1"
    #       12.0  => "12.0"
    # Returns a String for numeric input; any other value is returned as is.
    def truncate_decimal(a_cell)
      return a_cell unless a_cell.is_a?(Numeric)

      # Kernel#BigDecimal is used because BigDecimal.new is not available in
      # current bigdecimal releases.
      BigDecimal(truncate_decimal_to_string(a_cell, 3)).to_s("F")
    end

    # Formats a number as a string with a fixed number of decimal places.
    # num:    number to format
    # places: number of decimal places to keep
    def truncate_decimal_to_string(num, places)
      "%.#{places}f" % num
    end

    # If the result is n.000... Remove the unnecessary zeros.
    # Everything from the first "." onward is dropped when the text ends in
    # "." followed only by zeros.
    def clean_int_value(a_cell)
      return a_cell unless a_cell =~ /\.[0]+$/

      a_cell.split(".").first
    end

    # If the cell looks like a date (YYYY/MM/DD, YYYY-MM-DD or containing
    # HH:MM:SS), format it as M/D/YYYY, stripping time. Text that cannot be
    # parsed as a date is returned unchanged.
    def format_date(a_cell)
      date_patterns = [@date_RE, @date_with_dashes_RE, @date_with_time_RE]
      return a_cell unless date_patterns.any? { |pattern| a_cell =~ pattern }

      begin
        parsed = DateTime.parse(a_cell)
      rescue ArgumentError
        # Either this is not a date, or the date format is unrecognized,
        # nothing to see here, moving on.
        return a_cell
      end

      formatted = "#{parsed.month}/#{parsed.day}/#{parsed.year}"
      if verbose?
        puts ""
        puts "*** Converted date to #{formatted} ***"
        puts ""
      end
      formatted
    end

    # Creates the directory +outdir+ (including parents) unless it exists.
    def prepare_outdir(outdir)
      FileUtils.makedirs("#{outdir}") unless File.directory?(outdir)
    end

    # Splits +filepath+ on "/" and returns the parts interleaved with "\\".
    # NOTE(review): the result is an Array of segments and separators (or nil
    # for an empty path), not a joined String - confirm what callers expect
    # before changing it.
    def winPath(filepath)
      parts = filepath.split("/")
      return nil if parts.empty?

      mspath = [parts.first]
      parts.drop(1).each { |part| mspath << "\\" << part }
      mspath
    end

    private

    # Writes each row of the worksheet +sheetname+ to +outfile+ as one
    # comma-joined line; rows without any data become an empty line.
    def write_sheet(sheetname, outfile)
      File.open(outfile, "w") do |f|
        xl_app.worksheet_data(sheetname).each do |row|
          # Walk the cells with #each, as the original for-loop did.
          # NOTE(review): row objects supplied by the wrapper may define their
          # own #each, so do not swap in Array-only shortcuts without checking.
          row_data = []
          row.each { |a_cell| row_data << process_cell_value(a_cell) }

          # Determine if the row contains any data.
          contains_data = row_data.any? { |cell| cell =~ /[^,\r\n]+/ }

          # Insert an empty line if the row contains no data.
          if contains_data
            f << row_data.join(",") << "\n"
            puts "#{row_data}" if verbose?
          else
            f << "\n"
            puts "\n" if verbose?
          end
        end
      end
    end
  end # class ExcelFile
end # module ExcelToCsv
|
@@ -0,0 +1,27 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: os.rb
|
3
|
+
# Purpose:: Operating System detecting
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
|
10
|
+
require 'rbconfig'

# Operating system detection based on the architecture string reported by
# RbConfig::CONFIG["arch"].
module OS
  # True when the architecture string names a Windows-style platform
  # (cygwin, mswin, mingw, bccwin, wince or emx).
  def self.windows?
    !(/cygwin|mswin|mingw|bccwin|wince|emx/ =~ RbConfig::CONFIG["arch"]).nil?
  end

  # True when the architecture string contains "darwin".
  def self.mac?
    !(/darwin/ =~ RbConfig::CONFIG["arch"]).nil?
  end

  # True for every platform that is not detected as Windows.
  def self.unix?
    !windows?
  end

  # True for every non-Windows platform that is not detected as Mac.
  def self.linux?
    unix? && !mac?
  end
end
|
27
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# Everything is contained in Module ExcelToCsv
|
3
|
+
#
|
4
|
+
module ExcelToCsv

  # Each constant is defined only once, so loading this file a second time
  # does not reassign it. Module#constants returns Symbols, so the check is
  # done with const_defined? (restricted to this module) rather than by
  # comparing against a String name.
  VERSION   = "0.2.1" unless const_defined?(:VERSION, false)
  APPNAME   = "ExcelToCsv" unless const_defined?(:APPNAME, false)
  COPYRIGHT = "Copyright (c) 2014, kTech Systems LLC. All rights reserved." unless const_defined?(:COPYRIGHT, false)

  # Returns the banner text: "<APPNAME> v<VERSION>", the copyright line and a
  # trailing newline.
  def self.logo
    [
      "#{ExcelToCsv::APPNAME} v#{ExcelToCsv::VERSION}",
      "#{ExcelToCsv::COPYRIGHT}",
      ""
    ].join("\n")
  end
end # module ExcelToCsv
|
@@ -0,0 +1,42 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: win_excel.rb
|
3
|
+
# Purpose:: Windows OLE Excel Application Wrapper
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
require_relative 'excel_app_wrapper'
|
10
|
+
require 'win32ole'
|
11
|
+
|
12
|
+
module ExcelToCsv
  # ExcelAppWrapper implementation that drives the Excel application through
  # WIN32OLE.
  class WinExcel < ExcelAppWrapper
    # Starts an Excel.Application instance and opens the workbook at
    # +filepath+ in it.
    def open_workbook(filepath)
      @xl = WIN32OLE.new('Excel.Application')

      # Turn off excel alerts.
      @xl.DisplayAlerts = false

      # 2nd param of false turns off the link update request
      # when an xls file is opened that contains links.
      @wb = @xl.Workbooks.Open("#{filepath}", false)
    end

    # Returns the names of all worksheets in the open workbook.
    def worksheet_names
      names = []
      @wb.Worksheets.each { |sheet| names << sheet.Name }
      names
    end

    # Quits the Excel application started by #open_workbook.
    def close_workbook
      @xl.Quit
    end

    # Returns the value of the used range of the worksheet named
    # +worksheet_name+.
    def worksheet_data(worksheet_name)
      @wb.Worksheets(worksheet_name).UsedRange.Value
    end
  end
end
|
data/rakefile.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
######################################################################################
|
2
|
+
# File:: rakefile
|
3
|
+
# Purpose:: Build tasks for ExcelToCsv application
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 04/17/2013
|
6
|
+
# Copyright:: Copyright (c) 2013, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
######################################################################################
|
9
|
+
|
10
|
+
require 'bundler/gem_tasks'
|
11
|
+
require 'psych'
|
12
|
+
gem 'rdoc', '>= 3.9.4'
|
13
|
+
|
14
|
+
require 'rake'
|
15
|
+
require 'rake/clean'
|
16
|
+
require 'rdoc/task'
|
17
|
+
require 'rspec/core/rake_task'
|
18
|
+
|
19
|
+
# Setup common directory structure
|
20
|
+
|
21
|
+
PROJNAME = "ExcelToCsv"
|
22
|
+
|
23
|
+
# Setup common clean and clobber targets
|
24
|
+
|
25
|
+
CLEAN.include("pkg/**/*.*")
|
26
|
+
CLEAN.include("tmp/**/*.*")
|
27
|
+
|
28
|
+
CLOBBER.include("pkg")
|
29
|
+
CLOBBER.include("tmp")
|
30
|
+
|
31
|
+
|
32
|
+
#############################################################################
|
33
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
34
|
+
files = ['docs/**/*.rdoc', 'lib/**/*.rb', 'app/**/*.rb']
|
35
|
+
rdoc.rdoc_files.add( files )
|
36
|
+
rdoc.main = "docs/README.md" # Page to start on
|
37
|
+
rdoc.title = "#{PROJNAME} Documentation"
|
38
|
+
rdoc.rdoc_dir = 'doc' # rdoc output folder
|
39
|
+
rdoc.options << '--line-numbers' << '--all'
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
#############################################################################
|
44
|
+
desc "Run all specs"
|
45
|
+
RSpec::Core::RakeTask.new do |t|
|
46
|
+
#t.rcov = true
|
47
|
+
end
|
48
|
+
|
Binary file
|
Binary file
|
Binary file
|
data/spec/data/test1.xls
ADDED
Binary file
|
Binary file
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ExcelToCsv::ExcelFile do
|
4
|
+
|
5
|
+
let(:converter) { ExcelToCsv::ExcelFile.new }
|
6
|
+
|
7
|
+
let(:outdir) { Pathname.new('tmp/spec') }
|
8
|
+
let(:testfile) { Pathname.new('spec/data/test1.xls') }
|
9
|
+
let(:outfile) { outdir + 'test1.csv' }
|
10
|
+
|
11
|
+
before :each do
|
12
|
+
outdir.rmtree if outdir.exist? && outdir.directory?
|
13
|
+
outdir.mkpath
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
it "converts a file to csv" do
|
18
|
+
converter.xl_to_csv(testfile, outfile)
|
19
|
+
outfile.exist?.should be_truthy
|
20
|
+
end
|
21
|
+
|
22
|
+
context "dates" do
|
23
|
+
|
24
|
+
let(:ndatefile) { Pathname.new('spec/data/normaldate.xls') }
|
25
|
+
let(:ndatefileout) { outdir + 'normaldate.csv' }
|
26
|
+
|
27
|
+
it "normal excel dates are converted consistently" do
|
28
|
+
converter.xl_to_csv(ndatefile, ndatefileout)
|
29
|
+
ndatefileout.exist?.should be_truthy
|
30
|
+
file_to_string(ndatefileout).should include '12/18/2012'
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
let(:tdatefile) { Pathname.new('spec/data/textdate.xls') }
|
35
|
+
let(:tdatefileout) { outdir + 'textdate.csv' }
|
36
|
+
|
37
|
+
it "text excel dates are converted consistently" do
|
38
|
+
converter.xl_to_csv(tdatefile, tdatefileout)
|
39
|
+
tdatefileout.exist?.should be_truthy
|
40
|
+
file_to_string(tdatefileout).should include '12/18/2012'
|
41
|
+
end
|
42
|
+
end # context "dates"
|
43
|
+
|
44
|
+
|
45
|
+
context "decimals" do
|
46
|
+
|
47
|
+
let(:decimalsfile) { Pathname.new('spec/data/decimals.xls') }
|
48
|
+
let(:decimalsfileout) { outdir + 'decimals.csv' }
|
49
|
+
|
50
|
+
it "3 decimal place numbers are processed as is" do
|
51
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
52
|
+
decimalsfileout.exist?.should be_truthy
|
53
|
+
file_row_starting_with(decimalsfileout, '3 Place Decimal').should include '1.123'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "more than 3 decimal places are truncated at 3 places" do
|
57
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
58
|
+
decimalsfileout.exist?.should be_truthy
|
59
|
+
last_item_from_row('Truncate Decimal', decimalsfileout).should eq '1.234'
|
60
|
+
end
|
61
|
+
|
62
|
+
it "decimal places are trucated when 0" do
|
63
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
64
|
+
decimalsfileout.exist?.should be_truthy
|
65
|
+
last_item_from_row('Integer', decimalsfileout).should eq '1'
|
66
|
+
end
|
67
|
+
|
68
|
+
it "trailing decimal zeros are trucated" do
|
69
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
70
|
+
decimalsfileout.exist?.should be_truthy
|
71
|
+
last_item_from_row('No Trailing Zero', decimalsfileout).should eq '1.23'
|
72
|
+
end
|
73
|
+
end # context "decimals"
|
74
|
+
|
75
|
+
|
76
|
+
context "commas" do
|
77
|
+
|
78
|
+
let(:commasfile) { Pathname.new('spec/data/commastrings.xls') }
|
79
|
+
let(:commasfileout) { outdir + 'commastrings.csv' }
|
80
|
+
|
81
|
+
it "within cells are enclosed in quotes" do
|
82
|
+
converter.xl_to_csv(commasfile, commasfileout)
|
83
|
+
commasfileout.exist?.should be_truthy
|
84
|
+
file_to_string(commasfileout).should include 'Comma String,"This,string,has,commas"'
|
85
|
+
end
|
86
|
+
|
87
|
+
end # context "commas"
|
88
|
+
end # describe ExcelToCsv::ExcelFile
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
RSpec.configure do |config|
|
8
|
+
config.run_all_when_everything_filtered = true
|
9
|
+
config.filter_run :focus
|
10
|
+
|
11
|
+
# Run specs in random order to surface order dependencies. If you find an
|
12
|
+
# order dependency and want to debug it, you can fix the order by providing
|
13
|
+
# the seed, which is printed after each run.
|
14
|
+
# --seed 1234
|
15
|
+
config.order = 'random'
|
16
|
+
|
17
|
+
# Enable both 'should' and 'expect' syntax:
|
18
|
+
config.expect_with :rspec do |c|
|
19
|
+
# Disable the `expect` syntax...
|
20
|
+
#c.syntax = :should
|
21
|
+
|
22
|
+
# ...or disable the `should` syntax...
|
23
|
+
#c.syntax = :expect
|
24
|
+
|
25
|
+
# ...or explicitly enable both
|
26
|
+
c.syntax = [:should, :expect]
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
require 'pathname'
|
32
|
+
require_relative '../lib/exceltocsv'
|
33
|
+
|
34
|
+
# Returns the lines of the file at +filepath+ as an Array of Strings, each
# line keeping its line terminator.
def file_to_array(filepath)
  File.readlines(filepath)
end
|
41
|
+
|
42
|
+
# Returns true when any line of the file +haystack+ contains the text
# +needle+, false otherwise.
def file_contains?(haystack, needle)
  File.foreach(haystack).any? { |line| line.include?(needle) }
end
|
49
|
+
|
50
|
+
# Returns the whole content of the file +filename+ as a single String.
def file_to_string(filename)
  File.read(filename)
end
|
57
|
+
|
58
|
+
# Returns the first line of the file +filename+ that contains the text
# +starts_with+ (line terminator included), or "" when no line does.
# NOTE(review): despite the name, the text may appear anywhere in the line,
# not only at its start.
def file_row_starting_with(filename, starts_with)
  File.foreach(filename).find { |row| row.include?(starts_with) } || ""
end
|
65
|
+
|
66
|
+
|
67
|
+
# Looks up a row of the file +filename+ via file_row_starting_with and returns
# its last comma-separated item with the line terminator removed. Returns nil
# when the row has no items (for example when no row was found).
def last_item_from_row(row_starts_with, filename)
  row = file_row_starting_with(filename, row_starts_with)
  row.split(',').map(&:chomp).last
end
|
73
|
+
|
74
|
+
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: exceltocsv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeff McAffee
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: spreadsheet
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: ExcelToCsv is a utility library for converting Excel files to CSV format.
|
56
|
+
email:
|
57
|
+
- jeff@ktechsystems.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- Gemfile
|
65
|
+
- Gemfile.lock
|
66
|
+
- LICENSE
|
67
|
+
- README.md
|
68
|
+
- exceltocsv.gemspec
|
69
|
+
- lib/exceltocsv.rb
|
70
|
+
- lib/exceltocsv/cross_platform_excel.rb
|
71
|
+
- lib/exceltocsv/excel_app_wrapper.rb
|
72
|
+
- lib/exceltocsv/excel_file.rb
|
73
|
+
- lib/exceltocsv/os.rb
|
74
|
+
- lib/exceltocsv/version.rb
|
75
|
+
- lib/exceltocsv/win_excel.rb
|
76
|
+
- rakefile.rb
|
77
|
+
- spec/data/commastrings.xls
|
78
|
+
- spec/data/decimals.xls
|
79
|
+
- spec/data/normaldate.xls
|
80
|
+
- spec/data/test1.xls
|
81
|
+
- spec/data/textdate.xls
|
82
|
+
- spec/excel_file_spec.rb
|
83
|
+
- spec/spec_helper.rb
|
84
|
+
homepage: https://github.com/jmcaffee/exceltocsv
|
85
|
+
licenses:
|
86
|
+
- MIT
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.3.0
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: Utility for converting Excel files to CSV format
|
108
|
+
test_files:
|
109
|
+
- spec/data/commastrings.xls
|
110
|
+
- spec/data/decimals.xls
|
111
|
+
- spec/data/normaldate.xls
|
112
|
+
- spec/data/test1.xls
|
113
|
+
- spec/data/textdate.xls
|
114
|
+
- spec/excel_file_spec.rb
|
115
|
+
- spec/spec_helper.rb
|
116
|
+
has_rdoc:
|