exceltocsv 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +22 -0
- data/README.md +66 -0
- data/exceltocsv.gemspec +26 -0
- data/lib/exceltocsv.rb +30 -0
- data/lib/exceltocsv/cross_platform_excel.rb +32 -0
- data/lib/exceltocsv/excel_app_wrapper.rb +28 -0
- data/lib/exceltocsv/excel_file.rb +278 -0
- data/lib/exceltocsv/os.rb +27 -0
- data/lib/exceltocsv/version.rb +16 -0
- data/lib/exceltocsv/win_excel.rb +42 -0
- data/rakefile.rb +48 -0
- data/spec/data/commastrings.xls +0 -0
- data/spec/data/decimals.xls +0 -0
- data/spec/data/normaldate.xls +0 -0
- data/spec/data/test1.xls +0 -0
- data/spec/data/textdate.xls +0 -0
- data/spec/excel_file_spec.rb +88 -0
- data/spec/spec_helper.rb +74 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: aec25d93d88e6a500e0cea8ef82c0b893c91b217
|
4
|
+
data.tar.gz: 1a79d4af66f0a6b1b1c8c442b4ac10f0fa14dae2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 71832e1729fec64b422478a6852435ea069f98b4e0ad1f973d33f06d6b4f3c4dff4e8129b7dbcd96468354dc91275b0c94904353044f3626509ae818c60b11df
|
7
|
+
data.tar.gz: 6f56205f61d7abc823b27566babe3969aa15260d9292ee639840cc585bcca0a22aed674d7fb5fca6451095abbce8ed4b2c828be2e1b5c4f7a957349fa6a91e1e
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
exceltocsv (0.2.0)
|
5
|
+
spreadsheet
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
rake (10.3.2)
|
11
|
+
ruby-ole (1.2.11.7)
|
12
|
+
spreadsheet (0.9.7)
|
13
|
+
ruby-ole (>= 1.0)
|
14
|
+
|
15
|
+
PLATFORMS
|
16
|
+
ruby
|
17
|
+
x86-mingw32
|
18
|
+
|
19
|
+
DEPENDENCIES
|
20
|
+
bundler (~> 1.3)
|
21
|
+
exceltocsv!
|
22
|
+
rake
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Jeff McAffee
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# ExcelToCsv::ExcelFile
|
2
|
+
|
3
|
+
ExcelFile is a file converter to convert Excel spreadsheets to CSV files.
|
4
|
+
It is specifically designed for the criteria required to generate properly
|
5
|
+
formatted CSV files for use with [GDLC](https://github.com/jmcaffee/gdlc).
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
Quick example:
|
10
|
+
|
11
|
+
require 'exceltocsv'
|
12
|
+
|
13
|
+
converter = ExcelToCsv::ExcelFile.new
|
14
|
+
converter.xl_to_csv( 'path/to/input.xls', 'path/to/output.csv' )
|
15
|
+
|
16
|
+
Example rake task that updates (converts xls files) csvs based on last modified
|
17
|
+
date of each file within a directory structure.
|
18
|
+
|
19
|
+
###### plk.rake
|
20
|
+
|
21
|
+
require 'exceltocsv'
|
22
|
+
|
23
|
+
desc "Update CSV files from XLS source"
|
24
|
+
task :update do
|
25
|
+
plks = FileList['plk/xls/**/*.xls']
|
26
|
+
|
27
|
+
# Pathmap string maps to csv dir with csv target file
|
28
|
+
pm = "%{^plk/xls,plk/csv;.xls$,.csv;.xlsx$,.csv}p"
|
29
|
+
# Remove any source files when the dest file exists and is newer.
|
30
|
+
plks.delete_if do |s|
|
31
|
+
# Downcase the path,
|
32
|
+
d = s.pathmap( pm ).downcase
|
33
|
+
# and snakecase the target filename.
|
34
|
+
d = snakecase_filename(d)
|
35
|
+
File.exists?(d) && File.stat(s).mtime <= File.stat(d).mtime
|
36
|
+
end
|
37
|
+
|
38
|
+
target_csvs = plks.pathmap( pm )
|
39
|
+
|
40
|
+
# I want the target filenames normalized to lower case.
|
41
|
+
target_csvs.each { |p| p.downcase! }
|
42
|
+
|
43
|
+
# Create all target dirs
|
44
|
+
target_dirs = target_csvs.pathmap("%d")
|
45
|
+
target_dirs.uniq!
|
46
|
+
mkdir_p target_dirs
|
47
|
+
|
48
|
+
# Convert all newer XL files to CSVs.
|
49
|
+
# Note that this method only converts the first sheet in the workbook.
|
50
|
+
converter = ExcelToCsv::ExcelFile.new
|
51
|
+
plks.each do |x|
|
52
|
+
converter.xl_to_csv(x, snakecase_filename(x.pathmap(pm).downcase))
|
53
|
+
end
|
54
|
+
|
55
|
+
puts "All target files are up to date" if plks.empty?
|
56
|
+
end
|
57
|
+
|
58
|
+
def snakecase_filename(filepath)
|
59
|
+
snake_file_path = File.join(filepath.pathmap("%d"), filepath.pathmap("%n").snakecase + filepath.pathmap("%x"))
|
60
|
+
end
|
61
|
+
|
62
|
+
## License
|
63
|
+
|
64
|
+
See [LICENSE](https://github.com/jmcaffee/exceltocsv/blob/master/LICENSE).
|
65
|
+
Website: [http://ktechsystems.com](http://ktechsystems.com)
|
66
|
+
|
data/exceltocsv.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'exceltocsv/version'
|
5
|
+
require 'exceltocsv/os'
|
6
|
+
|
7
|
+
Gem::Specification.new do |spec|
|
8
|
+
spec.name = "exceltocsv"
|
9
|
+
spec.version = ExcelToCsv::VERSION
|
10
|
+
spec.authors = ["Jeff McAffee"]
|
11
|
+
spec.email = ["jeff@ktechsystems.com"]
|
12
|
+
spec.description = %q{ExcelToCsv is a utility library for converting Excel files to CSV format.}
|
13
|
+
spec.summary = %q{Utility for converting Excel files to CSV format}
|
14
|
+
spec.homepage = "https://github.com/jmcaffee/exceltocsv"
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files`.split($/)
|
18
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
19
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
23
|
+
spec.add_development_dependency "rake"
|
24
|
+
|
25
|
+
spec.add_runtime_dependency "spreadsheet"
|
26
|
+
end
|
data/lib/exceltocsv.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: exceltocsv.rb
|
3
|
+
# Purpose:: Include file for ExcelToCsv library
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 04/17/2013
|
6
|
+
# Copyright:: Copyright (c) 2013, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
|
10
|
+
require 'find'
|
11
|
+
require 'logger'
|
12
|
+
|
13
|
+
|
14
|
+
$LOG = Logger.new(STDERR)
|
15
|
+
$LOG.level = Logger::ERROR
|
16
|
+
|
17
|
+
if ENV["DEBUG"] == '1'
|
18
|
+
puts "LOGGING: ON due to DEBUG=1"
|
19
|
+
$LOG.level = Logger::DEBUG
|
20
|
+
end
|
21
|
+
|
22
|
+
require "#{File.join( File.dirname(__FILE__), 'exceltocsv','version')}"
|
23
|
+
|
24
|
+
$LOG.info "**********************************************************************"
|
25
|
+
$LOG.info "Logging started for ExcelToCsv library."
|
26
|
+
$LOG.info "**********************************************************************"
|
27
|
+
|
28
|
+
|
29
|
+
require "#{File.join( File.dirname(__FILE__), 'exceltocsv','excel_file')}"
|
30
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: cross_platform_excel.rb
|
3
|
+
# Purpose:: Cross platform Excel binary implementation
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
require_relative 'excel_app_wrapper'
|
10
|
+
require 'spreadsheet'
|
11
|
+
|
12
|
+
module ExcelToCsv
|
13
|
+
class CrossPlatformExcel < ExcelAppWrapper
|
14
|
+
def open_workbook(filepath)
|
15
|
+
# Open an Excel file
|
16
|
+
@wb = Spreadsheet.open filepath
|
17
|
+
end
|
18
|
+
|
19
|
+
def worksheet_names
|
20
|
+
worksheets = @wb.worksheets.collect { |w| w.name }
|
21
|
+
end
|
22
|
+
|
23
|
+
def close_workbook
|
24
|
+
# NOP
|
25
|
+
end
|
26
|
+
|
27
|
+
def worksheet_data(worksheet_name)
|
28
|
+
sheet = @wb.worksheet worksheet_name
|
29
|
+
sheet.rows
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: excel_app_wrapper.rb
|
3
|
+
# Purpose:: Excel Application Wrapper base class
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
|
10
|
+
module ExcelToCsv
|
11
|
+
class ExcelAppWrapper
|
12
|
+
def open_workbook(filepath)
|
13
|
+
fail 'abstract #open_workbook method must be overridden'
|
14
|
+
end
|
15
|
+
|
16
|
+
def worksheet_names
|
17
|
+
fail 'abstract #worksheet_names method must be overridden'
|
18
|
+
end
|
19
|
+
|
20
|
+
def close_workbook
|
21
|
+
fail 'abstract #close_workbook method must be overridden'
|
22
|
+
end
|
23
|
+
|
24
|
+
def worksheet_data(worksheet_name)
|
25
|
+
fail 'abstract #worksheet_data method must be overridden'
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,278 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: excel_file.rb
|
3
|
+
# Purpose:: Convert Excel files to CSV format accounting for formatting
|
4
|
+
# This file was originally located in the gdlrakeutils gem
|
5
|
+
# as converttocsv.rb
|
6
|
+
#
|
7
|
+
# Author:: Jeff McAffee 04/17/2013
|
8
|
+
# Copyright:: Copyright (c) 2013, kTech Systems LLC. All rights reserved.
|
9
|
+
# Website:: http://ktechsystems.com
|
10
|
+
##############################################################################
|
11
|
+
|
12
|
+
require_relative 'os'
|
13
|
+
require 'time'
|
14
|
+
require 'csv'
|
15
|
+
require 'bigdecimal'
|
16
|
+
|
17
|
+
|
18
|
+
module ExcelToCsv
|
19
|
+
class ExcelFile
|
20
|
+
|
21
|
+
# If FORCE_WIN_OLE is Y or 1, use the actual Excel application.
|
22
|
+
# NOTE: This will only work on a windows OS!
|
23
|
+
unless ENV['FORCE_WIN_OLE'].nil?
|
24
|
+
FORCE_WIN_OLE = 1 if ENV['FORCE_WIN_OLE'] == '1'
|
25
|
+
FORCE_WIN_OLE = 1 if ENV['FORCE_WIN_OLE'].downcase == 'y'
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize()
|
29
|
+
@date_RE = Regexp.new(/\d{4,4}\/\d{2,2}\/\d{2,2}/)
|
30
|
+
@date_with_dashes_RE = Regexp.new(/\d{4,4}-\d{2,2}-\d{2,2}/)
|
31
|
+
@date_with_time_RE = Regexp.new(/\d{2,2}:\d{2,2}:\d{2,2}/)
|
32
|
+
end
|
33
|
+
|
34
|
+
def xl_app
|
35
|
+
return @xl_app unless @xl_app.nil?
|
36
|
+
if OS.windows? and defined?(FORCE_WIN_OLE)
|
37
|
+
require_relative 'win_excel'
|
38
|
+
@xl_app = WinExcel.new
|
39
|
+
else
|
40
|
+
# CrossPlatformExcel is faster (like, by 30x).
|
41
|
+
require_relative 'cross_platform_excel'
|
42
|
+
@xl_app = CrossPlatformExcel.new
|
43
|
+
end
|
44
|
+
@xl_app
|
45
|
+
end
|
46
|
+
|
47
|
+
def set_flag(flg)
|
48
|
+
if (flg == "-v")
|
49
|
+
@verbose = true
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def verbose?()
|
54
|
+
@verbose ||= false
|
55
|
+
end
|
56
|
+
|
57
|
+
# Convert the first worksheet whose name contains 'CQDS' or 'PLK' in an
# xls(x) file to a csv file.
#
# infile::  path of the Excel workbook to read.
# outfile:: path of the CSV file to write.
#
# Prints a message and returns without writing anything when +infile+ does
# not exist or when the workbook has no matching sheet. Once the workbook
# has been opened it is always closed again, even if conversion raises.
# Always returns nil.
def xl_to_csv(infile, outfile)
  filepath = File.expand_path(infile)
  puts "xl_to_csv: #{infile} => #{outfile}" if verbose?

  # File.exist? is used because File.exists? no longer exists in Ruby 3.2+.
  unless File.exist?(filepath)
    puts "Unable to find file."
    puts " #{filepath}"
    return
  end

  # Open an Excel file
  xl_app.open_workbook filepath

  begin
    # Build a list of work sheets to dump to file.
    sheets_in_file = xl_app.worksheet_names.select do |sheetname|
      sheetname.match(/CQDS/) || sheetname.match(/PLK/)
    end
    sheets_in_file.each do |sheetname|
      puts "Converting sheet #{sheetname}" if verbose?
    end

    if sheets_in_file.empty?
      puts "*** No sheets labeled 'PLK' or 'CQDS' ***"
      puts "Verify #{infile} is formatted correctly."
      return
    end

    # Write sheet data to file. Only the first matching sheet is converted.
    File.open(outfile, "w") do |f|
      xl_app.worksheet_data(sheets_in_file[0]).each do |row|
        row_data = []
        # NOTE(review): cells are walked with #each, exactly as the original
        # `for` loop did, in case the backend's row type customises
        # iteration -- confirm before switching to #map.
        row.each { |a_cell| row_data << process_cell_value(a_cell) }

        # A row holds data when any cell has a character other than a
        # comma or a line break.
        if row_data.any? { |cell| cell.match(/[^,\r\n]+/) }
          f << row_data.join(",")
          f << "\n"
          puts "#{row_data}" if verbose?
        else
          # Insert an empty line if the row contains no data.
          f << "\n"
          puts "\n" if verbose?
        end
      end
    end

    # Strip empty data from end of lines
    clean_csv(outfile)
  ensure
    # Close Excel
    xl_app.close_workbook
  end

  nil
end
|
135
|
+
|
136
|
+
|
137
|
+
# Rewrite a CSV file in place so that every row has the same number of
# columns, cut off after the right-most column that holds data in any row.
# Output stops at the second consecutive empty row.
#
# filename:: path of the CSV file to clean (anything responding to #to_s).
def clean_csv(filename)
  # Index of the right-most non-empty column seen in any row (0 if none).
  max_row_length = 0
  CSV.foreach(filename) do |row|
    last_used = row.rindex { |item| !item.nil? && !item.empty? } || 0
    max_row_length = last_used if last_used > max_row_length
  end

  puts "Max row length: #{max_row_length.to_s}" if verbose?

  tmp_file = filename.to_s + ".tmp.csv"
  CSV.open(tmp_file, "wb") do |tmp_csv|
    # Used to track empty lines
    empty_found = false

    CSV.foreach(filename) do |row|
      # Short rows are padded with nil up to the common width.
      clean_row = Array.new(max_row_length + 1) { |i| row[i] }

      # We need to stop output on 2nd empty row
      row_is_empty = empty_row?(clean_row)
      break if row_is_empty && empty_found
      empty_found = row_is_empty

      tmp_csv << clean_row
    end # CSV read
  end # CSV write

  # Replace original file with tmpfile. A single move overwrites the
  # destination, so a failed move cannot leave us with no output file at all
  # (the previous rm-then-mv sequence could).
  FileUtils.mv tmp_file, filename
end
|
174
|
+
|
175
|
+
# Return true if row contains no data, i.e. every item is nil or empty.
# An empty row ([]) is also considered empty.
def empty_row?(row)
  row.none? { |item| item && !item.empty? }
end
|
183
|
+
|
184
|
+
# Normalise one raw cell value into the text written to the CSV file.
#
# a_cell:: raw cell value (any object; converted with #to_s).
#
# Returns a String.
def process_cell_value(a_cell)
  # Truncate the number to 3 decimal places if numeric.
  a_cell = truncate_decimal(a_cell)

  # Remove leading and trailing spaces.
  a_cell = a_cell.to_s.strip

  # If the result is n.000... Remove the unnecessary zeros.
  a_cell = clean_int_value(a_cell)

  # If the result is a date, remove time.
  a_cell = format_date(a_cell)

  # Surround the cell value with quotes when it contains a comma or a double
  # quote, doubling any embedded double quotes. Leaving quotes unescaped
  # yields malformed CSV.
  if a_cell.include?(',') || a_cell.include?('"')
    a_cell = '"' + a_cell.gsub('"', '""') + '"'
  end

  a_cell
end
|
202
|
+
|
203
|
+
# Limits a numeric cell to 3 decimal places and removes trailing zeros,
# always keeping at least one decimal place.
#   eg. 12.10 => "12.1"
#       12.0  => "12.0"
# Numeric input is returned as a String; any other value is returned
# unchanged.
def truncate_decimal(a_cell)
  return a_cell unless a_cell.is_a?(Numeric)

  # Kernel#BigDecimal is used because BigDecimal.new has been removed from
  # current bigdecimal releases. Round-tripping the fixed-point string
  # through BigDecimal drops the superfluous trailing zeros.
  BigDecimal(truncate_decimal_to_string(a_cell, 3)).to_s("F")
end

# Converts a number to a string with a fixed number of decimal places.
# num:    number to format
# places: number of decimal places to keep
# NOTE: "%.Nf" formatting rounds the last kept digit; it does not simply
# cut the extra digits off.
def truncate_decimal_to_string(num, places)
  "%.#{places}f" % num
end
|
223
|
+
|
224
|
+
# If the value ends in a decimal point followed only by zeros (n.000...),
# remove that unnecessary suffix. Only the trailing run is removed, so
# earlier dot-separated parts are kept ("1.5.0" => "1.5").
# a_cell: String to clean
# returns a String
def clean_int_value(a_cell)
  a_cell.sub(/\.0+$/, '')
end
|
232
|
+
|
233
|
+
# If the cell is a date, format it to M/D/YYYY (no zero padding), stripping time.
|
234
|
+
def format_date(a_cell)
|
235
|
+
isdate = true if(nil != (dt = a_cell.match(@date_RE)))
|
236
|
+
isdate = true if(isdate || (nil != (dt = a_cell.match(@date_with_dashes_RE))) )
|
237
|
+
isdate = true if(isdate || (nil != (dt = a_cell.match(@date_with_time_RE))) )
|
238
|
+
if isdate
|
239
|
+
begin
|
240
|
+
mod_dt = DateTime.parse(a_cell)
|
241
|
+
cary = "#{mod_dt.month}/#{mod_dt.day}/#{mod_dt.year}"
|
242
|
+
if(true == verbose?)
|
243
|
+
puts ""
|
244
|
+
puts "*** Converted date to #{cary} ***"
|
245
|
+
puts ""
|
246
|
+
end
|
247
|
+
a_cell = cary
|
248
|
+
rescue ArgumentError => e
|
249
|
+
# Either this is not a date, or the date format is unrecognized,
|
250
|
+
# nothing to see here, moving on.
|
251
|
+
end
|
252
|
+
end
|
253
|
+
a_cell
|
254
|
+
end
|
255
|
+
|
256
|
+
def prepare_outdir(outdir)
|
257
|
+
if( !File.directory?(outdir) )
|
258
|
+
FileUtils.makedirs("#{outdir}")
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def winPath(filepath)
|
263
|
+
parts = filepath.split("/")
|
264
|
+
mspath = nil
|
265
|
+
|
266
|
+
for part in parts
|
267
|
+
if(mspath == nil)
|
268
|
+
mspath = []
|
269
|
+
mspath << part
|
270
|
+
else
|
271
|
+
mspath << "\\" << part
|
272
|
+
end
|
273
|
+
end
|
274
|
+
|
275
|
+
mspath
|
276
|
+
end
|
277
|
+
end # class ExcelFile
|
278
|
+
end # module ExcelToCsv
|
@@ -0,0 +1,27 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: os.rb
|
3
|
+
# Purpose:: Operating System detecting
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
|
10
|
+
# Operating system detection based on the "arch" entry of RbConfig::CONFIG.
module OS
  # True when the arch string names a Windows-style platform.
  def self.windows?
    !(/cygwin|mswin|mingw|bccwin|wince|emx/ =~ RbConfig::CONFIG["arch"]).nil?
  end

  # True when the arch string contains "darwin".
  def self.mac?
    !(/darwin/ =~ RbConfig::CONFIG["arch"]).nil?
  end

  # True for anything that is not detected as Windows.
  def self.unix?
    !windows?
  end

  # True for a non-Windows platform that is not detected as a Mac.
  def self.linux?
    unix? && !mac?
  end
end
|
27
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# Everything is contained in Module ExcelToCsv
|
3
|
+
#
|
4
|
+
module ExcelToCsv

  # Each constant is only assigned when it is not already defined on this
  # module, so loading this file twice does not redefine it.
  # Module#constants returns Symbols, so comparing against a String name
  # never matches; const_defined? (without ancestor lookup) is the reliable
  # check.
  VERSION = "0.2.1" unless const_defined?(:VERSION, false)
  APPNAME = "ExcelToCsv" unless const_defined?(:APPNAME, false)
  COPYRIGHT = "Copyright (c) 2014, kTech Systems LLC. All rights reserved." unless const_defined?(:COPYRIGHT, false)

  # Returns the application banner: name and version on the first line, the
  # copyright notice on the second, followed by a trailing newline.
  def self.logo()
    [ "#{ExcelToCsv::APPNAME} v#{ExcelToCsv::VERSION}",
      "#{ExcelToCsv::COPYRIGHT}",
      ""
    ].join("\n")
  end
end # module ExcelToCsv
|
@@ -0,0 +1,42 @@
|
|
1
|
+
##############################################################################
|
2
|
+
# File:: win_excel.rb
|
3
|
+
# Purpose:: Windows OLE Excel Application Wrapper
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 06/03/2014
|
6
|
+
# Copyright:: Copyright (c) 2014, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
##############################################################################
|
9
|
+
require_relative 'excel_app_wrapper'
|
10
|
+
require 'win32ole'
|
11
|
+
|
12
|
+
module ExcelToCsv
|
13
|
+
class WinExcel < ExcelAppWrapper
|
14
|
+
def open_workbook(filepath)
|
15
|
+
# Open an Excel file
|
16
|
+
@xl = WIN32OLE.new('Excel.Application')
|
17
|
+
# Turn off excel alerts.
|
18
|
+
@xl.DisplayAlerts = false
|
19
|
+
|
20
|
+
# 2nd param of false turns off the link update request
|
21
|
+
# when an xls file is opened that contains links.
|
22
|
+
@wb = @xl.Workbooks.Open("#{filepath}", false)
|
23
|
+
end
|
24
|
+
|
25
|
+
def worksheet_names
|
26
|
+
worksheets = []
|
27
|
+
@wb.Worksheets.each do |ws|
|
28
|
+
worksheets << ws.Name
|
29
|
+
end
|
30
|
+
worksheets
|
31
|
+
end
|
32
|
+
|
33
|
+
def close_workbook
|
34
|
+
# Close Excel
|
35
|
+
@xl.Quit
|
36
|
+
end
|
37
|
+
|
38
|
+
def worksheet_data(worksheet_name)
|
39
|
+
data = @wb.Worksheets(worksheet_name).UsedRange.Value
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/rakefile.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
######################################################################################
|
2
|
+
# File:: rakefile
|
3
|
+
# Purpose:: Build tasks for ExcelToCsv application
|
4
|
+
#
|
5
|
+
# Author:: Jeff McAffee 04/17/2013
|
6
|
+
# Copyright:: Copyright (c) 2013, kTech Systems LLC. All rights reserved.
|
7
|
+
# Website:: http://ktechsystems.com
|
8
|
+
######################################################################################
|
9
|
+
|
10
|
+
require 'bundler/gem_tasks'
|
11
|
+
require 'psych'
|
12
|
+
gem 'rdoc', '>= 3.9.4'
|
13
|
+
|
14
|
+
require 'rake'
|
15
|
+
require 'rake/clean'
|
16
|
+
require 'rdoc/task'
|
17
|
+
require 'rspec/core/rake_task'
|
18
|
+
|
19
|
+
# Setup common directory structure
|
20
|
+
|
21
|
+
PROJNAME = "ExcelToCsv"
|
22
|
+
|
23
|
+
# Setup common clean and clobber targets
|
24
|
+
|
25
|
+
CLEAN.include("pkg/**/*.*")
|
26
|
+
CLEAN.include("tmp/**/*.*")
|
27
|
+
|
28
|
+
CLOBBER.include("pkg")
|
29
|
+
CLOBBER.include("tmp")
|
30
|
+
|
31
|
+
|
32
|
+
#############################################################################
|
33
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
34
|
+
files = ['docs/**/*.rdoc', 'lib/**/*.rb', 'app/**/*.rb']
|
35
|
+
rdoc.rdoc_files.add( files )
|
36
|
+
rdoc.main = "docs/README.md" # Page to start on
|
37
|
+
rdoc.title = "#{PROJNAME} Documentation"
|
38
|
+
rdoc.rdoc_dir = 'doc' # rdoc output folder
|
39
|
+
rdoc.options << '--line-numbers' << '--all'
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
#############################################################################
|
44
|
+
desc "Run all specs"
|
45
|
+
RSpec::Core::RakeTask.new do |t|
|
46
|
+
#t.rcov = true
|
47
|
+
end
|
48
|
+
|
Binary file
|
Binary file
|
Binary file
|
data/spec/data/test1.xls
ADDED
Binary file
|
Binary file
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ExcelToCsv::ExcelFile do
|
4
|
+
|
5
|
+
let(:converter) { ExcelToCsv::ExcelFile.new }
|
6
|
+
|
7
|
+
let(:outdir) { Pathname.new('tmp/spec') }
|
8
|
+
let(:testfile) { Pathname.new('spec/data/test1.xls') }
|
9
|
+
let(:outfile) { outdir + 'test1.csv' }
|
10
|
+
|
11
|
+
before :each do
|
12
|
+
outdir.rmtree if outdir.exist? && outdir.directory?
|
13
|
+
outdir.mkpath
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
it "converts a file to csv" do
|
18
|
+
converter.xl_to_csv(testfile, outfile)
|
19
|
+
outfile.exist?.should be_truthy
|
20
|
+
end
|
21
|
+
|
22
|
+
context "dates" do
|
23
|
+
|
24
|
+
let(:ndatefile) { Pathname.new('spec/data/normaldate.xls') }
|
25
|
+
let(:ndatefileout) { outdir + 'normaldate.csv' }
|
26
|
+
|
27
|
+
it "normal excel dates are converted consistently" do
|
28
|
+
converter.xl_to_csv(ndatefile, ndatefileout)
|
29
|
+
ndatefileout.exist?.should be_truthy
|
30
|
+
file_to_string(ndatefileout).should include '12/18/2012'
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
let(:tdatefile) { Pathname.new('spec/data/textdate.xls') }
|
35
|
+
let(:tdatefileout) { outdir + 'textdate.csv' }
|
36
|
+
|
37
|
+
it "text excel dates are converted consistently" do
|
38
|
+
converter.xl_to_csv(tdatefile, tdatefileout)
|
39
|
+
tdatefileout.exist?.should be_truthy
|
40
|
+
file_to_string(tdatefileout).should include '12/18/2012'
|
41
|
+
end
|
42
|
+
end # context "dates"
|
43
|
+
|
44
|
+
|
45
|
+
context "decimals" do
|
46
|
+
|
47
|
+
let(:decimalsfile) { Pathname.new('spec/data/decimals.xls') }
|
48
|
+
let(:decimalsfileout) { outdir + 'decimals.csv' }
|
49
|
+
|
50
|
+
it "3 decimal place numbers are processed as is" do
|
51
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
52
|
+
decimalsfileout.exist?.should be_truthy
|
53
|
+
file_row_starting_with(decimalsfileout, '3 Place Decimal').should include '1.123'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "more than 3 decimal places are truncated at 3 places" do
|
57
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
58
|
+
decimalsfileout.exist?.should be_truthy
|
59
|
+
last_item_from_row('Truncate Decimal', decimalsfileout).should eq '1.234'
|
60
|
+
end
|
61
|
+
|
62
|
+
it "decimal places are trucated when 0" do
|
63
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
64
|
+
decimalsfileout.exist?.should be_truthy
|
65
|
+
last_item_from_row('Integer', decimalsfileout).should eq '1'
|
66
|
+
end
|
67
|
+
|
68
|
+
it "trailing decimal zeros are trucated" do
|
69
|
+
converter.xl_to_csv(decimalsfile, decimalsfileout)
|
70
|
+
decimalsfileout.exist?.should be_truthy
|
71
|
+
last_item_from_row('No Trailing Zero', decimalsfileout).should eq '1.23'
|
72
|
+
end
|
73
|
+
end # context "decimals"
|
74
|
+
|
75
|
+
|
76
|
+
context "commas" do
|
77
|
+
|
78
|
+
let(:commasfile) { Pathname.new('spec/data/commastrings.xls') }
|
79
|
+
let(:commasfileout) { outdir + 'commastrings.csv' }
|
80
|
+
|
81
|
+
it "within cells are enclosed in quotes" do
|
82
|
+
converter.xl_to_csv(commasfile, commasfileout)
|
83
|
+
commasfileout.exist?.should be_truthy
|
84
|
+
file_to_string(commasfileout).should include 'Comma String,"This,string,has,commas"'
|
85
|
+
end
|
86
|
+
|
87
|
+
end # context "commas"
|
88
|
+
end # describe ExcelToCsv::ExcelFile
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
RSpec.configure do |config|
|
8
|
+
config.run_all_when_everything_filtered = true
|
9
|
+
config.filter_run :focus
|
10
|
+
|
11
|
+
# Run specs in random order to surface order dependencies. If you find an
|
12
|
+
# order dependency and want to debug it, you can fix the order by providing
|
13
|
+
# the seed, which is printed after each run.
|
14
|
+
# --seed 1234
|
15
|
+
config.order = 'random'
|
16
|
+
|
17
|
+
# Enable both 'should' and 'expect' syntax:
|
18
|
+
config.expect_with :rspec do |c|
|
19
|
+
# Disable the `expect` syntax...
|
20
|
+
#c.syntax = :should
|
21
|
+
|
22
|
+
# ...or disable the `should` syntax...
|
23
|
+
#c.syntax = :expect
|
24
|
+
|
25
|
+
# ...or explicitly enable both
|
26
|
+
c.syntax = [:should, :expect]
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
require 'pathname'
|
32
|
+
require_relative '../lib/exceltocsv'
|
33
|
+
|
34
|
+
# Read a file into an array with one element per line. Line terminators are
# kept on each element.
def file_to_array(filepath)
  File.readlines(filepath)
end
|
41
|
+
|
42
|
+
# Return true when any single line of the file +haystack+ contains the
# string +needle+. Reading stops at the first matching line.
def file_contains?(haystack, needle)
  File.foreach(haystack).any? { |line| line.include?(needle) }
end
|
49
|
+
|
50
|
+
# Return the entire contents of the file as a single string.
def file_to_string(filename)
  File.read(filename)
end
|
57
|
+
|
58
|
+
# Return the first line of the file that contains +starts_with+, or an empty
# string when no line does.
# NOTE(review): despite the name, the text may appear anywhere in the line,
# not only at its start -- confirm whether a prefix match was intended.
def file_row_starting_with(filename, starts_with)
  File.foreach(filename).find { |r| r.include?(starts_with) } || ""
end


# Return the last comma-separated item of the first line containing
# +row_starts_with+, without its trailing line terminator. Returns nil when
# no line matches. The split is a plain split on ',' and does not honour
# CSV quoting.
def last_item_from_row(row_starts_with, filename)
  last_item = file_row_starting_with(filename, row_starts_with).split(',').last
  last_item && last_item.chomp
end
|
73
|
+
|
74
|
+
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: exceltocsv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeff McAffee
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: spreadsheet
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: ExcelToCsv is a utility library for converting Excel files to CSV format.
|
56
|
+
email:
|
57
|
+
- jeff@ktechsystems.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- Gemfile
|
65
|
+
- Gemfile.lock
|
66
|
+
- LICENSE
|
67
|
+
- README.md
|
68
|
+
- exceltocsv.gemspec
|
69
|
+
- lib/exceltocsv.rb
|
70
|
+
- lib/exceltocsv/cross_platform_excel.rb
|
71
|
+
- lib/exceltocsv/excel_app_wrapper.rb
|
72
|
+
- lib/exceltocsv/excel_file.rb
|
73
|
+
- lib/exceltocsv/os.rb
|
74
|
+
- lib/exceltocsv/version.rb
|
75
|
+
- lib/exceltocsv/win_excel.rb
|
76
|
+
- rakefile.rb
|
77
|
+
- spec/data/commastrings.xls
|
78
|
+
- spec/data/decimals.xls
|
79
|
+
- spec/data/normaldate.xls
|
80
|
+
- spec/data/test1.xls
|
81
|
+
- spec/data/textdate.xls
|
82
|
+
- spec/excel_file_spec.rb
|
83
|
+
- spec/spec_helper.rb
|
84
|
+
homepage: https://github.com/jmcaffee/exceltocsv
|
85
|
+
licenses:
|
86
|
+
- MIT
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.3.0
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: Utility for converting Excel files to CSV format
|
108
|
+
test_files:
|
109
|
+
- spec/data/commastrings.xls
|
110
|
+
- spec/data/decimals.xls
|
111
|
+
- spec/data/normaldate.xls
|
112
|
+
- spec/data/test1.xls
|
113
|
+
- spec/data/textdate.xls
|
114
|
+
- spec/excel_file_spec.rb
|
115
|
+
- spec/spec_helper.rb
|
116
|
+
has_rdoc:
|