xls-split 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.md +13 -0
- data/README.md +77 -0
- data/bin/xls-split +111 -0
- data/lib/xls-split/version.rb +3 -0
- metadata +62 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9762885e78b2e3f27485a4493df8031f6b998ec6
|
4
|
+
data.tar.gz: bdc067b5a95852f50df5612d5638cbd8988a0c50
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0feb5bcabc0fb7dd40ebd27f66a07b4bed76c5957edbf0b8429f1870e6ce752ed3e6064cf1f577f6a9245972efb64f8e7fc68178af9e622820ef68e65b361109
|
7
|
+
data.tar.gz: c8752b0484cea5595671836bfda8bf2319aae9ac379861339353e8771e584093d527ebb928f56d0e4da78f017f4432cc4f643ed64d4fd3668b7312a39e974180
|
data/LICENSE.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
2
|
+
you may not use this file except in compliance with the License.
|
3
|
+
|
4
|
+
You may obtain a copy of the License at
|
5
|
+
|
6
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
|
8
|
+
Unless required by applicable law or agreed to in writing,
|
9
|
+
software distributed under the License is distributed on an "AS IS" BASIS,
|
10
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
11
|
+
|
12
|
+
See the License for the specific language governing permissions and limitations
|
13
|
+
under the License.
|
data/README.md
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
A simple ruby command-line tool that allows a spreadsheet to be split into a number of
|
2
|
+
separate CSV files, one file per worksheet.
|
3
|
+
|
4
|
+
Lots of (UK) Government data is published in this way, often with hidden worksheets. This
|
5
|
+
tool makes it simple to split all of the worksheets out into separate files for easier
|
6
|
+
processing, e.g. using tools like Google Refine.
|
7
|
+
|
8
|
+
USAGE
|
9
|
+
-----
|
10
|
+
|
11
|
+
xls-split [opts] [file]
|
12
|
+
|
13
|
+
For example, if you have a spreadsheet containing two worksheets, Table1 and Table2 then the following
|
14
|
+
command will split that into two CSV files: /tmp/my-data-Table1.csv and /tmp/my-data-Table2.csv
|
15
|
+
|
16
|
+
xls-split -d /tmp -b my-data spreadsheet.xls
|
17
|
+
|
18
|
+
There are command-line options available to control location and naming of generated files,
|
19
|
+
as well as the ability to only extract specific worksheets, based on a regex match.
|
20
|
+
|
21
|
+
INSTALLATION
|
22
|
+
------------
|
23
|
+
|
24
|
+
Install as a gem:
|
25
|
+
|
26
|
+
gem install xls-split
|
27
|
+
|
28
|
+
Or, grab the source from github and run:
|
29
|
+
|
30
|
+
rake install
|
31
|
+
|
32
|
+
OPTIONS
|
33
|
+
-------
|
34
|
+
|
35
|
+
--help , -h
|
36
|
+
show this message
|
37
|
+
|
38
|
+
--verbose , -v
|
39
|
+
verbose progress reporting
|
40
|
+
|
41
|
+
--encoding , -e
|
42
|
+
set the encoding of the spreadsheet. Default is UTF-8
|
43
|
+
|
44
|
+
--skip , -s
|
45
|
+
number of rows in each worksheet to skip before writing data
|
46
|
+
|
47
|
+
--skipfooter , -f
|
48
|
+
number of rows to skip at the end of each worksheet
|
49
|
+
|
50
|
+
--dir , -d
|
51
|
+
output directory into which CSV files will be written
|
52
|
+
|
53
|
+
--base , -b
|
54
|
+
set a base file name for generated CSV files. Worksheet name will be appended
|
55
|
+
|
56
|
+
--match , -m
|
57
|
+
regular expression used to match worksheet names. Only matching sheets will be split out
|
58
|
+
|
59
|
+
--tidy , -t
|
60
|
+
tidy up worksheet names. Lower-cases them and replaces spaces with hyphens
|
61
|
+
|
62
|
+
LICENSE
|
63
|
+
--------
|
64
|
+
|
65
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
66
|
+
you may not use this file except in compliance with the License.
|
67
|
+
|
68
|
+
You may obtain a copy of the License at
|
69
|
+
|
70
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
71
|
+
|
72
|
+
Unless required by applicable law or agreed to in writing,
|
73
|
+
software distributed under the License is distributed on an "AS IS" BASIS,
|
74
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
75
|
+
|
76
|
+
See the License for the specific language governing permissions and limitations
|
77
|
+
under the License.
|
data/bin/xls-split
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require 'getoptlong'
|
5
|
+
require "spreadsheet"
|
6
|
+
require "csv"
|
7
|
+
|
8
|
+
# Help text printed for --help. Every option listed here must match an entry
# in the GetoptLong table below: the flag is spelled --tidy (not --tidy-names)
# and --skipfooter is a supported option.
USAGE = <<-EOL
SUMMARY

xls-split [opts] file.xls

DESCRIPTION

Split a spreadsheet into separate CSV files, one file per worksheet.

Makes it easier to process complex, multi-sheet spreadsheets in tools like Google Refine

OPTIONS

--help , -h
show this message
--verbose , -v
verbose progress reporting
--encoding , -e
set the encoding of the spreadsheet. Default is UTF-8
--skip , -s
number of rows in each worksheet to skip before writing data
--skipfooter , -f
number of rows to skip at the end of each worksheet
--dir , -d
output directory into which CSV files will be written
--base , -b
set a base file name for generated CSV files. Worksheet name will be appended
--match , -m
regular expression used to match worksheet names. Only matching sheets will be split out
--tidy , -t
tidy up worksheet names. Lower-cases them and replaces spaces with hyphens
EOL
|
38
|
+
|
39
|
+
# Command-line option table: [long name, short name, argument requirement].
parser = GetoptLong.new(
  [ "--help" , "-h" , GetoptLong::NO_ARGUMENT ],
  [ "--encoding" , "-e" , GetoptLong::REQUIRED_ARGUMENT ],
  [ "--skip" , "-s" , GetoptLong::REQUIRED_ARGUMENT ],
  [ "--dir" , "-d" , GetoptLong::REQUIRED_ARGUMENT ],
  [ "--base" , "-b" , GetoptLong::REQUIRED_ARGUMENT ],
  [ "--match" , "-m" , GetoptLong::REQUIRED_ARGUMENT ],
  [ "--tidy" , "-t" , GetoptLong::NO_ARGUMENT ],
  [ "--verbose" , "-v" , GetoptLong::NO_ARGUMENT ],
  [ "--skipfooter" , "-f" , GetoptLong::REQUIRED_ARGUMENT ]
)

# Collect the supplied options into a hash keyed by the long option name with
# its hyphens removed ("--skipfooter" => "skipfooter"). Flags that take no
# argument are stored with an empty string, which is still truthy in Ruby.
opts = {}
begin
  parser.each { |name, value| opts[name.delete('-')] = value }
rescue GetoptLong::Error
  # GetoptLong has already written its own error message to stderr; exit
  # with a failure status instead of letting the exception print a stack trace.
  exit(1)
end

if opts["help"]
  USAGE.display
  exit(0)
end
|
55
|
+
|
56
|
+
# Writes a progress message to standard error, but only when the parsed
# options include "verbose".
#
# opts - Hash of parsed command-line options.
# msg  - the message to print.
def log(opts, msg)
  return unless opts["verbose"]

  $stderr.puts(msg)
end
|
59
|
+
|
60
|
+
# Encoding the spreadsheet gem uses for cell text; UTF-8 unless --encoding is given.
Spreadsheet.client_encoding = opts["encoding"] || 'UTF-8'

log( opts, "Worksheet encoding set to #{Spreadsheet.client_encoding}" )

# nil.to_i is 0, so options that were not supplied mean "skip nothing".
skip = opts["skip"].to_i
skipfooter = opts["skipfooter"].to_i

log( opts, "Skipping #{skip} rows in each worksheet" )
log( opts, "Skipping #{skipfooter} rows at end of each worksheet" )

base = opts["base"] || ""
dir = opts["dir"] || "."

# GetoptLong removes the options it parsed from ARGV, so anything left is the
# input file. Fail with a readable message rather than an opaque exception
# from Spreadsheet.open(nil).
abort("xls-split: no spreadsheet file given (see --help)") if ARGV.empty?

book = Spreadsheet.open ARGV[0]

book.worksheets.each do |worksheet|
  # With --match, only worksheets whose name matches the pattern are written.
  if opts["match"] && !worksheet.name.match( opts["match"] )
    log( opts, "Ignoring unmatched worksheet: #{worksheet.name}" )
    next
  end

  name = worksheet.name
  name = name.downcase.gsub(" ", "-") if opts["tidy"]
  filename = File.join( dir, "#{base}-#{name}.csv")

  log( opts, "Writing worksheet #{worksheet.name} to #{filename}" )

  # Per the spreadsheet gem's documentation, dimensions[1] is the index of
  # the first *unused* row (i.e. one past the last row). Rows to keep are
  # therefore those with idx strictly below that value minus the footer
  # count; the previous `<=` comparison kept one footer row too many.
  end_row = worksheet.dimensions[1] - skipfooter

  CSV.open(filename, "w") do |writer|
    worksheet.each(skip) do |row|
      writer << row if row.idx < end_row
    end
  end
end
|
111
|
+
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xls-split
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.2'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leigh Dodds
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: spreadsheet
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.4.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.4.1
|
27
|
+
description: Tools for splitting Excel files into CSV files
|
28
|
+
email:
|
29
|
+
- leigh@ldodds.com
|
30
|
+
executables:
|
31
|
+
- xls-split
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- LICENSE.md
|
36
|
+
- README.md
|
37
|
+
- bin/xls-split
|
38
|
+
- lib/xls-split/version.rb
|
39
|
+
homepage: http://github.com/ldodds/xls-split
|
40
|
+
licenses: []
|
41
|
+
metadata: {}
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
requirements: []
|
57
|
+
rubyforge_project:
|
58
|
+
rubygems_version: 2.2.1
|
59
|
+
signing_key:
|
60
|
+
specification_version: 4
|
61
|
+
summary: Extract worksheets from XLS files into CSV files
|
62
|
+
test_files: []
|