xls-split 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9762885e78b2e3f27485a4493df8031f6b998ec6
4
+ data.tar.gz: bdc067b5a95852f50df5612d5638cbd8988a0c50
5
+ SHA512:
6
+ metadata.gz: 0feb5bcabc0fb7dd40ebd27f66a07b4bed76c5957edbf0b8429f1870e6ce752ed3e6064cf1f577f6a9245972efb64f8e7fc68178af9e622820ef68e65b361109
7
+ data.tar.gz: c8752b0484cea5595671836bfda8bf2319aae9ac379861339353e8771e584093d527ebb928f56d0e4da78f017f4432cc4f643ed64d4fd3668b7312a39e974180
@@ -0,0 +1,13 @@
1
+ Licensed under the Apache License, Version 2.0 (the "License");
2
+ you may not use this file except in compliance with the License.
3
+
4
+ You may obtain a copy of the License at
5
+
6
+ http://www.apache.org/licenses/LICENSE-2.0
7
+
8
+ Unless required by applicable law or agreed to in writing,
9
+ software distributed under the License is distributed on an "AS IS" BASIS,
10
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+
12
+ See the License for the specific language governing permissions and limitations
13
+ under the License.
@@ -0,0 +1,77 @@
1
+ A simple Ruby command-line tool that allows a spreadsheet to be split into a number of
2
+ separate CSV files, one file per worksheet.
3
+
4
+ Lots of (UK) Government data is published in this way, often with hidden worksheets. This
5
+ tool makes it simple to split all of the worksheets out into separate files for easier
6
+ processing, e.g. using tools like Google Refine.
7
+
8
+ USAGE
9
+ -----
10
+
11
+ xls-split [opts] [file]
12
+
13
+ For example, if you have a spreadsheet containing two worksheets, Table1 and Table2, then the following
14
+ command will split that into two CSV files: /tmp/my-data-Table1.csv and /tmp/my-data-Table2.csv
15
+
16
+ xls-split -d /tmp -b my-data spreadsheet.xls
17
+
18
+ There are command-line options available to control location and naming of generated files,
19
+ as well as the ability to only extract specific worksheets, based on a regex match.
20
+
21
+ INSTALLATION
22
+ ------------
23
+
24
+ Install as a gem:
25
+
26
+ gem install xls-split
27
+
28
+ Or, grab the source from GitHub and run:
29
+
30
+ rake install
31
+
32
+ OPTIONS
33
+ -------
34
+
35
+ --help , -h
36
+ show this message
37
+
38
+ --verbose , -v
39
+ verbose progress reporting
40
+
41
+ --encoding , -e
42
+ set the encoding of the spreadsheet. Default is UTF-8
43
+
44
+ --skip , -s
45
+ number of rows in each worksheet to skip before writing data
46
+
47
+ --skipfooter , -f
48
+ number of rows to skip at the end of each worksheet
49
+
50
+ --dir , -d
51
+ output directory into which CSV files will be written
52
+
53
+ --base , -b
54
+ set a base file name for generated CSV files. Worksheet name will be appended
55
+
56
+ --match , -m
57
+ regular expression used to match worksheet names. Only matching sheets will be split out
58
+
59
+ --tidy , -t
60
+ tidy up worksheet names. Converts them to lower case and replaces spaces with hyphens
61
+
62
+ LICENSE
63
+ --------
64
+
65
+ Licensed under the Apache License, Version 2.0 (the "License");
66
+ you may not use this file except in compliance with the License.
67
+
68
+ You may obtain a copy of the License at
69
+
70
+ http://www.apache.org/licenses/LICENSE-2.0
71
+
72
+ Unless required by applicable law or agreed to in writing,
73
+ software distributed under the License is distributed on an "AS IS" BASIS,
74
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
75
+
76
+ See the License for the specific language governing permissions and limitations
77
+ under the License.
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require 'getoptlong'
5
+ require "spreadsheet"
6
+ require "csv"
7
+
8
# Help text printed when --help is given.
#
# Every option listed here must be one the GetoptLong table in this script
# accepts: the tidy option is spelled "--tidy" (not "--tidy-names"), and
# "--skipfooter" is a real option that was previously missing from this text.
USAGE = <<-EOL
SUMMARY

xls-split [opts] file.xls

DESCRIPTION

Split a spreadsheet into separate CSV files, one file per worksheet.

Makes it easier to process complex, multi-sheet spreadsheets in tools like Google Refine

OPTIONS

--help , -h
show this message
--verbose , -v
verbose progress reporting
--encoding , -e
set the encoding of the spreadsheet. Default is UTF-8
--skip , -s
number of rows in each worksheet to skip before writing data
--skipfooter , -f
number of rows to skip at the end of each worksheet
--dir , -d
output directory into which CSV files will be written
--base , -b
set a base file name for generated CSV files. Worksheet name will be appended
--match , -m
regular expression used to match worksheet names. Only matching sheets will be split out
--tidy , -t
tidy up worksheet names. Converts them to lower case and replaces spaces with hyphens
EOL
38
+
39
# Parse the command line into a Hash. Keys are the long option names with
# every "-" removed (e.g. "--skipfooter" becomes "skipfooter"); values are
# the option arguments as yielded by GetoptLong.
parser = GetoptLong.new(
  ["--help",       "-h", GetoptLong::NO_ARGUMENT],
  ["--encoding",   "-e", GetoptLong::REQUIRED_ARGUMENT],
  ["--skip",       "-s", GetoptLong::REQUIRED_ARGUMENT],
  ["--dir",        "-d", GetoptLong::REQUIRED_ARGUMENT],
  ["--base",       "-b", GetoptLong::REQUIRED_ARGUMENT],
  ["--match",      "-m", GetoptLong::REQUIRED_ARGUMENT],
  ["--tidy",       "-t", GetoptLong::NO_ARGUMENT],
  ["--verbose",    "-v", GetoptLong::NO_ARGUMENT],
  ["--skipfooter", "-f", GetoptLong::REQUIRED_ARGUMENT]
)

opts = {}
parser.each do |flag, value|
  opts[flag.delete('-')] = value
end

# Asking for help prints the usage text to standard output and exits with
# status 0 before any spreadsheet is touched.
if opts["help"]
  $stdout.write USAGE
  exit 0
end
55
+
56
# Report progress on standard error when verbose mode is on.
#
# opts - Hash of parsed options; the message is printed only when
#        opts["verbose"] is truthy.
# msg  - the text to print.
#
# Returns nil whether or not anything was printed.
def log(opts, msg)
  return unless opts["verbose"]

  $stderr.puts msg
end
59
+
60
# Encoding handed to the spreadsheet library; UTF-8 unless --encoding was given.
Spreadsheet.client_encoding = opts["encoding"] || 'UTF-8'

log( opts, "Worksheet encoding set to #{Spreadsheet.client_encoding}" )

# Row counts to drop from the start and end of each worksheet. to_i turns a
# missing option (nil) into 0, so no further default is needed.
skip, skipfooter = %w[skip skipfooter].map { |key| opts[key].to_i }

log( opts, "Skipping #{skip} rows in each worksheet" )
log( opts, "Skipping #{skipfooter} rows at end of each worksheet" )

# Output naming: files are written into dir and are named base + "-" + worksheet name.
dir = opts["dir"] || "."
base = opts["base"] || ""
76
+
77
# Export every selected worksheet of the workbook to its own CSV file.
#
# Reads the script-level settings opts, skip, skipfooter, base and dir, and
# takes ARGV[0] as the path of the spreadsheet to open.

# Fail with a readable message instead of an error raised from inside the
# spreadsheet library when no file was named.
abort "xls-split: no input file given (use --help for usage)" if ARGV.empty?

book = Spreadsheet.open ARGV[0]

book.worksheets.each do |worksheet|
  # With no --match pattern every worksheet is exported; otherwise only the
  # worksheets whose name matches the pattern.
  if opts["match"] && !worksheet.name.match(opts["match"])
    log(opts, "Ignoring unmatched worksheet: #{worksheet.name}")
    next
  end

  name = worksheet.name
  name = name.downcase.gsub(" ", "-") if opts["tidy"]
  filename = File.join(dir, "#{base}-#{name}.csv")

  log(opts, "Writing worksheet #{worksheet.name} to #{filename}")

  # dimensions[1] is the index of the first *unused* row, i.e. one past the
  # last row holding data. Subtracting skipfooter gives an exclusive upper
  # bound, so the comparison below must be "<": with "<=" the script dropped
  # one footer row fewer than requested.
  row_limit = worksheet.dimensions[1] - skipfooter

  CSV.open(filename, "w") do |writer|
    # Worksheet#each starts at row index `skip`, dropping the leading rows.
    worksheet.each(skip) do |row|
      writer << row if row.idx < row_limit
    end
  end
end
111
+
@@ -0,0 +1,3 @@
1
# Namespace module of the xls-split gem.
module XLSSplit
  # Release number of the gem, as a String.
  VERSION = "0.2"
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xls-split
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.2'
5
+ platform: ruby
6
+ authors:
7
+ - Leigh Dodds
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: spreadsheet
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.6.4.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.4.1
27
+ description: Tools for splitting Excel files into CSV files
28
+ email:
29
+ - leigh@ldodds.com
30
+ executables:
31
+ - xls-split
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - LICENSE.md
36
+ - README.md
37
+ - bin/xls-split
38
+ - lib/xls-split/version.rb
39
+ homepage: http://github.com/ldodds/xls-split
40
+ licenses: []
41
+ metadata: {}
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements: []
57
+ rubyforge_project:
58
+ rubygems_version: 2.2.1
59
+ signing_key:
60
+ specification_version: 4
61
+ summary: Extract worksheets from XLS files into CSV files
62
+ test_files: []