RubyGems - xls-split - Versions diffs - 0.2 - Mend

xls-split 0.2

Files changed (6) hide show

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 9762885e78b2e3f27485a4493df8031f6b998ec6
+  data.tar.gz: bdc067b5a95852f50df5612d5638cbd8988a0c50
+SHA512:
+  metadata.gz: 0feb5bcabc0fb7dd40ebd27f66a07b4bed76c5957edbf0b8429f1870e6ce752ed3e6064cf1f577f6a9245972efb64f8e7fc68178af9e622820ef68e65b361109
+  data.tar.gz: c8752b0484cea5595671836bfda8bf2319aae9ac379861339353e8771e584093d527ebb928f56d0e4da78f017f4432cc4f643ed64d4fd3668b7312a39e974180

data/LICENSE.md ADDED

@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations
+under the License.

data/README.md ADDED

@@ -0,0 +1,77 @@
+A simple ruby command-line tool that allows a spreadsheet to be split into a number of
+separate CSV files, one file per worksheet.
+Lots of (UK) Government data is published in this way, often with hidden worksheets. This
+tool makes it simple to split all of the worksheets out into separate files for easier
+processing, e.g. using tools like Google Refine.
+USAGE
+-----
+	xls-split [opts] [file]
+For example, if you have a spreadsheet containing two worksheets, Table1 and Table2 then the following
+command will split that into two CSV files: /tmp/my-data-Table1.csv and /tmp/my-data-Table2.csv
+ 	xls-split -d /tmp -b my-data spreadsheet.xls
+There are command-line options available to control location and naming of generated files,
+as well as the ability to only extract specific worksheets, based on a regex match.
+INSTALLATION
+------------
+Install as a gem:
+	gem install xls-split
+Or, grab the source from github and run:
+	rake install
+OPTIONS
+-------
+  --help            , -h
+    show this message
+  --verbose         , -v
+    verbose progress reporting
+  --encoding        , -e
+    set the encoding of the spreadsheet. Default is UTF-8
+  --skip            , -s
+    number of rows in each worksheet to skip before writing data
+  --skipfooter      , -f
+    number of rows to skip at the end of each worksheet
+  --dir             , -d
+    output directory into which CSV files will be written
+  --base            , -b
+    set a base file name for generated CSV files. Worksheet name will be appended
+  --match           , -m
+    regular expression used to match worksheet names. Only matching sheets will be split out
+  --tidy            , -t
+    tidy up worksheet names. Converts them to lower case and replaces spaces with hyphens
+LICENSE
+--------
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations
+under the License.

data/bin/xls-split ADDED

@@ -0,0 +1,111 @@
+#!/usr/bin/env ruby
+require "rubygems"
+require 'getoptlong'
+require "spreadsheet"
+require "csv"
+# Help text printed for --help. Every flag listed here is one the option
+# parser in this script accepts, including --skipfooter and --tidy.
+USAGE = <<-EOL
+SUMMARY
+xls-split [opts] file.xls
+DESCRIPTION
+  Split a spreadsheet into separate CSV files, one file per worksheet.
+  Makes it easier to process complex, multi-sheet spreadsheets in tools like Google Refine
+OPTIONS
+  --help            , -h
+    show this message
+  --verbose         , -v
+    verbose progress reporting
+  --encoding        , -e
+    set the encoding of the spreadsheet. Default is UTF-8
+  --skip            , -s
+    number of rows in each worksheet to skip before writing data
+  --skipfooter      , -f
+    number of rows to skip at the end of each worksheet
+  --dir             , -d
+    output directory into which CSV files will be written
+  --base            , -b
+    set a base file name for generated CSV files. Worksheet name will be appended
+  --match           , -m
+    regular expression used to match worksheet names. Only matching sheets will be split out
+  --tidy            , -t
+    tidy up worksheet names. Lower-cases them and replaces spaces with hyphens
+EOL
+opts = GetoptLong::new(
+         [ "--help"            , "-h" , GetoptLong::NO_ARGUMENT ],
+         [ "--encoding"        , "-e" , GetoptLong::REQUIRED_ARGUMENT ],
+         [ "--skip"            , "-s" , GetoptLong::REQUIRED_ARGUMENT ],
+         [ "--dir"             , "-d" , GetoptLong::REQUIRED_ARGUMENT ],
+         [ "--base"            , "-b" , GetoptLong::REQUIRED_ARGUMENT ],
+         [ "--match"           , "-m" , GetoptLong::REQUIRED_ARGUMENT ],
+         [ "--tidy"            , "-t" , GetoptLong::NO_ARGUMENT ],
+         [ "--verbose"         , "-v" , GetoptLong::NO_ARGUMENT ],
+         [ "--skipfooter"      , "-f" , GetoptLong::REQUIRED_ARGUMENT ]
+       ).enum_for.inject({}) { |h, (k, v)| h.update k.delete('-') => v }
+if opts["help"]
+  USAGE.display
+  exit(0)
+end
+def log(opts, msg)
+  $stderr.puts msg if opts["verbose"]
+end
+# Resolve run-time settings from the parsed options, applying defaults.
+Spreadsheet.client_encoding = opts["encoding"] || 'UTF-8'
+log( opts, "Worksheet encoding set to #{Spreadsheet.client_encoding}" )
+# nil.to_i is 0, so an option that was not supplied already yields 0 here.
+skip = opts["skip"].to_i
+skipfooter = opts["skipfooter"].to_i
+log( opts, "Skipping #{skip} rows in each worksheet" )
+log( opts, "Skipping #{skipfooter} rows at end of each worksheet" )
+# Output location: files are written into `dir`, their names prefixed by `base`.
+dir = opts["dir"] || "."
+base = opts["base"] || ""
+book = Spreadsheet.open ARGV[0]
+book.worksheets.each do |worksheet|
+  if opts["match"] == nil || worksheet.name.match( opts["match"] )
+    name = worksheet.name
+    if opts["tidy"]
+      name = name.downcase.gsub(" ", "-")
+    end
+    filename = File.join( dir, "#{base}-#{name}.csv")
+    log( opts, "Writing worksheet #{worksheet.name} to #{filename}" )
+    maxrow = worksheet.dimensions[1]
+    if skipfooter
+      maxrow = maxrow - skipfooter
+    end
+    CSV.open(filename, "w") do |writer|
+      worksheet.each skip do |row|
+        if row.idx <= maxrow
+          writer << row
+        end
+      end
+    end
+  else
+    log( opts, "Ignoring unmatched worksheet: #{worksheet.name}" )
+  end
+end

data/lib/xls-split/version.rb ADDED

@@ -0,0 +1,3 @@
+module XLSSplit # namespace holding the gem's version constant
+  VERSION = "0.2" # release version; the gemspec metadata in this diff also reports '0.2'
+end

metadata ADDED

@@ -0,0 +1,62 @@
+--- !ruby/object:Gem::Specification
+name: xls-split
+version: !ruby/object:Gem::Version
+  version: '0.2'
+platform: ruby
+authors:
+- Leigh Dodds
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-01-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: spreadsheet
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 0.6.4.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 0.6.4.1
+description: Tools for splitting Excel files into CSV files
+email:
+- leigh@ldodds.com
+executables:
+- xls-split
+extensions: []
+extra_rdoc_files: []
+files:
+- LICENSE.md
+- README.md
+- bin/xls-split
+- lib/xls-split/version.rb
+homepage: http://github.com/ldodds/xls-split
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.1
+signing_key:
+specification_version: 4
+summary: Extract worksheets from XLS files into CSV files
+test_files: []