xlsx_importer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b3d2b150f788f4b35e0c2fbe8705bb7dad67c425
4
+ data.tar.gz: 4827b0a0e3b365eeed33b429882d062a83022396
5
+ SHA512:
6
+ metadata.gz: 30de840c9e84166184a78a1117d95d9d9f85448574afd58dc8f6a359de998090f98010be0c71fa3d45987dd4243416c2c3ce5a23855446fd1aeeb0b19ebc9ac8
7
+ data.tar.gz: 065786ed8e8d09b2d024108c66024997ec767fe93ef2a66e377254d5c636b617b384e1be34304c4a1dc6d15a2689965544853c390967873161e63c908bf5f9e5
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ /.idea/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in xlsx_importer.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Martin Bianculli
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,74 @@
1
+ # XlsxImporter
2
+
3
+ I really like [Smarter CSV](https://github.com/tilo/smarter_csv) but as MS Excel cannot export CSV files in unicode,
4
+ I need to import the spreadsheet directly to preserve the unicode characters.
5
+
6
+ XlsxImporter is a port from Smarter CSV with a few specific options for spreadsheets import. It uses [SimpleXlsxReader](https://github.com/woahdae/simple_xlsx_reader)
7
+ to read the MS Excel xlsx file. It only supports xlsx files.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'xlsx_importer'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install xlsx_importer
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ > XlsxImporter.process('/tmp/people.xlsx', options = {})
27
+ => [ {:first_name=>"Dan", :last_name=>"McAllister"},
28
+ {:first_name=>"Lucy", :last_name=>"Laweless"} ]
29
+ ```
30
+
31
+ ### Options
32
+
33
+ A few options from Smarter CSV are supported, and a few more were added.
34
+
35
+ | Option | Default | Explanation
36
+ |---------------------------|---------|-----------------------------------------------------------------------------
37
+ | :key_mapping_hash | nil | a hash which maps headers from the spreadsheet to keys in the result hash
38
+ | :remove_unmapped_keys | false | when using the :key_mapping_hash option, should non-mapped keys / columns be removed?
39
+ | :sheet | 0 | the number of the sheet inside the workbook to parse
40
+ | :strip_whitespace | true | remove whitespace before/after values and headers
41
+ | :downcase_header | true | downcase all column headers
42
+ | :strip_chars_from_headers | nil | RegExp to remove extraneous characters from the header line
43
+ | :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs
44
+ | :remove_empty_values | true | remove values which have nil or empty strings as values
45
+ | :remove_zero_values | false | remove values which have a numeric value equal to zero / 0
46
+ | :remove_values_matching | nil | removes key/value pairs if value matches given regular expressions
47
+ | :header_row | 1 | row number with the headers
48
+ | :date_keys | nil | Array with the keys of date fields to perform DB compatible validation
49
+
50
+ ### Example
51
+
52
+ ```ruby
53
+ result = XlsxImporter.process('tmp/test.xlsx', {
54
+ :key_mapping_hash => {
55
+ :name => :full_name,
56
+ :date_of_creation => :created_at,
57
+ :date_of_termination => :deleted_at,
58
+ },
59
+ :date_keys => [:created_at, :deleted_at],
60
+ :remove_unmapped_keys => true,
61
+ :strip_chars_from_headers => /[\/\(\)]/,
62
+ :remove_values_matching => /^[\.\-]$/,
63
+ :sheet => 1,
64
+ :header_row => 3
65
+ })
66
+ ```
67
+
68
+ ## Contributing
69
+
70
+ 1. Fork it
71
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
72
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
73
+ 4. Push to the branch (`git push origin my-new-feature`)
74
+ 5. Create new Pull Request
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+
3
desc "Open an irb session preloaded with this library"
task :console do
  # The old `-rubygems` switch is really `-r ubygems`; that shim file is no
  # longer shipped with Ruby (removed in 2.5), and RubyGems has been loaded
  # automatically since 1.9, so the flag is dropped. `-I lib` puts the gem's
  # own sources on the load path before requiring the entry file.
  sh "irb -I lib -r xlsx_importer"
end
@@ -0,0 +1,9 @@
1
+ # the following extension for class Hash is needed (from Facets of Ruby library):
2
+
3
class Hash
  # Builds a hash by pairing each entry of +keys+ with the entry of +values+
  # at the same position (adapted from the Facets of Ruby library).
  #
  # * Keys without a matching value map to nil.
  # * Values beyond the last key are ignored.
  # * When a key occurs more than once, the last occurrence wins.
  #
  # @param keys [Array] objects to use as hash keys
  # @param values [Array, nil] objects to use as values; nil is treated as empty
  # @return [Hash] a new plain Hash
  def self.zip(keys, values)
    values ||= [] # tolerate a missing value list instead of raising on nil[]
    keys.each_with_index.each_with_object({}) do |(key, index), result|
      result[key] = values[index]
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require "xlsx_importer/version"
2
+ require "extensions/hash.rb"
3
+ require "xlsx_importer/xlsx_importer"
@@ -0,0 +1,3 @@
1
module XlsxImporter
  # Gem version, read by the gemspec. Frozen so the shared constant cannot be
  # mutated in place by callers.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,81 @@
1
require 'date'

require 'simple_xlsx_reader'
2
+
3
module XlsxImporter
  # Defaults that the caller's options are merged on top of in .process.
  DEFAULT_OPTIONS = {
    :key_mapping_hash         => nil,
    :remove_unmapped_keys     => false,
    :sheet                    => 0,
    :strip_whitespace         => true,
    :downcase_header          => true,
    :strip_chars_from_headers => nil,
    :remove_empty_hashes      => true,
    :remove_empty_values      => true,
    :remove_zero_values       => false,
    :remove_values_matching   => nil,
    :header_row               => 1,
    :date_keys                => nil,
  }.freeze

  # \A / \z anchor the whole value; ^ / $ would match any single line of a
  # multi-line cell.
  BLANK_VALUE  = /\A\s*\z/
  ZERO_LITERAL = /\A(\d+|\d+\.\d+)\z/
  # Years 1000-3999, months 1-12, days 1-31 (including the 10th and 20th).
  VALID_DATE   = /\A([1-3][0-9]{3})-(0?[1-9]|1[0-2])-(0?[1-9]|[12][0-9]|3[01])\z/

  # Reads one sheet of an xlsx workbook and returns its data rows as hashes
  # keyed by the (normalised) header row.
  #
  # @param input the workbook, passed straight to SimpleXlsxReader.open
  # @param options [Hash] overrides for DEFAULT_OPTIONS
  # @return [Array<Hash>] one hash per data row below the header row
  # @raise [ArgumentError] if the requested sheet or header row does not exist
  def self.process(input, options = {})
    options = DEFAULT_OPTIONS.merge(options)

    sheet = SimpleXlsxReader.open(input).sheets[options[:sheet]]
    raise ArgumentError, "sheet #{options[:sheet].inspect} not found" if sheet.nil?

    all_rows = sheet.rows
    header_cells = all_rows[options[:header_row] - 1]
    raise ArgumentError, "header row #{options[:header_row].inspect} not found" if header_cells.nil?

    headers = build_headers(header_cells, options)

    line = 0 # counts data rows, used only for the diagnostic below
    result = []
    begin
      all_rows.drop(options[:header_row]).each do |row|
        line += 1
        hash = build_row_hash(headers, row, options)
        next if options[:remove_empty_hashes] && hash.empty?

        result << hash
      end
    rescue
      puts "Error around line #{line} \n"
      raise
    end

    result
  end

  # Checks that a value is a date in the year range 1000-3999.
  #
  # Strings are parsed with Date.parse first; any other object must respond
  # to #strftime.
  #
  # @param date [String, #strftime, Object] the value to check
  # @return [Integer, nil] 0 (the match position) when valid, nil otherwise
  def self.date_validation(date)
    date = Date.parse(date) if date.is_a?(String)
    return nil unless date.respond_to?(:strftime)

    date.strftime('%Y-%m-%d') =~ VALID_DATE
  rescue ArgumentError, RangeError
    # Date.parse could not make a date out of the string.
    nil
  end

  # Turns the raw header cells into hash keys without touching the cells
  # themselves. A nil entry in the result marks a column to drop.
  def self.build_headers(cells, options)
    strip_chars = options[:strip_chars_from_headers]
    mapping = options[:key_mapping_hash]
    mapping = nil unless mapping.is_a?(Hash) && !mapping.empty?

    cells.map do |cell|
      name = cell.to_s # header cells may be nil or non-String
      name = name.gsub(strip_chars, '') if strip_chars
      name = name.strip if options[:strip_whitespace]
      name = name.gsub(/\s+/, '_')
      name = name.downcase if options[:downcase_header]
      next nil if name.empty? # column without a usable header

      key = name.to_sym
      next key unless mapping

      if mapping.key?(key)
        mapped = mapping[key]
        mapped.nil? ? nil : mapped.to_sym
      elsif options[:remove_unmapped_keys]
        nil
      else
        key
      end
    end
  end

  # Pairs one data row with the headers and applies the value filters.
  def self.build_row_hash(headers, row, options)
    values = row
    if options[:strip_whitespace]
      # map, not map!, so the reader's row array is left as it was read
      values = row.map { |value| value.respond_to?(:strip) ? value.strip : value }
    end

    # Missing trailing cells become nil; cells beyond the last header are ignored.
    hash = Hash[headers.zip(values)]
    hash.delete(nil)

    hash.delete_if { |_key, value| blank_value?(value) } if options[:remove_empty_values]
    hash.delete_if { |_key, value| zero_value?(value) } if options[:remove_zero_values]

    pattern = options[:remove_values_matching]
    # Only Strings are matched: other cell types have no usable =~
    # (Object#=~ was removed in Ruby 3.2).
    hash.delete_if { |_key, value| value.is_a?(String) && value =~ pattern } if pattern

    date_keys = options[:date_keys]
    if date_keys.is_a?(Array)
      date_keys.each do |key|
        # Invalid dates are kept as an explicit nil rather than removed.
        hash[key] = nil if hash.key?(key) && date_validation(hash[key]).nil?
      end
    end

    hash
  end

  # True for nil and for Strings that are empty or whitespace-only.
  def self.blank_value?(value)
    value.nil? || (value.is_a?(String) && !(value =~ BLANK_VALUE).nil?)
  end

  # True for numeric zero and for unsigned numeric Strings equal to zero.
  def self.zero_value?(value)
    case value
    when Numeric then value.zero?
    when String  then !(value =~ ZERO_LITERAL).nil? && value.to_f.zero?
    else false
    end
  end

  private_class_method :build_headers, :build_row_hash, :blank_value?, :zero_value?
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
# Put the gem's own lib/ directory on the load path so the version file can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'xlsx_importer/version'

Gem::Specification.new do |spec|
  spec.name          = "xlsx_importer"
  spec.version       = XlsxImporter::VERSION
  spec.authors       = ["Martin Bianculli"]
  spec.email         = ["mbianculli@gmail.com"]
  # "xlxs" was a typo for the xlsx format; the description is also spelled out
  # so that it is no longer a copy of the summary.
  spec.description   = %q{Imports the rows of an MS Excel xlsx spreadsheet as an array of hashes, in the style of Smarter CSV}
  spec.summary       = %q{Import an xlsx file}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package exactly the files tracked by git.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"

  spec.add_dependency "simple_xlsx_reader", "~> 1.0.1"
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xlsx_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Martin Bianculli
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: simple_xlsx_reader
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.1
55
+ description: Import a xlxs file
56
+ email:
57
+ - mbianculli@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - lib/extensions/hash.rb
68
+ - lib/xlsx_importer.rb
69
+ - lib/xlsx_importer/version.rb
70
+ - lib/xlsx_importer/xlsx_importer.rb
71
+ - xlsx_importer.gemspec
72
+ homepage: ''
73
+ licenses:
74
+ - MIT
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.1.11
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: Import a xlxs file
96
+ test_files: []