columns-matcher 0.0.2 → 0.0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in columns-matcher.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,132 @@
1
+ # Column Matcher
2
+
3
+ When you work with spreadsheets and casual users, things do not always go the way you want.
4
+
5
+ I worked on a project where users had to upload spreadsheets with a lot of data about clients' purchases.
6
+ After 3 uploads they called me because the system "did not work well".
7
+
8
+ The files looked like the following.
9
+
10
+ File 1:
11
+
12
+ <table>
13
+ <tr>
14
+ <th>Name</th>
15
+ <th>Surname</th>
16
+ <th>Emails</th>
17
+ </tr>
18
+ <tr>
19
+ <td>John</td>
20
+ <td>Smith</td>
21
+ <td>john.smith@gmail.com</td>
22
+ </tr>
23
+ <tr>
24
+ <td>John</td>
25
+ <td>Doe</td>
26
+ <td>john.doe@hotmail.com</td>
27
+ </tr>
28
+ </table>
29
+
30
+ File 2:
31
+
32
+ <table>
33
+ <tr>
34
+ <th>Surname</th>
35
+ <th>Name</th>
36
+ <th>E-mail</th>
37
+ </tr>
38
+ <tr>
39
+ <td>Smith</td>
40
+ <td>John</td>
41
+ <td>john.smith@gmail.com</td>
42
+ </tr>
43
+ <tr>
44
+ <td>Doe</td>
45
+ <td>John</td>
46
+ <td>john.doe@hotmail.com</td>
47
+ </tr>
48
+ </table>
49
+
50
+ File 3:
51
+
52
+ <table>
53
+ <tr>
54
+ <th>Mail</th>
55
+ <th>Nombre</th>
56
+ <th>Apellido</th>
57
+ </tr>
58
+ <tr>
59
+ <td>john.smith@gmail.com</td>
60
+ <td>John</td>
61
+ <td>Smith</td>
62
+ </tr>
63
+ <tr>
64
+ <td>john.doe@hotmail.com</td>
65
+ <td>John</td>
66
+ <td>Doe</td>
67
+ </tr>
68
+ </table>
69
+
70
+ 3 files, 3 different structures. 3 different headers. WTF!
71
+ How can I guess the position of the columns I'm looking for?
72
+
73
+ This gem tries to solve that problem.
74
+
75
+ ## Install
76
+
77
+ Add to your Gemfile and run the `bundle` command to install it.
78
+
79
+ ```ruby
80
+ gem "columns-matcher"
81
+ ```
82
+
83
+ **N.B. Requires Ruby 1.9.2 or later.**
84
+
85
+ ## Use
86
+
87
+ ```ruby
88
+
89
+ @matcher = ColumnsMatcher::Matcher.new
90
+
91
+ # the column that contains the name can be labeled with "NAME", "NOME" or "NOMBRE"
92
+ @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
93
+
94
+ # the column that contains the surname can be labeled with "SURNAME", "COGNOME" or "APELLIDOS"
95
+ @matcher.add_column("cognome", ["SURNAME", "COGNOME", "APELLIDOS"])
96
+
97
+ # We suppose the header is ["COGNOME", "NOME", "INDIRIZZO"]
98
+ @matcher.set_header(header_loaded_from_spreadsheet)
99
+
100
+ @matcher.column_of("name") # return 1
101
+ @matcher.column_of("cognome") # return 0
102
+ ```
103
+
104
+ The first attempt is an exact match. If that does not work, a case-insensitive match is tried:
105
+
106
+ ```ruby
107
+
108
+ @matcher = ColumnsMatcher::Matcher.new
109
+
110
+ # the column that contains the name can be labeled with "NAME", "NOME" or "NOMBRE"
111
+ @matcher.add_column("name", ["name", "nome", "nombre"])
112
+
113
+ # We suppose the header is ["APELLIDO", "NOMBRE", "ADDRESS"]
114
+ @matcher.set_header(header_loaded_from_spreadsheet)
115
+
116
+ @matcher.column_of("name") # return 1
117
+ ```
118
+
119
+ If the column cannot be found with an exact or a case-insensitive match, each label is also tried as a regular expression:
120
+
121
+ ```ruby
122
+
123
+ @matcher = ColumnsMatcher::Matcher.new
124
+
125
+ # the column that contains the e-mail address can be labeled with anything matching this regular expression
126
+ @matcher.add_column("email", ["[Ee]?[\-]*mail[s]*"])
127
+
128
+ # We suppose the header is ["Surname", "Name", "Emails"]
129
+ @matcher.set_header(header_loaded_from_spreadsheet)
130
+
131
+ @matcher.column_of("email") # return 2
132
+ ```
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task default: :spec
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/columns-matcher/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Andrea Mostosi"]
6
+ gem.email = ["andrea.mostosi@zenkay.net"]
7
+ gem.description = %q{Given an hash of possibles header label find the correct position of a column in the real header. Useful when you don't know the structure of a spreadsheet.}
8
+ gem.summary = %q{Column header label matcher}
9
+ gem.homepage = "https://github.com/zenkay/columns-matcher"
10
+
11
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
+ gem.files = `git ls-files`.split("\n")
13
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ gem.name = "columns-matcher"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = ColumnsMatcher::VERSION
17
+
18
+ gem.add_development_dependency "rspec", "~> 2.6.0"
19
+ end
@@ -0,0 +1,3 @@
1
+ module ColumnsMatcher
2
+ VERSION = "0.0.2.1"
3
+ end
@@ -0,0 +1,102 @@
1
+ require "columns-matcher/version"
2
+
3
module ColumnsMatcher
  # Maps logical column names to their positions in a spreadsheet header row.
  #
  # Each logical column is registered with one or more candidate labels. For
  # every label, in the order given, three strategies are tried against the
  # header: exact equality, case-insensitive equality, and finally the label
  # interpreted as a regular expression that must match the whole cell. The
  # first label that matches by any strategy decides the column position.
  class Matcher
    def initialize
      @header = []   # header cells, in spreadsheet order
      @matcher = {}  # logical name => Array of candidate labels
      @columns = {}  # logical name => resolved zero-based position
    end

    # Registers the candidate labels for one logical column and re-runs the
    # matching against the current header.
    #
    # @param name [Object] logical column name, later passed to {#column_of}
    # @param labels [String, Array] a single label or a list of labels
    # @return [Boolean] true when the labels were registered, false when
    #   +labels+ is neither a String nor an Array (nothing is registered)
    def add_column(name, labels)
      candidates = normalize_labels(labels)
      return false if candidates.nil?

      @matcher[name] = candidates
      match_columns
      true
    end

    # Registers several logical columns at once and re-runs the matching.
    # Entries whose labels are neither a String nor an Array are ignored.
    #
    # @param matches [Hash] logical name => label or list of labels
    # @return [Hash, nil] the resolved positions, or nil when +matches+ is not
    #   a Hash or there is nothing to match yet
    def add_columns(matches)
      return unless matches.is_a?(Hash)

      matches.each do |name, labels|
        candidates = normalize_labels(labels)
        @matcher[name] = candidates unless candidates.nil?
      end
      match_columns
    end

    # Replaces the header row and re-runs the matching. A value that is
    # neither a String nor an Array leaves the current header untouched.
    #
    # @param header [String, Array] a single header cell or the full row
    # @return [Hash, nil] the resolved positions, or nil when there is nothing
    #   to match yet
    def set_header(header)
      case header
      when String then @header = [header]
      when Array then @header = header
      end
      match_columns
    end

    # Resolves every registered column against the current header.
    #
    # Positions are recomputed from scratch on each call so that a result
    # obtained with a previous header or label set can never survive once it
    # no longer applies.
    #
    # @return [Hash, nil] logical name => position for the columns found, or
    #   nil when no labels or no header have been provided yet
    def match_columns
      @columns = {}
      return if @matcher.empty? || @header.empty?

      @matcher.each do |name, labels|
        position = locate(labels)
        @columns[name] = position unless position.nil?
      end
      @columns
    end

    # @param name [Object] logical column name used at registration time
    # @return [Integer, nil] zero-based position in the header, or nil when
    #   the column is unknown or was not found
    def column_of(name)
      @columns[name]
    end

    private

    # Wraps a single label in an Array; returns nil for unsupported types.
    def normalize_labels(labels)
      case labels
      when String then [labels]
      when Array then labels
      end
    end

    # Position of the first label that matches by any strategy, or nil.
    # Index 0 is truthy in Ruby, so chaining the strategies with || is safe.
    def locate(labels)
      labels.each do |label|
        position = exact_position(label) || caseless_position(label) || pattern_position(label)
        return position unless position.nil?
      end
      nil
    end

    # Strategy 1: the header cell equals the label.
    def exact_position(label)
      @header.index(label)
    end

    # Strategy 2: the header cell equals the label ignoring case. Cells and
    # labels are compared through to_s so that blank (nil) or numeric cells
    # coming from a spreadsheet do not raise.
    def caseless_position(label)
      needle = label.to_s.downcase
      @header.index { |cell| cell.to_s.downcase == needle }
    end

    # Strategy 3: the label, read as a regular expression, matches the cell.
    def pattern_position(label)
      pattern = whole_cell_pattern(label)
      return nil if pattern.nil?

      @header.index { |cell| cell.to_s =~ pattern }
    end

    # Builds a regexp that must match the entire cell. The label is wrapped in
    # a non-capturing group so alternations such as "a|b" stay anchored on
    # both sides, and \A / \z are used instead of ^ / $ so that a multi-line
    # cell cannot match on a single line. A label that is not a valid regular
    # expression simply never matches at this stage.
    def whole_cell_pattern(label)
      Regexp.new("\\A(?:#{label})\\z")
    rescue RegexpError
      nil
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+ describe ColumnsMatcher do
4
+
5
+ describe "Matcher standard" do
6
+
7
+ before do
8
+ @matcher = ColumnsMatcher::Matcher.new
9
+ end
10
+
11
+ it "should can create the object" do
12
+ @matcher.should_not be_nil
13
+ end
14
+
15
+ it "should recognize single column with a single description" do
16
+ @matcher.add_column("name", "NOME")
17
+ @matcher.set_header(["NOME"])
18
+ @matcher.column_of("name").should be(0)
19
+ end
20
+
21
+ it "should recognize single column with multiple descriptions" do
22
+ @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
23
+ @matcher.set_header(["NOME"])
24
+ @matcher.column_of("name").should be(0)
25
+ end
26
+
27
+ it "should recognize single column within different fields" do
28
+ @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
29
+ @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
30
+ @matcher.column_of("name").should be(1)
31
+ end
32
+
33
+ it "should recognize multiple columns within different fields" do
34
+ @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
35
+ @matcher.add_column("cognome", ["SURNAME", "COGNOME", "APELIDOS"])
36
+ @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
37
+ @matcher.column_of("name").should be(1)
38
+ @matcher.column_of("cognome").should be(0)
39
+ end
40
+
41
+ it "should recognize multiple columns within different fields with a single definition" do
42
+ @matcher.add_columns(
43
+ "name" => ["NAME", "NOME", "NOMBRE"],
44
+ "cognome" => ["SURNAME", "COGNOME", "APELIDOS"]
45
+ )
46
+ @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
47
+ @matcher.column_of("name").should be(1)
48
+ @matcher.column_of("cognome").should be(0)
49
+ end
50
+
51
+ it "should recognize single column with different case" do
52
+ @matcher.add_column("name", ["nome"])
53
+ @matcher.set_header(["NOME"])
54
+ @matcher.column_of("name").should be(0)
55
+ end
56
+
57
+ it "should recognize multiple columns within different fields with different case" do
58
+ @matcher.add_column("name", ["name", "nome", "nombre"])
59
+ @matcher.add_column("cognome", ["surname", "cognome", "apelidos"])
60
+ @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
61
+ @matcher.column_of("name").should be(1)
62
+ @matcher.column_of("cognome").should be(0)
63
+ end
64
+
65
+ it "should recognize single column with a reg exp" do
66
+ @matcher.add_column("name", ["N[AO]+ME"])
67
+ @matcher.set_header(["NOME"])
68
+ @matcher.column_of("name").should be(0)
69
+ end
70
+
71
+ it "should not find single column with a reg exp" do
72
+ @matcher.add_column("name", ["[AO]+ME"])
73
+ @matcher.set_header(["NOME"])
74
+ @matcher.column_of("name").should be_nil
75
+ end
76
+
77
+ it "should not find single column with a reg exp" do
78
+ @matcher.add_column("email", ["[Ee]?[\-]*mail[s]*"])
79
+ @matcher.set_header(["Emails"])
80
+ @matcher.column_of("email").should be(0)
81
+ end
82
+
83
+ end
84
+
85
+
86
+ end
@@ -0,0 +1 @@
1
+ require 'columns-matcher'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: columns-matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-02 00:00:00.000000000Z
12
+ date: 2012-04-03 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70095432524900 !ruby/object:Gem::Requirement
16
+ requirement: &70253209572420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 2.6.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70095432524900
24
+ version_requirements: *70253209572420
25
25
  description: Given an hash of possibles header label find the correct position of
26
26
  a column in the real header. Useful when you don't know the structure of a spreadsheet.
27
27
  email:
@@ -29,7 +29,17 @@ email:
29
29
  executables: []
30
30
  extensions: []
31
31
  extra_rdoc_files: []
32
- files: []
32
+ files:
33
+ - .gitignore
34
+ - .rspec
35
+ - Gemfile
36
+ - README.md
37
+ - Rakefile
38
+ - columns-matcher.gemspec
39
+ - lib/columns-matcher.rb
40
+ - lib/columns-matcher/version.rb
41
+ - spec/columns-matcher_spec.rb
42
+ - spec/spec_helper.rb
33
43
  homepage: https://github.com/zenkay/columns-matcher
34
44
  licenses: []
35
45
  post_install_message:
@@ -54,4 +64,6 @@ rubygems_version: 1.8.6
54
64
  signing_key:
55
65
  specification_version: 3
56
66
  summary: Column header label matcher
57
- test_files: []
67
+ test_files:
68
+ - spec/columns-matcher_spec.rb
69
+ - spec/spec_helper.rb