columns-matcher 0.0.2 → 0.0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/README.md +132 -0
- data/Rakefile +6 -0
- data/columns-matcher.gemspec +19 -0
- data/lib/columns-matcher/version.rb +3 -0
- data/lib/columns-matcher.rb +102 -0
- data/spec/columns-matcher_spec.rb +86 -0
- data/spec/spec_helper.rb +1 -0
- metadata +18 -6
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# Column Matcher
|
2
|
+
|
3
|
+
When you work with spreadsheets and casual users, things do not always go the way you want.
|
4
|
+
|
5
|
+
I worked on a project where users had to upload a spreadsheet with a lot of data about clients' purchases.
|
6
|
+
After 3 uploads they called me because the system "did not work well".
|
7
|
+
|
8
|
+
The files looked like the following.
|
9
|
+
|
10
|
+
File 1:
|
11
|
+
|
12
|
+
<table>
|
13
|
+
<tr>
|
14
|
+
<th>Name</th>
|
15
|
+
<th>Surname</th>
|
16
|
+
<th>Emails</th>
|
17
|
+
</tr>
|
18
|
+
<tr>
|
19
|
+
<td>John</td>
|
20
|
+
<td>Smith</td>
|
21
|
+
<td>john.smith@gmail.com</td>
|
22
|
+
</tr>
|
23
|
+
<tr>
|
24
|
+
<td>John</td>
|
25
|
+
<td>Doe</td>
|
26
|
+
<td>john.doe@hotmail.com</td>
|
27
|
+
</tr>
|
28
|
+
</table>
|
29
|
+
|
30
|
+
File 2:
|
31
|
+
|
32
|
+
<table>
|
33
|
+
<tr>
|
34
|
+
<th>Surname</th>
|
35
|
+
<th>Name</th>
|
36
|
+
<th>E-mail</th>
|
37
|
+
</tr>
|
38
|
+
<tr>
|
39
|
+
<td>Smith</td>
|
40
|
+
<td>John</td>
|
41
|
+
<td>john.smith@gmail.com</td>
|
42
|
+
</tr>
|
43
|
+
<tr>
|
44
|
+
<td>Doe</td>
|
45
|
+
<td>John</td>
|
46
|
+
<td>john.doe@hotmail.com</td>
|
47
|
+
</tr>
|
48
|
+
</table>
|
49
|
+
|
50
|
+
File 3:
|
51
|
+
|
52
|
+
<table>
|
53
|
+
<tr>
|
54
|
+
<th>Mail</th>
|
55
|
+
<th>Nombre</th>
|
56
|
+
<th>Apellido</th>
|
57
|
+
</tr>
|
58
|
+
<tr>
|
59
|
+
<td>john.smith@gmail.com</td>
|
60
|
+
<td>John</td>
|
61
|
+
<td>Smith</td>
|
62
|
+
</tr>
|
63
|
+
<tr>
|
64
|
+
<td>john.doe@hotmail.com</td>
|
65
|
+
<td>John</td>
|
66
|
+
<td>Doe</td>
|
67
|
+
</tr>
|
68
|
+
</table>
|
69
|
+
|
70
|
+
3 files, 3 different structures. 3 different headers. WTF!
|
71
|
+
How can I guess the position of the columns I'm looking for?
|
72
|
+
|
73
|
+
This gem tries to solve this problem.
|
74
|
+
|
75
|
+
## Install
|
76
|
+
|
77
|
+
Add to your Gemfile and run the `bundle` command to install it.
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
gem "columns-matcher"
|
81
|
+
```
|
82
|
+
|
83
|
+
**N.B. Requires Ruby 1.9.2 or later.**
|
84
|
+
|
85
|
+
## Use
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
|
89
|
+
@matcher = ColumnsMatcher::Matcher.new
|
90
|
+
|
91
|
+
# the column that contains the name can be labeled with "NAME", "NOME" or "NOMBRE"
|
92
|
+
@matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
|
93
|
+
|
94
|
+
# the column that contains the surname can be labeled with "SURNAME", "COGNOME" or "APELLIDOS"
|
95
|
+
@matcher.add_column("cognome", ["SURNAME", "COGNOME", "APELLIDOS"])
|
96
|
+
|
97
|
+
# We suppose the header is ["COGNOME", "NOME", "INDIRIZZO"]
|
98
|
+
@matcher.set_header(header_loaded_from_spreadsheet)
|
99
|
+
|
100
|
+
@matcher.column_of("name") # return 1
|
101
|
+
@matcher.column_of("cognome") # return 0
|
102
|
+
```
|
103
|
+
|
104
|
+
The first attempt is an exact match. If that does not work, it tries a case-insensitive match:
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
|
108
|
+
@matcher = ColumnsMatcher::Matcher.new
|
109
|
+
|
110
|
+
# the column that contains the name can be labeled with "NAME", "NOME" or "NOMBRE"
|
111
|
+
@matcher.add_column("name", ["name", "nome", "nombre"])
|
112
|
+
|
113
|
+
# We suppose the header is ["APELLIDO", "NOMBRE", "ADDRESS"]
|
114
|
+
@matcher.set_header(header_loaded_from_spreadsheet)
|
115
|
+
|
116
|
+
@matcher.column_of("name") # return 1
|
117
|
+
```
|
118
|
+
|
119
|
+
If the column cannot be found with an exact or a case-insensitive match, each label is also tried as a regular expression:
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
|
123
|
+
@matcher = ColumnsMatcher::Matcher.new
|
124
|
+
|
125
|
+
# the column that contains the email can be labeled with "mail", "email", "E-mail", "Emails", ...
|
126
|
+
@matcher.add_column("email", ["[Ee]?[\-]*mail[s]*"])
|
127
|
+
|
128
|
+
# We suppose the header is ["Surname", "Name", "Emails"]
|
129
|
+
@matcher.set_header(header_loaded_from_spreadsheet)
|
130
|
+
|
131
|
+
@matcher.column_of("email") # return 2
|
132
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/columns-matcher/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for columns-matcher: gem identity, the files to package
# (as listed by git) and the development-time dependency on RSpec.
Gem::Specification.new do |spec|
  # --- identity ---------------------------------------------------------
  spec.name          = "columns-matcher"
  spec.version       = ColumnsMatcher::VERSION
  spec.authors       = ["Andrea Mostosi"]
  spec.email         = ["andrea.mostosi@zenkay.net"]
  spec.homepage      = "https://github.com/zenkay/columns-matcher"
  spec.summary       = %q{Column header label matcher}
  spec.description   = %q{Given an hash of possibles header label find the correct position of a column in the real header. Useful when you don't know the structure of a spreadsheet.}

  # --- packaged files: whatever `git ls-files` reports -------------------
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are referenced by base name only.
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # --- dependencies -----------------------------------------------------
  spec.add_development_dependency "rspec", "~> 2.6.0"
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require "columns-matcher/version"
|
2
|
+
|
3
|
+
module ColumnsMatcher
  # Locates logical columns inside a spreadsheet header row.
  #
  # Each logical column (e.g. "name") is registered with one or more candidate
  # labels. Once a header row is supplied, every column is resolved to the
  # index of the first header cell matched by one of its labels. Labels are
  # tried in registration order and, for each label, three strategies are
  # attempted in turn:
  #
  # 1. exact match (<tt>==</tt>) against a header cell;
  # 2. case-insensitive match;
  # 3. the label used as a regular expression that must match the whole cell.
  class Matcher

    def initialize
      @header = []   # header cells, in spreadsheet order
      @matcher = {}  # column name => Array of candidate labels
      @columns = {}  # column name => resolved index in @header
    end

    # Registers (or replaces) the candidate labels of a single column and
    # re-resolves all columns against the current header.
    #
    # name   - identifier later passed to #column_of.
    # labels - a String (one label) or an Array of labels.
    #
    # Returns true when the labels were registered, false when +labels+ is
    # neither a String nor an Array (nothing is registered in that case).
    def add_column(name, labels)
      registered = register(name, labels)
      match_columns
      registered
    end

    # Registers several columns at once.
    #
    # matches - Hash of column name => labels (String or Array). Entries whose
    #           labels have another type are ignored.
    #
    # Returns true when +matches+ is a Hash, false otherwise.
    def add_columns(matches)
      return false unless matches.is_a?(Hash)

      matches.each { |name, labels| register(name, labels) }
      match_columns
      true
    end

    # Sets the header row and re-resolves all registered columns against it.
    #
    # header - a String (single-cell header) or an Array of cells.
    #
    # Returns true when the header was accepted, false when +header+ is
    # neither a String nor an Array (the previous header is kept).
    def set_header(header)
      case header
      when String then @header = [header]
      when Array  then @header = header
      else return false
      end

      match_columns
      true
    end

    # Resolves every registered column against the current header.
    #
    # The result is rebuilt from scratch on every call so that an index found
    # for a previous header (or previous labels) can never leak into the
    # answer for the current one.
    #
    # Returns the Hash of column name => index.
    def match_columns
      @columns = {}

      @matcher.each do |name, labels|
        labels.each do |label|
          position = index_for(label)
          next if position.nil?

          @columns[name] = position
          break # first label that matches wins
        end
      end

      @columns
    end

    # Returns the zero-based index of the column registered as +name+, or nil
    # when the column is unknown or was not found in the header.
    def column_of(name)
      @columns[name]
    end

    private

    # Stores +labels+ for +name+, wrapping a single String in an Array.
    # Returns false (and stores nothing) for any other type.
    def register(name, labels)
      case labels
      when String then @matcher[name] = [labels]
      when Array  then @matcher[name] = labels
      else return false
      end

      true
    end

    # Index of the first header cell matching +label+ (exact, then
    # case-insensitive, then regular expression), or nil.
    def index_for(label)
      exact = @header.index(label)
      return exact unless exact.nil?

      # Spreadsheet cells may be nil or numeric; compare their text form so a
      # stray cell cannot abort the whole resolution.
      cells = @header.map { |cell| cell.to_s }

      wanted = label.to_s.downcase
      relaxed = cells.index { |cell| cell.downcase == wanted }
      return relaxed unless relaxed.nil?

      pattern = whole_cell_pattern(label)
      return nil if pattern.nil?

      cells.index { |cell| cell =~ pattern }
    end

    # Builds a regular expression requiring +label+ to match an entire cell.
    # \A and \z anchor to the whole string (^ and $ only anchor to a line) and
    # the group keeps alternations such as "NAME|NOME" inside the anchors.
    # Returns nil when +label+ is not a valid regular expression.
    def whole_cell_pattern(label)
      Regexp.new("\\A(?:#{label})\\z")
    rescue RegexpError
      nil
    end

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Behavioural specification of ColumnsMatcher::Matcher: columns are registered
# with candidate labels, a header row is supplied, and #column_of must report
# the zero-based position of each column (or nil when it cannot be found).
describe ColumnsMatcher do

  describe "Matcher standard" do

    # A fresh matcher for every example so registrations never leak between them.
    before do
      @matcher = ColumnsMatcher::Matcher.new
    end

    it "should can create the object" do
      @matcher.should_not be_nil
    end

    # --- exact matches ---------------------------------------------------

    it "should recognize single column with a single description" do
      @matcher.add_column("name", "NOME")
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    it "should recognize single column with multiple descriptions" do
      @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    it "should recognize single column within different fields" do
      @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
    end

    it "should recognize multiple columns within different fields" do
      @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
      @matcher.add_column("cognome", ["SURNAME", "COGNOME", "APELIDOS"])
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
      @matcher.column_of("cognome").should be(0)
    end

    it "should recognize multiple columns within different fields with a single definition" do
      @matcher.add_columns(
        "name" => ["NAME", "NOME", "NOMBRE"],
        "cognome" => ["SURNAME", "COGNOME", "APELIDOS"]
      )
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
      @matcher.column_of("cognome").should be(0)
    end

    # --- case-insensitive matches ------------------------------------------

    it "should recognize single column with different case" do
      @matcher.add_column("name", ["nome"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    it "should recognize multiple columns within different fields with different case" do
      @matcher.add_column("name", ["name", "nome", "nombre"])
      @matcher.add_column("cognome", ["surname", "cognome", "apelidos"])
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
      @matcher.column_of("cognome").should be(0)
    end

    # --- regular-expression matches ----------------------------------------

    it "should recognize single column with a reg exp" do
      @matcher.add_column("name", ["N[AO]+ME"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    # The pattern only covers part of the cell, so it must not match.
    it "should not find single column with a reg exp" do
      @matcher.add_column("name", ["[AO]+ME"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be_nil
    end

    # This example expects a match; its description previously duplicated the
    # negative example above and contradicted the assertion.
    it "should recognize single column with an e-mail reg exp" do
      @matcher.add_column("email", ["[Ee]?[\-]*mail[s]*"])
      @matcher.set_header(["Emails"])
      @matcher.column_of("email").should be(0)
    end

  end

end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'columns-matcher'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: columns-matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03
|
12
|
+
date: 2012-04-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70253209572420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 2.6.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70253209572420
|
25
25
|
description: Given an hash of possibles header label find the correct position of
|
26
26
|
a column in the real header. Useful when you don't know the structure of a spreadsheet.
|
27
27
|
email:
|
@@ -29,7 +29,17 @@ email:
|
|
29
29
|
executables: []
|
30
30
|
extensions: []
|
31
31
|
extra_rdoc_files: []
|
32
|
-
files:
|
32
|
+
files:
|
33
|
+
- .gitignore
|
34
|
+
- .rspec
|
35
|
+
- Gemfile
|
36
|
+
- README.md
|
37
|
+
- Rakefile
|
38
|
+
- columns-matcher.gemspec
|
39
|
+
- lib/columns-matcher.rb
|
40
|
+
- lib/columns-matcher/version.rb
|
41
|
+
- spec/columns-matcher_spec.rb
|
42
|
+
- spec/spec_helper.rb
|
33
43
|
homepage: https://github.com/zenkay/columns-matcher
|
34
44
|
licenses: []
|
35
45
|
post_install_message:
|
@@ -54,4 +64,6 @@ rubygems_version: 1.8.6
|
|
54
64
|
signing_key:
|
55
65
|
specification_version: 3
|
56
66
|
summary: Column header label matcher
|
57
|
-
test_files:
|
67
|
+
test_files:
|
68
|
+
- spec/columns-matcher_spec.rb
|
69
|
+
- spec/spec_helper.rb
|