columns-matcher 0.0.2 → 0.0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/README.md +132 -0
- data/Rakefile +6 -0
- data/columns-matcher.gemspec +19 -0
- data/lib/columns-matcher/version.rb +3 -0
- data/lib/columns-matcher.rb +102 -0
- data/spec/columns-matcher_spec.rb +86 -0
- data/spec/spec_helper.rb +1 -0
- metadata +18 -6
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# Column Matcher
|
2
|
+
|
3
|
+
When you work with spreadsheets and casual users, things do not always go the way you want.
|
4
|
+
|
5
|
+
I worked on a project where users had to upload a spreadsheet with a lot of data about clients' purchases.
|
6
|
+
After 3 uploads they called me because the system "did not work well".
|
7
|
+
|
8
|
+
The files looked like the following.
|
9
|
+
|
10
|
+
File 1:
|
11
|
+
|
12
|
+
<table>
|
13
|
+
<tr>
|
14
|
+
<th>Name</th>
|
15
|
+
<th>Surname</th>
|
16
|
+
<th>Emails</th>
|
17
|
+
</tr>
|
18
|
+
<tr>
|
19
|
+
<td>John</td>
|
20
|
+
<td>Smith</td>
|
21
|
+
<td>john.smith@gmail.com</td>
|
22
|
+
</tr>
|
23
|
+
<tr>
|
24
|
+
<td>John</td>
|
25
|
+
<td>Doe</td>
|
26
|
+
<td>john.doe@hotmail.com</td>
|
27
|
+
</tr>
|
28
|
+
</table>
|
29
|
+
|
30
|
+
File 2:
|
31
|
+
|
32
|
+
<table>
|
33
|
+
<tr>
|
34
|
+
<th>Surname</th>
|
35
|
+
<th>Name</th>
|
36
|
+
<th>E-mail</th>
|
37
|
+
</tr>
|
38
|
+
<tr>
|
39
|
+
<td>Smith</td>
|
40
|
+
<td>John</td>
|
41
|
+
<td>john.smith@gmail.com</td>
|
42
|
+
</tr>
|
43
|
+
<tr>
|
44
|
+
<td>Doe</td>
|
45
|
+
<td>John</td>
|
46
|
+
<td>john.doe@hotmail.com</td>
|
47
|
+
</tr>
|
48
|
+
</table>
|
49
|
+
|
50
|
+
File 3:
|
51
|
+
|
52
|
+
<table>
|
53
|
+
<tr>
|
54
|
+
<th>Mail</th>
|
55
|
+
<th>Nombre</th>
|
56
|
+
<th>Apellido</th>
|
57
|
+
</tr>
|
58
|
+
<tr>
|
59
|
+
<td>john.smith@gmail.com</td>
|
60
|
+
<td>John</td>
|
61
|
+
<td>Smith</td>
|
62
|
+
</tr>
|
63
|
+
<tr>
|
64
|
+
<td>john.doe@hotmail.com</td>
|
65
|
+
<td>John</td>
|
66
|
+
<td>Doe</td>
|
67
|
+
</tr>
|
68
|
+
</table>
|
69
|
+
|
70
|
+
3 files, 3 different structures. 3 different headers. WTF!
|
71
|
+
How can I guess the position of the columns I'm looking for?
|
72
|
+
|
73
|
+
This gem tries to solve this problem.
|
74
|
+
|
75
|
+
## Install
|
76
|
+
|
77
|
+
Add to your Gemfile and run the `bundle` command to install it.
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
gem "columns-matcher"
|
81
|
+
```
|
82
|
+
|
83
|
+
**N.B. Requires Ruby 1.9.2 or later.**
|
84
|
+
|
85
|
+
## Use
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
|
89
|
+
@matcher = ColumnsMatcher::Matcher.new
|
90
|
+
|
91
|
+
# the column that contains the name can be labeled with "NAME", "NOME" or "NOMBRE"
|
92
|
+
@matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
|
93
|
+
|
94
|
+
# the column that contains the surname can be labeled with "SURNAME", "COGNOME" or "APELLIDOS"
|
95
|
+
@matcher.add_column("cognome", ["SURNAME", "COGNOME", "APELLIDOS"])
|
96
|
+
|
97
|
+
# We suppose the header is ["COGNOME", "NOME", "INDIRIZZO"]
|
98
|
+
@matcher.set_header(header_loaded_from_spreadsheet)
|
99
|
+
|
100
|
+
@matcher.column_of("name") # return 1
|
101
|
+
@matcher.column_of("cognome") # return 0
|
102
|
+
```
|
103
|
+
|
104
|
+
The first attempt is an exact match. If that does not work, it tries a case-insensitive match:
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
|
108
|
+
@matcher = ColumnsMatcher::Matcher.new
|
109
|
+
|
110
|
+
# the column that contains the name can be labeled with "NAME", "NOME" or "NOMBRE"
|
111
|
+
@matcher.add_column("name", ["name", "nome", "nombre"])
|
112
|
+
|
113
|
+
# We suppose the header is ["APELLIDO", "NOMBRE", "ADDRESS"]
|
114
|
+
@matcher.set_header(header_loaded_from_spreadsheet)
|
115
|
+
|
116
|
+
@matcher.column_of("name") # return 1
|
117
|
+
```
|
118
|
+
|
119
|
+
If the column cannot be found with an exact or a case-insensitive match, you can also use a regular expression:
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
|
123
|
+
@matcher = ColumnsMatcher::Matcher.new
|
124
|
+
|
125
|
+
# the column that contains the email can be labeled with variants such as "mail", "email", "E-mail" or "Emails"
|
126
|
+
@matcher.add_column("email", ["[Ee]?[\-]*mail[s]*"])
|
127
|
+
|
128
|
+
# We suppose the header is ["Surname", "Name", "Emails"]
|
129
|
+
@matcher.set_header(header_loaded_from_spreadsheet)
|
130
|
+
|
131
|
+
@matcher.column_of("email") # return 2
|
132
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/columns-matcher/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |spec|
  # Identity of the gem; the version constant comes from lib/columns-matcher/version.
  spec.name          = "columns-matcher"
  spec.version       = ColumnsMatcher::VERSION
  spec.summary       = %q{Column header label matcher}
  spec.description   = %q{Given an hash of possibles header label find the correct position of a column in the real header. Useful when you don't know the structure of a spreadsheet.}
  spec.homepage      = "https://github.com/zenkay/columns-matcher"

  # Author contact details.
  spec.authors       = ["Andrea Mostosi"]
  spec.email         = ["andrea.mostosi@zenkay.net"]

  # Packaged files are whatever git tracks at build time.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "rspec", "~> 2.6.0"
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require "columns-matcher/version"
|
2
|
+
|
3
|
+
module ColumnsMatcher
  # Finds the position of logical columns inside a spreadsheet header.
  #
  # Each logical column name is registered with one or more candidate labels.
  # Once a header is known, every column is resolved to the index of the first
  # header cell that matches one of its labels. Labels are tried in the order
  # they were given; for each label three strategies are tried in turn:
  #
  # 1. exact match (the header contains an element equal to the label)
  # 2. case-insensitive match
  # 3. the label used as a regular expression wrapped in +^+ ... +$+
  class Matcher

    def initialize
      @header  = []   # header cells, in spreadsheet order
      @matcher = {}   # column name => array of candidate labels
      @columns = {}   # column name => matched header index
    end

    # Registers the candidate labels for a single column and re-runs matching.
    #
    # @param name   [Object] key later passed to #column_of
    # @param labels [String, Array] one label or a list of labels; any other
    #   type is ignored (nothing is registered)
    # @return [Boolean] true on success, false if an error was raised
    def add_column(name, labels)
      store_labels(name, labels)
      match_columns
      true
    rescue StandardError
      # Keep the boolean contract, but let SystemExit/Interrupt propagate.
      false
    end

    # Registers several columns at once and re-runs matching.
    #
    # @param matches [Hash] column name => String or Array of labels; any
    #   other argument type is ignored
    # @return [Hash, nil] result of #match_columns, or nil when +matches+ is
    #   not a Hash
    def add_columns(matches)
      return unless matches.is_a?(Hash)

      matches.each { |name, labels| store_labels(name, labels) }
      match_columns
    end

    # Sets the header to search in and re-runs matching.
    #
    # @param header [String, Array] a single header cell or the list of header
    #   cells; any other type leaves the current header untouched
    # @return [Hash, nil] result of #match_columns, or nil if an error was raised
    def set_header(header)
      if header.is_a?(String)
        @header = [header]
      elsif header.is_a?(Array)
        @header = header
      end
      match_columns
    rescue StandardError
      # Best effort by design: a header that cannot be processed yields no
      # matches instead of an exception.
      nil
    end

    # Recomputes the position of every registered column from scratch, so that
    # results from a previous header or label set never leak into the new one.
    #
    # @return [Hash, nil] the registered labels hash when matching ran, nil
    #   when there are no labels or no header yet
    def match_columns
      @columns = {}
      return if @matcher.empty? || @header.empty?

      @matcher.each do |name, labels|
        position = position_for(labels)
        @columns[name] = position unless position.nil?
      end
    end

    # @param name [Object] column name used in #add_column / #add_columns
    # @return [Integer, nil] zero-based index in the header, or nil when the
    #   column is unknown or was not found
    def column_of(name)
      @columns[name]
    end

    private

    # Normalises +labels+ to an array and stores it under +name+.
    def store_labels(name, labels)
      if labels.is_a?(String)
        @matcher[name] = [labels]
      elsif labels.is_a?(Array)
        @matcher[name] = labels
      end
    end

    # Returns the header index of the first label that matches, or nil.
    def position_for(labels)
      labels.each do |label|
        position = @header.index(label) ||
                   case_insensitive_position(label) ||
                   pattern_position(label)
        return position unless position.nil?
      end
      nil
    end

    # Index of the first header cell equal to +label+ ignoring case, or nil.
    # Cells are converted with to_s so nil or numeric cells cannot abort matching.
    def case_insensitive_position(label)
      wanted = label.to_s.downcase
      @header.index { |head| head.to_s.downcase == wanted }
    end

    # Index of the first header cell matching +label+ as a regular expression,
    # or nil. A label that is not a valid pattern simply never matches.
    def pattern_position(label)
      pattern = Regexp.new("^#{label}$")
      @header.index { |head| head.to_s =~ pattern }
    rescue RegexpError
      nil
    end

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
describe ColumnsMatcher do

  # Exercises ColumnsMatcher::Matcher through its public API: exact,
  # case-insensitive and regular-expression label matching.
  describe "Matcher standard" do

    before do
      @matcher = ColumnsMatcher::Matcher.new
    end

    it "should be able to create the object" do
      @matcher.should_not be_nil
    end

    it "should recognize single column with a single description" do
      @matcher.add_column("name", "NOME")
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    it "should recognize single column with multiple descriptions" do
      @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    it "should recognize single column within different fields" do
      @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
    end

    it "should recognize multiple columns within different fields" do
      @matcher.add_column("name", ["NAME", "NOME", "NOMBRE"])
      @matcher.add_column("cognome", ["SURNAME", "COGNOME", "APELIDOS"])
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
      @matcher.column_of("cognome").should be(0)
    end

    it "should recognize multiple columns within different fields with a single definition" do
      @matcher.add_columns(
        "name" => ["NAME", "NOME", "NOMBRE"],
        "cognome" => ["SURNAME", "COGNOME", "APELIDOS"]
      )
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
      @matcher.column_of("cognome").should be(0)
    end

    it "should recognize single column with different case" do
      @matcher.add_column("name", ["nome"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    it "should recognize multiple columns within different fields with different case" do
      @matcher.add_column("name", ["name", "nome", "nombre"])
      @matcher.add_column("cognome", ["surname", "cognome", "apelidos"])
      @matcher.set_header(["COGNOME", "NOME", "INDIRIZZO"])
      @matcher.column_of("name").should be(1)
      @matcher.column_of("cognome").should be(0)
    end

    it "should recognize single column with a reg exp" do
      @matcher.add_column("name", ["N[AO]+ME"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be(0)
    end

    # The pattern must cover the whole header cell, so "[AO]+ME" does not match "NOME".
    it "should not find single column with a reg exp" do
      @matcher.add_column("name", ["[AO]+ME"])
      @matcher.set_header(["NOME"])
      @matcher.column_of("name").should be_nil
    end

    # This example asserts a successful match, so its description says so.
    it "should find email column with a reg exp" do
      @matcher.add_column("email", ["[Ee]?[\-]*mail[s]*"])
      @matcher.set_header(["Emails"])
      @matcher.column_of("email").should be(0)
    end

  end

end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'columns-matcher'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: columns-matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03
|
12
|
+
date: 2012-04-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70253209572420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 2.6.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70253209572420
|
25
25
|
description: Given an hash of possibles header label find the correct position of
|
26
26
|
a column in the real header. Useful when you don't know the structure of a spreadsheet.
|
27
27
|
email:
|
@@ -29,7 +29,17 @@ email:
|
|
29
29
|
executables: []
|
30
30
|
extensions: []
|
31
31
|
extra_rdoc_files: []
|
32
|
-
files:
|
32
|
+
files:
|
33
|
+
- .gitignore
|
34
|
+
- .rspec
|
35
|
+
- Gemfile
|
36
|
+
- README.md
|
37
|
+
- Rakefile
|
38
|
+
- columns-matcher.gemspec
|
39
|
+
- lib/columns-matcher.rb
|
40
|
+
- lib/columns-matcher/version.rb
|
41
|
+
- spec/columns-matcher_spec.rb
|
42
|
+
- spec/spec_helper.rb
|
33
43
|
homepage: https://github.com/zenkay/columns-matcher
|
34
44
|
licenses: []
|
35
45
|
post_install_message:
|
@@ -54,4 +64,6 @@ rubygems_version: 1.8.6
|
|
54
64
|
signing_key:
|
55
65
|
specification_version: 3
|
56
66
|
summary: Column header label matcher
|
57
|
-
test_files:
|
67
|
+
test_files:
|
68
|
+
- spec/columns-matcher_spec.rb
|
69
|
+
- spec/spec_helper.rb
|