ldap_disambiguate 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +27 -6
- data/Gemfile +1 -0
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/ldap_disambiguate.gemspec +1 -0
- data/lib/ldap_disambiguate.rb +1 -0
- data/lib/ldap_disambiguate/base.rb +2 -1
- data/lib/ldap_disambiguate/email.rb +2 -1
- data/lib/ldap_disambiguate/ldap_user.rb +12 -5
- data/lib/ldap_disambiguate/multiple_user_error.rb +1 -0
- data/lib/ldap_disambiguate/name.rb +69 -50
- data/lib/ldap_disambiguate/version.rb +2 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b9a7376706b9375a3411d78c6b213126df03c4e
|
4
|
+
data.tar.gz: 62d7444cff4ff36b8be3dc8c85ee2b4c14ef3596
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54cea172b3246bdb8ecc4bf28e43f39fb56d4a5a19100863bb0fb48dd87bb360be67dc0c9cf5352ad324ddfc8ff5acfff0971f997a72e3fcac07d39309c88dd2
|
7
|
+
data.tar.gz: 8a6b99996fd35383184c8f1aaa2c5892798cdb0c700ad3c2f9b117b3135409646be22e957cd079b4ee01269016fd0523680e5eef55230a36c39e4336b952331b
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,19 +1,40 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2017-09-14 10:19:42 -0400 using RuboCop version 0.49.1.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
+
# Offense count: 5
|
10
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 170
|
13
|
+
|
9
14
|
# Offense count: 1
|
10
15
|
# Configuration parameters: CountComments.
|
11
|
-
Metrics/
|
12
|
-
Max:
|
16
|
+
Metrics/MethodLength:
|
17
|
+
Max: 11
|
13
18
|
|
14
19
|
# Offense count: 1
|
15
|
-
|
20
|
+
# Configuration parameters: ExpectMatchingDefinition, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
21
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
22
|
+
Style/FileName:
|
23
|
+
Exclude:
|
24
|
+
- 'Gemfile'
|
25
|
+
|
26
|
+
# Offense count: 4
|
27
|
+
# Cop supports --auto-correct.
|
28
|
+
# Configuration parameters: AutoCorrect, EnforcedStyle, SupportedStyles.
|
29
|
+
# SupportedStyles: predicate, comparison
|
30
|
+
Style/NumericPredicate:
|
16
31
|
Exclude:
|
17
32
|
- 'spec/**/*'
|
18
|
-
- '
|
19
|
-
- 'lib/ldap_disambiguate.rb'
|
33
|
+
- 'lib/ldap_disambiguate/ldap_user.rb'
|
34
|
+
- 'lib/ldap_disambiguate/name.rb'
|
35
|
+
|
36
|
+
# Offense count: 1
|
37
|
+
# Cop supports --auto-correct.
|
38
|
+
Style/RedundantFreeze:
|
39
|
+
Exclude:
|
40
|
+
- 'lib/ldap_disambiguate/version.rb'
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
# Add your own tasks in files placed in lib/tasks ending in .rake,
|
3
4
|
# for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
|
4
5
|
begin
|
@@ -22,6 +23,6 @@ RuboCop::RakeTask.new(:rubocop) do |task|
|
|
22
23
|
end
|
23
24
|
|
24
25
|
desc "Run continuous integration tests"
|
25
|
-
task ci: [
|
26
|
+
task ci: %i[rubocop spec]
|
26
27
|
|
27
28
|
task default: :ci
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.4
|
data/ldap_disambiguate.gemspec
CHANGED
data/lib/ldap_disambiguate.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class allows you to use LDAP to disambiguate a text name
|
4
5
|
#
|
@@ -12,7 +13,7 @@ module LdapDisambiguate
|
|
12
13
|
end
|
13
14
|
|
14
15
|
def ldap_attrs
|
15
|
-
[
|
16
|
+
%i[uid givenname sn mail eduPersonPrimaryAffiliation displayname]
|
16
17
|
end
|
17
18
|
end
|
18
19
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class allows you to use LDAP to extract user information
|
4
5
|
# from an email or list of emails
|
@@ -14,7 +15,7 @@ module LdapDisambiguate
|
|
14
15
|
|
15
16
|
def email_in_name(email_list)
|
16
17
|
parts = email_list.split(' ')
|
17
|
-
emails = parts.
|
18
|
+
emails = parts.select { |part| part.include?('@') }
|
18
19
|
results = []
|
19
20
|
Array(emails).each do |email_str|
|
20
21
|
email = Mail::Address.new(email_str)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class provides an API for querying LDAP with different portions of the user's
|
4
5
|
# information (name parts or id)
|
@@ -17,7 +18,7 @@ module LdapDisambiguate
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def query_ldap_by_name(given_name, surname, attrs = [])
|
20
|
-
return if given_name.blank? # this method only work if we have a first name to play with
|
21
|
+
return [] if given_name.blank? # this method only work if we have a first name to play with
|
21
22
|
|
22
23
|
first_names = given_name.split(/[\s.]+/)
|
23
24
|
users = []
|
@@ -64,7 +65,7 @@ module LdapDisambiguate
|
|
64
65
|
|
65
66
|
def get_user_by_partial_id(id)
|
66
67
|
filter = Net::LDAP::Filter.construct("(& (uid=#{id}* ) #{person_filter})")
|
67
|
-
get_ldap_response(filter, %w
|
68
|
+
get_ldap_response(filter, %w[uid displayname])
|
68
69
|
end
|
69
70
|
|
70
71
|
def get_ldap_response(filter, attributes)
|
@@ -103,14 +104,20 @@ module LdapDisambiguate
|
|
103
104
|
|
104
105
|
def name_filters(first_name, middle_name, surname)
|
105
106
|
filters = []
|
106
|
-
|
107
|
+
if middle_name.blank?
|
108
|
+
filters << "(givenname=#{first_name}) (sn=#{surname})"
|
109
|
+
filters << "(givenname=#{first_name}*) (sn=#{surname})"
|
110
|
+
else
|
111
|
+
filters << "(givenname=#{first_name}*) (givenname=* #{middle_name}*) (sn=#{surname})"
|
112
|
+
middle_initial = middle_name[0]
|
113
|
+
filters << "(givenname=#{first_name}* #{middle_initial}*) (sn=#{surname})"
|
114
|
+
end
|
107
115
|
filters << "(givenname=#{first_name}) (sn=#{surname})"
|
108
|
-
filters << "(givenname=#{first_name}*) (sn=#{surname})"
|
109
116
|
filters
|
110
117
|
end
|
111
118
|
|
112
119
|
def default_attributes
|
113
|
-
[
|
120
|
+
%i[uid givenname sn mail eduPersonPrimaryAffiliation displayname]
|
114
121
|
end
|
115
122
|
|
116
123
|
def cache
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class allows you to use LDAP to disambiguate a text name
|
4
5
|
#
|
@@ -8,55 +9,46 @@ module LdapDisambiguate
|
|
8
9
|
return if name.blank?
|
9
10
|
results = ldap_attributes_for_id(name) # text includes login id
|
10
11
|
results ||= Email.disambiguate(name) # text includes email(s)
|
11
|
-
results ||=
|
12
|
+
results ||= lookup_text_only_names(name) # straight text we need to query ldap
|
12
13
|
results
|
13
14
|
end
|
14
15
|
|
16
|
+
def clear_cache
|
17
|
+
@email_for_name_cache = {}
|
18
|
+
end
|
19
|
+
|
15
20
|
private
|
16
21
|
|
17
|
-
def
|
18
|
-
results =
|
19
|
-
|
20
|
-
result = text_only_name(n)
|
21
|
-
results << result unless result.blank?
|
22
|
+
def lookup_text_only_names(multi_name)
|
23
|
+
results = separate_names(multi_name).map do |name|
|
24
|
+
lookup_name(clean_name(name))
|
22
25
|
end
|
23
|
-
results
|
26
|
+
results.reject(&:blank?)
|
24
27
|
end
|
25
28
|
|
26
|
-
def
|
27
|
-
name = clean_name(name)
|
29
|
+
def lookup_name(name)
|
28
30
|
query_result = email_for_name(name)
|
29
31
|
query_result ||= title_after_name(name) # try again without the titles
|
30
|
-
|
32
|
+
query_result
|
31
33
|
rescue MultipleUserError
|
32
34
|
return nil
|
33
35
|
end
|
34
36
|
|
35
|
-
# titles after the name that namae had trouble parsing
|
36
|
-
def title_after_name(text_name)
|
37
|
-
result = nil
|
38
|
-
if text_name.count(',') > 0
|
39
|
-
new_name = text_name.split(',')[0]
|
40
|
-
result = email_for_name(new_name) if new_name.count(' ') > 0
|
41
|
-
end
|
42
|
-
result
|
43
|
-
end
|
44
|
-
|
45
37
|
def email_for_name(text_name)
|
46
|
-
return '' if text_name.blank?
|
47
|
-
return email_for_name_cache[text_name]
|
38
|
+
return '' if text_name.blank? || word_count(text_name) < 2
|
39
|
+
return email_for_name_cache[text_name] if email_for_name_cache.key?(text_name)
|
48
40
|
|
49
|
-
email_for_name_cache[text_name] =
|
50
|
-
email_for_name_cache[text_name]
|
41
|
+
email_for_name_cache[text_name] = translate_name_to_email(text_name)
|
51
42
|
end
|
52
43
|
|
53
|
-
|
54
|
-
|
44
|
+
# titles after the name that namae had trouble parsing
|
45
|
+
def title_after_name(text_name)
|
46
|
+
email_for_name(remove_titles(text_name))
|
55
47
|
end
|
56
48
|
|
57
|
-
def
|
58
|
-
text_name.gsub!(/[^\w\s,']/, ' ')
|
49
|
+
def translate_name_to_email(text_name)
|
59
50
|
parsed = Namae::Name.parse(text_name)
|
51
|
+
|
60
52
|
result = try_name(parsed.given, parsed.family)
|
61
53
|
result ||= title_before_name(parsed)
|
62
54
|
result ||= two_words_in_last_name(text_name)
|
@@ -65,42 +57,69 @@ module LdapDisambiguate
|
|
65
57
|
|
66
58
|
def try_name(given, family)
|
67
59
|
return nil if family.blank?
|
60
|
+
|
68
61
|
possible_users = LdapUser.query_ldap_by_name(given, family, ldap_attrs)
|
69
|
-
return nil if possible_users.blank? || possible_users.count == 0
|
70
62
|
raise(MultipleUserError, "too name results for #{given} #{family}") if possible_users.count > 1
|
71
63
|
possible_users.first
|
72
64
|
end
|
73
65
|
|
66
|
+
def title_before_name(parsed)
|
67
|
+
return unless parsed.given && multi_word?(parsed.given)
|
68
|
+
|
69
|
+
parts = name_parts(parsed.given, 1)
|
70
|
+
return if only_initial?(parts[:family])
|
71
|
+
|
72
|
+
try_name(parts[:family], parsed.family)
|
73
|
+
end
|
74
|
+
|
75
|
+
def two_words_in_last_name(text_name)
|
76
|
+
return unless word_count(text_name) > 2
|
77
|
+
|
78
|
+
parts = name_parts(text_name, 2)
|
79
|
+
try_name(parts[:given], parts[:family])
|
80
|
+
end
|
81
|
+
|
74
82
|
def name_parts(text_name, last_name_count)
|
75
|
-
|
83
|
+
return if word_count(text_name) < (last_name_count + 1)
|
84
|
+
|
85
|
+
parts = split_name_parts(text_name)
|
76
86
|
first_name_count = parts.count - last_name_count
|
77
|
-
return nil if first_name_count < 1
|
78
87
|
{ given: parts.first(first_name_count).join(' '), family: parts.last(last_name_count).join(' ') }
|
79
88
|
end
|
80
89
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
if parsed.given && parsed.given.count(' ') >= 1
|
86
|
-
parts = name_parts(parsed.given, 1)
|
87
|
-
result = try_name(parts[:family], parsed.family)
|
88
|
-
end
|
89
|
-
result
|
90
|
+
def clean_name(name)
|
91
|
+
name.gsub(/\([^)]*\)/, '')
|
92
|
+
.gsub(/[^\w\s,']/, ' ')
|
93
|
+
.strip.squeeze(' ')
|
90
94
|
end
|
91
95
|
|
92
|
-
def
|
93
|
-
|
94
|
-
if text_name.strip.count(' ') > 2
|
95
|
-
parts = name_parts(text_name, 2)
|
96
|
-
result = try_name(parts[:given], parts[:family])
|
97
|
-
end
|
98
|
-
result
|
96
|
+
def multi_word?(name)
|
97
|
+
word_count(name) > 1
|
99
98
|
end
|
100
99
|
|
101
|
-
def
|
102
|
-
name.
|
100
|
+
def word_count(name)
|
101
|
+
name.squeeze(' ').count(' ') + 1
|
103
102
|
end
|
104
|
-
|
103
|
+
|
104
|
+
def split_name_parts(name)
|
105
|
+
name.split(' ')
|
106
|
+
end
|
107
|
+
|
108
|
+
def only_initial?(name)
|
109
|
+
name.size <= 1
|
110
|
+
end
|
111
|
+
|
112
|
+
def separate_names(multi_name)
|
113
|
+
multi_name.split(/ and |;/)
|
114
|
+
end
|
115
|
+
|
116
|
+
def remove_titles(name)
|
117
|
+
name.split(',')[0]
|
118
|
+
end
|
119
|
+
|
120
|
+
def email_for_name_cache
|
121
|
+
@email_for_name_cache ||= {}
|
122
|
+
end
|
123
|
+
end
|
105
124
|
end
|
106
125
|
end
|