ldap_disambiguate 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +27 -6
- data/Gemfile +1 -0
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/ldap_disambiguate.gemspec +1 -0
- data/lib/ldap_disambiguate.rb +1 -0
- data/lib/ldap_disambiguate/base.rb +2 -1
- data/lib/ldap_disambiguate/email.rb +2 -1
- data/lib/ldap_disambiguate/ldap_user.rb +12 -5
- data/lib/ldap_disambiguate/multiple_user_error.rb +1 -0
- data/lib/ldap_disambiguate/name.rb +69 -50
- data/lib/ldap_disambiguate/version.rb +2 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b9a7376706b9375a3411d78c6b213126df03c4e
|
4
|
+
data.tar.gz: 62d7444cff4ff36b8be3dc8c85ee2b4c14ef3596
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54cea172b3246bdb8ecc4bf28e43f39fb56d4a5a19100863bb0fb48dd87bb360be67dc0c9cf5352ad324ddfc8ff5acfff0971f997a72e3fcac07d39309c88dd2
|
7
|
+
data.tar.gz: 8a6b99996fd35383184c8f1aaa2c5892798cdb0c700ad3c2f9b117b3135409646be22e957cd079b4ee01269016fd0523680e5eef55230a36c39e4336b952331b
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,19 +1,40 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2017-09-14 10:19:42 -0400 using RuboCop version 0.49.1.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
+
# Offense count: 5
|
10
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 170
|
13
|
+
|
9
14
|
# Offense count: 1
|
10
15
|
# Configuration parameters: CountComments.
|
11
|
-
Metrics/
|
12
|
-
Max:
|
16
|
+
Metrics/MethodLength:
|
17
|
+
Max: 11
|
13
18
|
|
14
19
|
# Offense count: 1
|
15
|
-
|
20
|
+
# Configuration parameters: ExpectMatchingDefinition, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
21
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
22
|
+
Style/FileName:
|
23
|
+
Exclude:
|
24
|
+
- 'Gemfile'
|
25
|
+
|
26
|
+
# Offense count: 4
|
27
|
+
# Cop supports --auto-correct.
|
28
|
+
# Configuration parameters: AutoCorrect, EnforcedStyle, SupportedStyles.
|
29
|
+
# SupportedStyles: predicate, comparison
|
30
|
+
Style/NumericPredicate:
|
16
31
|
Exclude:
|
17
32
|
- 'spec/**/*'
|
18
|
-
- '
|
19
|
-
- 'lib/ldap_disambiguate.rb'
|
33
|
+
- 'lib/ldap_disambiguate/ldap_user.rb'
|
34
|
+
- 'lib/ldap_disambiguate/name.rb'
|
35
|
+
|
36
|
+
# Offense count: 1
|
37
|
+
# Cop supports --auto-correct.
|
38
|
+
Style/RedundantFreeze:
|
39
|
+
Exclude:
|
40
|
+
- 'lib/ldap_disambiguate/version.rb'
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
# Add your own tasks in files placed in lib/tasks ending in .rake,
|
3
4
|
# for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
|
4
5
|
begin
|
@@ -22,6 +23,6 @@ RuboCop::RakeTask.new(:rubocop) do |task|
|
|
22
23
|
end
|
23
24
|
|
24
25
|
desc "Run continuous integration tests"
|
25
|
-
task ci: [
|
26
|
+
task ci: %i[rubocop spec]
|
26
27
|
|
27
28
|
task default: :ci
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.4
|
data/ldap_disambiguate.gemspec
CHANGED
data/lib/ldap_disambiguate.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class allows you to use LDAP to disambiguate a text name
|
4
5
|
#
|
@@ -12,7 +13,7 @@ module LdapDisambiguate
|
|
12
13
|
end
|
13
14
|
|
14
15
|
def ldap_attrs
|
15
|
-
[
|
16
|
+
%i[uid givenname sn mail eduPersonPrimaryAffiliation displayname]
|
16
17
|
end
|
17
18
|
end
|
18
19
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class allows you to use LDAP to extract user information
|
4
5
|
# from an email or list of emails
|
@@ -14,7 +15,7 @@ module LdapDisambiguate
|
|
14
15
|
|
15
16
|
def email_in_name(email_list)
|
16
17
|
parts = email_list.split(' ')
|
17
|
-
emails = parts.
|
18
|
+
emails = parts.select { |part| part.include?('@') }
|
18
19
|
results = []
|
19
20
|
Array(emails).each do |email_str|
|
20
21
|
email = Mail::Address.new(email_str)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class provides an api for querying LDAP with different portions of the user's
|
4
5
|
# information (name parts or id)
|
@@ -17,7 +18,7 @@ module LdapDisambiguate
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def query_ldap_by_name(given_name, surname, attrs = [])
|
20
|
-
return if given_name.blank? # this method only works if we have a first name to play with
|
21
|
+
return [] if given_name.blank? # this method only works if we have a first name to play with
|
21
22
|
|
22
23
|
first_names = given_name.split(/[\s.]+/)
|
23
24
|
users = []
|
@@ -64,7 +65,7 @@ module LdapDisambiguate
|
|
64
65
|
|
65
66
|
def get_user_by_partial_id(id)
|
66
67
|
filter = Net::LDAP::Filter.construct("(& (uid=#{id}* ) #{person_filter})")
|
67
|
-
get_ldap_response(filter, %w
|
68
|
+
get_ldap_response(filter, %w[uid displayname])
|
68
69
|
end
|
69
70
|
|
70
71
|
def get_ldap_response(filter, attributes)
|
@@ -103,14 +104,20 @@ module LdapDisambiguate
|
|
103
104
|
|
104
105
|
def name_filters(first_name, middle_name, surname)
|
105
106
|
filters = []
|
106
|
-
|
107
|
+
if middle_name.blank?
|
108
|
+
filters << "(givenname=#{first_name}) (sn=#{surname})"
|
109
|
+
filters << "(givenname=#{first_name}*) (sn=#{surname})"
|
110
|
+
else
|
111
|
+
filters << "(givenname=#{first_name}*) (givenname=* #{middle_name}*) (sn=#{surname})"
|
112
|
+
middle_initial = middle_name[0]
|
113
|
+
filters << "(givenname=#{first_name}* #{middle_initial}*) (sn=#{surname})"
|
114
|
+
end
|
107
115
|
filters << "(givenname=#{first_name}) (sn=#{surname})"
|
108
|
-
filters << "(givenname=#{first_name}*) (sn=#{surname})"
|
109
116
|
filters
|
110
117
|
end
|
111
118
|
|
112
119
|
def default_attributes
|
113
|
-
[
|
120
|
+
%i[uid givenname sn mail eduPersonPrimaryAffiliation displayname]
|
114
121
|
end
|
115
122
|
|
116
123
|
def cache
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module LdapDisambiguate
|
3
4
|
# This class allows you to use LDAP to disambiguate a text name
|
4
5
|
#
|
@@ -8,55 +9,46 @@ module LdapDisambiguate
|
|
8
9
|
return if name.blank?
|
9
10
|
results = ldap_attributes_for_id(name) # text includes login id
|
10
11
|
results ||= Email.disambiguate(name) # text includes email(s)
|
11
|
-
results ||=
|
12
|
+
results ||= lookup_text_only_names(name) # straight text we need to query ldap
|
12
13
|
results
|
13
14
|
end
|
14
15
|
|
16
|
+
def clear_cache
|
17
|
+
@email_for_name_cache = {}
|
18
|
+
end
|
19
|
+
|
15
20
|
private
|
16
21
|
|
17
|
-
def
|
18
|
-
results =
|
19
|
-
|
20
|
-
result = text_only_name(n)
|
21
|
-
results << result unless result.blank?
|
22
|
+
def lookup_text_only_names(multi_name)
|
23
|
+
results = separate_names(multi_name).map do |name|
|
24
|
+
lookup_name(clean_name(name))
|
22
25
|
end
|
23
|
-
results
|
26
|
+
results.reject(&:blank?)
|
24
27
|
end
|
25
28
|
|
26
|
-
def
|
27
|
-
name = clean_name(name)
|
29
|
+
def lookup_name(name)
|
28
30
|
query_result = email_for_name(name)
|
29
31
|
query_result ||= title_after_name(name) # try again without the titles
|
30
|
-
|
32
|
+
query_result
|
31
33
|
rescue MultipleUserError
|
32
34
|
return nil
|
33
35
|
end
|
34
36
|
|
35
|
-
# titles after the name that namae had trouble parsing
|
36
|
-
def title_after_name(text_name)
|
37
|
-
result = nil
|
38
|
-
if text_name.count(',') > 0
|
39
|
-
new_name = text_name.split(',')[0]
|
40
|
-
result = email_for_name(new_name) if new_name.count(' ') > 0
|
41
|
-
end
|
42
|
-
result
|
43
|
-
end
|
44
|
-
|
45
37
|
def email_for_name(text_name)
|
46
|
-
return '' if text_name.blank?
|
47
|
-
return email_for_name_cache[text_name]
|
38
|
+
return '' if text_name.blank? || word_count(text_name) < 2
|
39
|
+
return email_for_name_cache[text_name] if email_for_name_cache.key?(text_name)
|
48
40
|
|
49
|
-
email_for_name_cache[text_name] =
|
50
|
-
email_for_name_cache[text_name]
|
41
|
+
email_for_name_cache[text_name] = translate_name_to_email(text_name)
|
51
42
|
end
|
52
43
|
|
53
|
-
|
54
|
-
|
44
|
+
# titles after the name that namae had trouble parsing
|
45
|
+
def title_after_name(text_name)
|
46
|
+
email_for_name(remove_titles(text_name))
|
55
47
|
end
|
56
48
|
|
57
|
-
def
|
58
|
-
text_name.gsub!(/[^\w\s,']/, ' ')
|
49
|
+
def translate_name_to_email(text_name)
|
59
50
|
parsed = Namae::Name.parse(text_name)
|
51
|
+
|
60
52
|
result = try_name(parsed.given, parsed.family)
|
61
53
|
result ||= title_before_name(parsed)
|
62
54
|
result ||= two_words_in_last_name(text_name)
|
@@ -65,42 +57,69 @@ module LdapDisambiguate
|
|
65
57
|
|
66
58
|
def try_name(given, family)
|
67
59
|
return nil if family.blank?
|
60
|
+
|
68
61
|
possible_users = LdapUser.query_ldap_by_name(given, family, ldap_attrs)
|
69
|
-
return nil if possible_users.blank? || possible_users.count == 0
|
70
62
|
raise(MultipleUserError, "too name results for #{given} #{family}") if possible_users.count > 1
|
71
63
|
possible_users.first
|
72
64
|
end
|
73
65
|
|
66
|
+
def title_before_name(parsed)
|
67
|
+
return unless parsed.given && multi_word?(parsed.given)
|
68
|
+
|
69
|
+
parts = name_parts(parsed.given, 1)
|
70
|
+
return if only_initial?(parts[:family])
|
71
|
+
|
72
|
+
try_name(parts[:family], parsed.family)
|
73
|
+
end
|
74
|
+
|
75
|
+
def two_words_in_last_name(text_name)
|
76
|
+
return unless word_count(text_name) > 2
|
77
|
+
|
78
|
+
parts = name_parts(text_name, 2)
|
79
|
+
try_name(parts[:given], parts[:family])
|
80
|
+
end
|
81
|
+
|
74
82
|
def name_parts(text_name, last_name_count)
|
75
|
-
|
83
|
+
return if word_count(text_name) < (last_name_count + 1)
|
84
|
+
|
85
|
+
parts = split_name_parts(text_name)
|
76
86
|
first_name_count = parts.count - last_name_count
|
77
|
-
return nil if first_name_count < 1
|
78
87
|
{ given: parts.first(first_name_count).join(' '), family: parts.last(last_name_count).join(' ') }
|
79
88
|
end
|
80
89
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
if parsed.given && parsed.given.count(' ') >= 1
|
86
|
-
parts = name_parts(parsed.given, 1)
|
87
|
-
result = try_name(parts[:family], parsed.family)
|
88
|
-
end
|
89
|
-
result
|
90
|
+
def clean_name(name)
|
91
|
+
name.gsub(/\([^)]*\)/, '')
|
92
|
+
.gsub(/[^\w\s,']/, ' ')
|
93
|
+
.strip.squeeze(' ')
|
90
94
|
end
|
91
95
|
|
92
|
-
def
|
93
|
-
|
94
|
-
if text_name.strip.count(' ') > 2
|
95
|
-
parts = name_parts(text_name, 2)
|
96
|
-
result = try_name(parts[:given], parts[:family])
|
97
|
-
end
|
98
|
-
result
|
96
|
+
def multi_word?(name)
|
97
|
+
word_count(name) > 1
|
99
98
|
end
|
100
99
|
|
101
|
-
def
|
102
|
-
name.
|
100
|
+
def word_count(name)
|
101
|
+
name.squeeze(' ').count(' ') + 1
|
103
102
|
end
|
104
|
-
|
103
|
+
|
104
|
+
def split_name_parts(name)
|
105
|
+
name.split(' ')
|
106
|
+
end
|
107
|
+
|
108
|
+
def only_initial?(name)
|
109
|
+
name.size <= 1
|
110
|
+
end
|
111
|
+
|
112
|
+
def separate_names(multi_name)
|
113
|
+
multi_name.split(/ and |;/)
|
114
|
+
end
|
115
|
+
|
116
|
+
def remove_titles(name)
|
117
|
+
name.split(',')[0]
|
118
|
+
end
|
119
|
+
|
120
|
+
def email_for_name_cache
|
121
|
+
@email_for_name_cache ||= {}
|
122
|
+
end
|
123
|
+
end
|
105
124
|
end
|
106
125
|
end
|