namesplit 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
# Fallback strategies for the simplest name-splitting cases.
#
# Every helper reads the input from @full_name and writes its findings to
# @result (first_names, last_name, quality). @full_name is not assigned in
# this file; presumably the cleaning step sets it (to be confirmed).
module Namesplit
  # NOTE: a bare `private` has no effect on methods defined with `def self.`,
  # so the helpers below are public singleton methods even though they are
  # meant for internal use only.

  # Internal: last-resort split on spaces, used when no first or last name
  # could be recognised. The first word becomes the first name and every
  # remaining word, in order, becomes the last name.
  #
  # Returns the quality score that was assigned (0.3).
  def self.with_space
    # Destructure by position: deleting by value would also drop later words
    # equal to the first one ("Martin Paul Martin").
    first, *rest = @full_name.split(" ")

    @result.first_names = first
    @result.last_name = rest.join(" ")
    @result.quality = 0.3
  end

  # Internal: uses letter casing to find the last name. The first run of
  # consecutive words that are at least 70% uppercase is taken as the last
  # name; a ratio is used instead of a strict check so that hyphens and
  # similar characters inside a word do not disqualify it. Uppercase letters
  # are matched with \p{Lu} so that accented capitals ("À", "É") count too.
  #
  # Returns nil without touching @result when the whole name is upper-cased
  # (casing carries no information) or when no word qualifies; otherwise
  # returns the quality score that was assigned (0.8).
  def self.with_uppercasing
    words = @full_name.split(" ")
    return if uppercase_percentage > 0.95

    mostly_upper = lambda do |word|
      word.scan(/\p{Lu}/).size / word.size.to_f >= 0.7
    end

    start = words.index(&mostly_upper)
    return if start.nil?

    # Only the first uninterrupted run counts; later uppercase words that are
    # separated from it are ignored when building the last name.
    last_name_words = words.drop(start).take_while(&mostly_upper)

    @result.last_name = last_name_words.join(" ")
    # Array#- removes every occurrence of the last-name words from the input.
    @result.first_names = (words - last_name_words).join(" ")
    @result.quality = 0.8
  end

  # Internal: share of the letters of @full_name that are uppercase.
  #
  # Returns a Float between 0.0 and 1.0; 0.0 when the name has no letters
  # (instead of the NaN produced by dividing zero by zero).
  def self.uppercase_percentage
    letters = @full_name.scan(/\p{L}/)
    return 0.0 if letters.empty?

    @full_name.scan(/\p{Lu}/).size / letters.size.to_f
  end
end
@@ -0,0 +1,49 @@
# Adds the titleize helper used to normalise the casing of names.
module Namesplit
  # Uppercase accented letters mapped to their lowercase form. The explicit
  # table keeps these letters working on Ruby versions older than 2.4, where
  # String#downcase only handles ASCII. Built once instead of on every word.
  LOWERCASE_ACCENTS = {
    "É" => "é", "È" => "è", "Ê" => "ê", "Ë" => "ë", "À" => "à",
    "Â" => "â", "Ï" => "ï", "Î" => "î", "Ô" => "ô", "Ù" => "ù",
    "Û" => "û", "Ü" => "ü", "Ç" => "ç", "Ö" => "ö", "Ÿ" => "ÿ"
  }.freeze

  # Public: converts a string to title case.
  #
  # target - any object; it is converted with to_s, so nil yields "".
  #
  # The string is cut on whitespace, then each word on "-" and each of those
  # parts on "'", so that every component of a hyphenated or apostrophised
  # name gets its own capital ("jean-pierre d'EINSTEIN" becomes
  # "Jean-Pierre D'Einstein"). String#split drops trailing empty fields, so a
  # separator at the very end of a word is not kept.
  #
  # Returns the title-cased String, words joined by single spaces.
  def self.titleize(target)
    target.to_s.split(" ").map do |word|
      word.split("-").map do |part|
        part.split("'").map { |piece| titleize_word(piece) }.join("'")
      end.join("-")
    end.join(" ")
  end

  # NOTE: a bare `private` has no effect on methods defined with `def self.`,
  # so titleize_word is a public singleton method even though it is meant for
  # internal use only.

  # Internal: upper-cases the first character of a word and lower-cases the
  # rest.
  #
  # Example:
  #   titleize_word("BOÎTE") # => "Boîte"
  #
  # Returns a new String; "" for an empty word.
  def self.titleize_word(word)
    word.chars.each_with_index.map do |char, index|
      if index.zero?
        char.upcase
      else
        # Table first, then the built-in downcase for everything else.
        LOWERCASE_ACCENTS.fetch(char) { char.downcase }
      end
    end.join
  end
end
@@ -0,0 +1,3 @@
module Namesplit
  # Current gem version. Frozen so the shared constant cannot be mutated in
  # place by accident.
  VERSION = "0.0.6".freeze
end
data/lib/namesplit.rb ADDED
@@ -0,0 +1,26 @@
1
+ require "ostruct"
2
+
3
+ require "namesplit/version"
4
+ require "namesplit/simple_split"
5
+ require "namesplit/clean"
6
+ require "namesplit/titleize"
7
+ require "namesplit/first_names"
8
+ require "namesplit/first_names_list"
9
+
module Namesplit
  # Public: splits a full name into first names and last name by trying the
  # available strategies one after the other until one of them fills in the
  # first names.
  #
  # full_name - the raw name, or nil.
  #
  # Returns the fresh OpenStruct (quality 0, nothing else set) when full_name
  # is nil; otherwise returns whatever clean_output returns.
  def self.split(full_name)
    @result = OpenStruct.new(quality: 0)
    return @result if full_name.nil?

    clean(full_name)

    # The first-name strategy always runs; each fallback only runs while no
    # first name has been found yet.
    with_first_names
    [:with_uppercasing, :with_space].each do |fallback|
      send(fallback) if @result.first_names.nil?
    end

    clean_output
  end
end
data/namesplit.gemspec ADDED
@@ -0,0 +1,27 @@
# coding: utf-8
# Gem specification for namesplit. The lib directory is put on the load path
# first so that the version constant can be required below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "namesplit/version"

Gem::Specification.new do |spec|
  spec.name          = "namesplit"
  spec.version       = Namesplit::VERSION
  spec.authors       = ["Antoine Finkelstein"]
  spec.email         = ["antoine.fink@gmail.com"]
  spec.summary       = %q{Sépare nom et prénom}
  # The gem name was misspelled "Nameplit" in this user-facing description.
  spec.description   = %q{À partir d'un nom complet, Namesplit renvoie le nom et le prénom.}
  spec.homepage      = "https://github.com/AntoineFinkelstein/Namesplit"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3"
  spec.add_development_dependency "guard", "~> 2"
  spec.add_development_dependency "guard-rspec", "~> 4"
  spec.add_development_dependency "pry", "~> 0.1"
end
@@ -0,0 +1,129 @@
require "spec_helper"

# End-to-end examples for Namesplit.split. Each example splits its input once
# and checks the attributes of the returned result. Descriptions are unique so
# that a failure report identifies the example.
describe Namesplit do
  it "returns an empty result when nil is given" do
    result = Namesplit.split(nil)
    expect(result.first_names).to eq(nil)
    expect(result.first_name).to eq(nil)
    expect(result.last_name).to eq(nil)
  end

  it "detects uppercasing 1/3" do
    result = Namesplit.split("Jfbkzebfz OVUIFVEEI")
    expect(result.first_names).to eq("Jfbkzebfz")
    expect(result.first_name).to eq("Jfbkzebfz")
    expect(result.last_name).to eq("Ovuifveei")
  end

  it "detects uppercasing 2/3" do
    result = Namesplit.split("OVUIFVEEI jfbkzebfz")
    expect(result.first_names).to eq("Jfbkzebfz")
    expect(result.first_name).to eq("Jfbkzebfz")
    expect(result.last_name).to eq("Ovuifveei")
  end

  it "detects uppercasing 3/3" do
    result = Namesplit.split("DE OVUIFVEEI jfbkzebfz")
    expect(result.first_names).to eq("Jfbkzebfz")
    expect(result.first_name).to eq("Jfbkzebfz")
    expect(result.last_name).to eq("De Ovuifveei")
  end

  it "works with the simplest of cases" do
    result = Namesplit.split("Albert Einstein")
    expect(result.first_names).to eq("Albert")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("Einstein")
  end

  it "keeps apostrophes and hyphens in the last name" do
    result = Namesplit.split("Albert D'Einstein-Goon")
    expect(result.first_names).to eq("Albert")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("D'Einstein-Goon")
  end

  it "works with the simple case with two words last name" do
    result = Namesplit.split("Albert De Einstein")
    expect(result.first_names).to eq("Albert")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("De Einstein")
  end

  it "with multiple first names 1/2" do
    result = Namesplit.split("Albert Thomas Jacques Einstein")
    expect(result.first_names).to eq("Albert Thomas Jacques")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("Einstein")
  end

  it "with multiple first names 2/2" do
    result = Namesplit.split("Einstein Albert Thomas Jacques")
    expect(result.first_names).to eq("Albert Thomas Jacques")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("Einstein")
  end

  it "cleans the punctuation" do
    result = Namesplit.split("Albert, Einstein!")
    expect(result.first_names).to eq("Albert")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("Einstein")
  end

  it "cleans the spaces" do
    result = Namesplit.split("Albert Einstein ")
    expect(result.first_names).to eq("Albert")
    expect(result.first_name).to eq("Albert")
    expect(result.last_name).to eq("Einstein")
  end

  it "works with real data: parenthesized first name" do
    result = Namesplit.split("BEAUMANOIRE (Pierre-Yves)")
    expect(result.first_names).to eq("Pierre-Yves")
    expect(result.first_name).to eq("Pierre-Yves")
    expect(result.last_name).to eq("Beaumanoire")
  end

  it "works with real data: dash and comma separators" do
    result = Namesplit.split("Delorme - Thomas, Cécile Jeanne Marie")
    expect(result.first_names).to eq("Thomas Cécile Jeanne Marie")
    expect(result.first_name).to eq("Thomas")
    expect(result.last_name).to eq("Delorme")
  end

  it "works with real data: all-uppercase input with commas" do
    result = Namesplit.split("BERGES MANON, ANASTASIA, ANTOINETTE")
    expect(result.last_name).to eq("Berges")
    expect(result.first_names).to eq("Manon Anastasia Antoinette")
    expect(result.first_name).to eq("Manon")
  end

  it "works with real data: two-word uppercase last name" do
    result = Namesplit.split("LE ROUX Sylvain, Marc, Denis")
    expect(result.first_names).to eq("Sylvain Marc Denis")
    expect(result.first_name).to eq("Sylvain")
    expect(result.last_name).to eq("Le Roux")
  end

  it "works with real data: hyphenated first name" do
    result = Namesplit.split("FINCK Jean-Eric")
    expect(result.first_names).to eq("Jean-Eric")
    expect(result.first_name).to eq("Jean-Eric")
    expect(result.last_name).to eq("Finck")
  end

  it "works with real data: four-word uppercase last name" do
    result = Namesplit.split("DUARTE PEREIRA DA COSTA Nelson Edgar")
    expect(result.first_names).to eq("Nelson Edgar")
    expect(result.first_name).to eq("Nelson")
    expect(result.last_name).to eq("Duarte Pereira Da Costa")
  end

  it "works with real data: hyphenated uppercase last name" do
    result = Namesplit.split("ORTIZ-GOBO Didier Florent")
    expect(result.first_names).to eq("Didier Florent")
    expect(result.first_name).to eq("Didier")
    expect(result.last_name).to eq("Ortiz-Gobo")
    expect(result.full_name).to eq("Ortiz-Gobo Didier Florent")
  end
end
@@ -0,0 +1,2 @@
# Test bootstrap: loads the debugger and the library under test.
require "pry"
# The entry file is lib/namesplit.rb; the lowercase name is required so the
# lookup also succeeds on case-sensitive filesystems.
require "namesplit"
metadata ADDED
@@ -0,0 +1,147 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: namesplit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - Antoine Finkelstein
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '4'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '4'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.1'
97
+ description: "À partir d'un nom complet, Namesplit renvoie le nom et le prénom."
98
+ email:
99
+ - antoine.fink@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - Gemfile
106
+ - Guardfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - lib/namesplit.rb
111
+ - lib/namesplit/clean.rb
112
+ - lib/namesplit/first_names.rb
113
+ - lib/namesplit/first_names_list.rb
114
+ - lib/namesplit/simple_split.rb
115
+ - lib/namesplit/titleize.rb
116
+ - lib/namesplit/version.rb
117
+ - namesplit.gemspec
118
+ - spec/namesplit_spec.rb
119
+ - spec/spec_helper.rb
120
+ homepage: https://github.com/AntoineFinkelstein/Namesplit
121
+ licenses:
122
+ - MIT
123
+ metadata: {}
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubyforge_project:
140
+ rubygems_version: 2.4.1
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: Sépare nom et prénom
144
+ test_files:
145
+ - spec/namesplit_spec.rb
146
+ - spec/spec_helper.rb
147
+ has_rdoc: