noble_names 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +8 -1
- data/data/business_particles.yml +50 -0
- data/data/{particles.yml → nobility_particles.yml} +10 -1
- data/data/{prefixes.yml → nobility_prefixes.yml} +1 -1
- data/lib/noble_names.rb +35 -21
- data/lib/noble_names/config.rb +1 -1
- data/lib/noble_names/core_ext/string.rb +5 -1
- data/lib/noble_names/data.rb +8 -18
- data/lib/noble_names/match_index.rb +94 -0
- data/lib/noble_names/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6efdc8370018eb99c8e94a21abb59f7ea0d6b99a
|
4
|
+
data.tar.gz: 267a58e52253d8caab93db519568e16331365731
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 486061cc648a081981a0411443bcfc03ba9bbc7e2a81def3ecb079f2590409d8e2aba36e73688e1552678f1ee096d007730c876dc65ae1603fb0d2b8993043ac
|
7
|
+
data.tar.gz: b19cbeed4756a0ce0232427d5c3aecd8b585fd2dd45f85430527fd6a47b31ba3a8444204cbc784797d41a9d10c82c6559575cb7e27a0c1cbb462d827bef3ca75
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ my_string #=> "Joseph von und zu Reinbeck"
|
|
39
39
|
```
|
40
40
|
|
41
41
|
### Languages
|
42
|
-
So far English, German, French, Spanish and Portuguese are supported.
|
42
|
+
So far English, German, French, Spanish, Dutch and Portuguese are supported.
|
43
43
|
By default all available languages are used. If you want to configure which one
|
44
44
|
to use for your application, you can do it like so:
|
45
45
|
```ruby
|
@@ -52,6 +52,13 @@ This way other language particles will be ignored.
|
|
52
52
|
"joseph von und zu reinbeck".to_title #=> "Joseph Von Und Zu Reinbeck"
|
53
53
|
```
|
54
54
|
|
55
|
+
### Business Names
|
56
|
+
The current stance on business names is to only correct the business particle
|
57
|
+
if need be and to leave the rest of the name alone.
|
58
|
+
```ruby
|
59
|
+
"bauhelm co. kg".to_title #=> "bauhelm Co. KG"
|
60
|
+
```
|
61
|
+
|
55
62
|
## Development
|
56
63
|
|
57
64
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -0,0 +1,50 @@
|
|
1
|
+
business_particles:
|
2
|
+
german:
|
3
|
+
- OHG
|
4
|
+
- KG
|
5
|
+
- GmbH
|
6
|
+
- Co.
|
7
|
+
- GbR
|
8
|
+
- AG
|
9
|
+
- PartG
|
10
|
+
- PartGmbBH
|
11
|
+
- e.V.
|
12
|
+
- r.V.
|
13
|
+
- KGaA
|
14
|
+
english:
|
15
|
+
- Ltd.
|
16
|
+
- CIC
|
17
|
+
- CIO
|
18
|
+
- LLP
|
19
|
+
- LP
|
20
|
+
- plc
|
21
|
+
- Ccc
|
22
|
+
- LLLP
|
23
|
+
- LP
|
24
|
+
- LLC
|
25
|
+
- LC
|
26
|
+
- PLLC
|
27
|
+
- Corp.
|
28
|
+
- Inc.
|
29
|
+
dutch:
|
30
|
+
- Mts
|
31
|
+
- vof
|
32
|
+
- cv
|
33
|
+
- bv
|
34
|
+
- nv
|
35
|
+
french:
|
36
|
+
- EI
|
37
|
+
- EURL
|
38
|
+
- SASU
|
39
|
+
- FCP
|
40
|
+
- SICAV
|
41
|
+
- GIE
|
42
|
+
- SEP
|
43
|
+
- SNC
|
44
|
+
- SCS
|
45
|
+
- SCA
|
46
|
+
- SARL
|
47
|
+
- SA
|
48
|
+
- SCOP
|
49
|
+
- SEM
|
50
|
+
- SAS
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
nobility_particles:
|
2
2
|
german:
|
3
3
|
- 'von'
|
4
4
|
- 'und'
|
@@ -6,8 +6,13 @@ particles:
|
|
6
6
|
- 'v.'
|
7
7
|
- 'vom'
|
8
8
|
- 'zum'
|
9
|
+
- 'zur'
|
9
10
|
- 'der'
|
10
11
|
- 'dem'
|
12
|
+
- 'den'
|
13
|
+
- 'in'
|
14
|
+
- 'v.d.'
|
15
|
+
- 'geb.'
|
11
16
|
english:
|
12
17
|
- 'of'
|
13
18
|
- 'de'
|
@@ -22,5 +27,9 @@ particles:
|
|
22
27
|
- 'de'
|
23
28
|
- 'da'
|
24
29
|
- 'du'
|
30
|
+
dutch:
|
31
|
+
- 'van'
|
32
|
+
- 'de'
|
33
|
+
- 'ter'
|
25
34
|
|
26
35
|
|
data/lib/noble_names.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# Coding: UTF-8
|
2
|
+
require 'noble_names/match_index'
|
2
3
|
require 'noble_names/version'
|
3
4
|
require 'noble_names/config'
|
4
5
|
require 'noble_names/initializer'
|
@@ -11,8 +12,8 @@ module NobleNames
|
|
11
12
|
# @param [String] word the word that needs to be capitalized.
|
12
13
|
# @return [String] word the word either capitalized or not.
|
13
14
|
def self.noble_capitalize(word)
|
14
|
-
prefix = prefix?(word)
|
15
|
-
if in_particle_list?(word)
|
15
|
+
prefix = Data.nobility_prefixes.prefix?(word)
|
16
|
+
if Data.nobility_particles.in_particle_list?(word)
|
16
17
|
word.downcase
|
17
18
|
elsif prefix
|
18
19
|
capitalize(prefix) + capitalize(word.gsub(prefix, ''))
|
@@ -23,15 +24,23 @@ module NobleNames
|
|
23
24
|
|
24
25
|
# Upcases the first small letters in each word,
|
25
26
|
# separated by hyphens.
|
27
|
+
# The word is also not capitalized if it already contains
|
28
|
+
# a capitalized letter. This is to allow Business Names
|
29
|
+
# to have custom capitalization.
|
26
30
|
# But beware, words separated by spaces stay small.
|
27
31
|
# @return [String] the capitalized word.
|
28
32
|
# @example
|
29
33
|
# capitalize('hans-ebert') #=> 'Hans-Ebert'
|
30
34
|
# capitalize('john') #=> 'John'
|
31
35
|
# capitalize('john james') #=> 'John james'
|
36
|
+
# capitalize('eBase') #=> 'eBase'
|
32
37
|
def self.capitalize(word)
|
33
|
-
word
|
34
|
-
|
38
|
+
if word =~ /[A-Z]|Ä|Ö|Ü/
|
39
|
+
word
|
40
|
+
else
|
41
|
+
word.gsub first_small_letters do |letter|
|
42
|
+
upcase(letter)
|
43
|
+
end
|
35
44
|
end
|
36
45
|
end
|
37
46
|
|
@@ -58,26 +67,31 @@ module NobleNames
|
|
58
67
|
/((\A.|(?<=\-).))/
|
59
68
|
end
|
60
69
|
|
61
|
-
# Checks weither a word is in the
|
62
|
-
# @param [String] word
|
63
|
-
# @return [Boolean] `true` if `word` is
|
64
|
-
# `false` otherwise
|
65
|
-
def self.
|
66
|
-
Data.
|
70
|
+
# Checks whether a word is in the business particle list
|
71
|
+
# @param [String] word The word in question.
|
72
|
+
# @return [Boolean] result `true` if `word` is a business-particle
|
73
|
+
# `false` otherwise
|
74
|
+
def self.business_particle?(word)
|
75
|
+
Data.business_particles.in_particle_list? word
|
67
76
|
end
|
68
77
|
|
69
|
-
#
|
70
|
-
#
|
71
|
-
# @param [
|
72
|
-
# @return [
|
73
|
-
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
|
77
|
-
|
78
|
-
|
78
|
+
# Corrects only the business particle and leaves the
|
79
|
+
# other words alone.
|
80
|
+
# @param [Array] words An array of words to be checked.
|
81
|
+
# @return [Array] words An array of corrected words.
|
82
|
+
# @example A Business Name
|
83
|
+
# correct_business_particles([
|
84
|
+
# 'cool', 'and', 'hip', 'gmbh'
|
85
|
+
# ]) #=> ['cool', 'and', 'hip', 'GmbH']
|
86
|
+
def self.correct_business_particles(words)
|
87
|
+
words.map! do |word|
|
88
|
+
if business_particle?(word)
|
89
|
+
word
|
90
|
+
.replace(Data.business_particles.particles[word.downcase])
|
91
|
+
else
|
92
|
+
word
|
93
|
+
end
|
79
94
|
end
|
80
|
-
nil
|
81
95
|
end
|
82
96
|
|
83
97
|
# Applies the core extension
|
data/lib/noble_names/config.rb
CHANGED
@@ -21,7 +21,11 @@ module NobleNames
|
|
21
21
|
# str #=> 'Jamie of Windsor'
|
22
22
|
def to_title!
|
23
23
|
words = split(/\s+/)
|
24
|
-
words.
|
24
|
+
if words.any? { |w| NobleNames.business_particle? w }
|
25
|
+
NobleNames.correct_business_particles words
|
26
|
+
else
|
27
|
+
words.map! { |w| NobleNames.noble_capitalize(w) }
|
28
|
+
end
|
25
29
|
replace(words * ' ')
|
26
30
|
end
|
27
31
|
end
|
data/lib/noble_names/data.rb
CHANGED
@@ -1,28 +1,18 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
|
3
1
|
module NobleNames
|
4
2
|
# The module responsible for maintaining and delivering
|
5
3
|
# the match data as defined in the `data` directory.
|
6
4
|
module Data
|
7
5
|
DATA_PATH = File.expand_path('../../../data/', __FILE__).freeze
|
8
|
-
MATCH_DATA = Hash[Dir.glob(DATA_PATH + '/*.yml').collect do |f|
|
9
|
-
yaml = YAML.load_file(f)
|
10
|
-
yaml.first
|
11
|
-
end]
|
12
|
-
|
13
|
-
def self.particles
|
14
|
-
select_languages(MATCH_DATA['particles'])
|
15
|
-
end
|
16
6
|
|
17
|
-
|
18
|
-
|
19
|
-
|
7
|
+
@nobility_particles = MatchIndex.new('nobility_particles.yml')
|
8
|
+
@nobility_prefixes = MatchIndex.new('nobility_prefixes.yml')
|
9
|
+
@business_particles = MatchIndex.new('business_particles.yml')
|
20
10
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
11
|
+
# Exposes the module-level instance variables (for example
# `@nobility_particles`) as reader-style methods, so that
# `Data.nobility_particles` returns the matching instance variable.
# Any other call is forwarded to `super` with its original arguments
# and therefore ends in the regular `NoMethodError`.
# @param [Symbol] method the name of the missing method.
# @param [Array] args the arguments of the original call.
# @param [Proc] block the block of the original call, if any.
# @return [Object] the value of the instance variable named after
#   `method`.
# @raise [NoMethodError] if no truthy instance variable of that
#   name exists.
def self.method_missing(method, *args, &block)
  name = method.to_s
  # Names such as `foo?` or `foo=` are not valid instance variable
  # names; asking for them would raise a NameError about the variable
  # instead of the expected NoMethodError, so they go straight to super.
  value = instance_variable_get("@#{name}") if name =~ /\A[A-Za-z_]\w*\z/
  # Bare `super` forwards `method`, `*args` and `&block` unchanged.
  value || super
end

# Keeps `respond_to?` in sync with {method_missing}.
# @param [Symbol] method the method name that is being queried.
# @param [Boolean] include_private passed through to `super`.
# @return [Boolean] `true` if a truthy instance variable named after
#   `method` exists, otherwise whatever `super` answers.
def self.respond_to_missing?(method, include_private = false)
  name = method.to_s
  return true if name =~ /\A[A-Za-z_]\w*\z/ && instance_variable_get("@#{name}")
  super
end
|
27
17
|
end
|
28
18
|
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module NobleNames
|
4
|
+
# A {MatchIndex} holds the data necessary for finding
|
5
|
+
# prefixes and particles in Strings and checks them for it.
|
6
|
+
# {MatchIndex}s use Hashes for finding particles to guarantee
|
7
|
+
# constant performance in big particle lists.
|
8
|
+
# A {MatchIndex} has a lot of mutable state to cache as much matching
|
9
|
+
# information as possible.
|
10
|
+
class MatchIndex
  attr_accessor :data

  # Takes either a String or any Object and tries to
  # convert it to a hash.
  # @return [MatchIndex] match_index a new {MatchIndex}
  # @param [String, Object] list if this is a string, it will
  #   be treated as a file-name in the `data/` folder from
  #   which a Hash will be extracted, containing
  #   language-keys and data.
  # @example A correct data list
  #   MatchIndex.new({
  #     'german' => ['von', 'zu'],
  #     'english' => ['of']
  #     })
  def initialize(list)
    case list
    when String
      # The YAML files have a single top-level key; only its value
      # (the language => entries Hash) is kept.
      @data = YAML.load_file(File.expand_path(list, Data::DATA_PATH))
      @data = @data.values.first
    else
      @data = Hash[list]
    end
    # Remember the languages the caches are built for, so that
    # `selected_data` works right after construction and a later
    # configuration change can be detected.
    @languages = NobleNames.configuration.languages
  end

  # Returns and caches the particles of a MatchIndex.
  # @return [Hash] particles A Hash containing particles to
  #   match against. Keys are the downcased entries, values the
  #   entries as spelled in the data.
  # @example A particle hash
  #   MatchIndex.new('nobility_particles.yml')
  #     .particles.has_key?('von')  #=> true
  #   MatchIndex.new('nobility_particles.yml')
  #     .particles['von']           #=> 'von'
  def particles
    @particles ||= Hash[selected_data.map { |v| [v.downcase, v] }]
  end

  # Checks whether a word is in the particle list.
  # The lookup uses `word` as given; it is not downcased here.
  # @param [String] word the word that is checked.
  # @return [Boolean] `true` if `word` is in the particle_list,
  #   `false` otherwise.
  def in_particle_list?(word)
    reindex if @languages != NobleNames.configuration.languages
    particles.key? word
  end

  # Caches the particles or prefixes by the languages selected
  # in the config.
  # @return [Array] selected_data The data filtered by
  #   used languages.
  def selected_data
    @selected_data ||=
      @data
      .select { |language, _entries| @languages.include? language.to_sym }
      .values
      .flatten
  end

  # Checks whether a word has a prefix as defined in
  # the MatchIndex's data and returns it.
  # Prefixes are compared literally, so characters like `.` in a
  # prefix only match themselves.
  # @param [String] word the word that needs to be checked.
  # @return [String] pre the Prefix of the word. `nil` if
  #   it has none.
  # @example
  #   prefix?('mcdormer') #=> 'mc'
  def prefix?(word)
    reindex if @languages != NobleNames.configuration.languages
    prefixes.find { |pre| word.start_with?(pre) }
  end

  alias prefixes selected_data

  # Resets the state of the MatchIndex so that the caches are
  # rebuilt for the currently configured languages on next use.
  def reindex
    @languages = NobleNames.configuration.languages
    @selected_data = nil
    @particles = nil
  end
end
|
94
|
+
end
|
data/lib/noble_names/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: noble_names
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Martensen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -100,13 +100,15 @@ files:
|
|
100
100
|
- bin/build.sh
|
101
101
|
- bin/console
|
102
102
|
- bin/setup
|
103
|
-
- data/
|
104
|
-
- data/
|
103
|
+
- data/business_particles.yml
|
104
|
+
- data/nobility_particles.yml
|
105
|
+
- data/nobility_prefixes.yml
|
105
106
|
- lib/noble_names.rb
|
106
107
|
- lib/noble_names/config.rb
|
107
108
|
- lib/noble_names/core_ext/string.rb
|
108
109
|
- lib/noble_names/data.rb
|
109
110
|
- lib/noble_names/initializer.rb
|
111
|
+
- lib/noble_names/match_index.rb
|
110
112
|
- lib/noble_names/version.rb
|
111
113
|
homepage: https://github.com/Haniyya/noble_names
|
112
114
|
licenses:
|