Package not found. Please check the package name and try again.
noble_names 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -1
- data/data/business_particles.yml +50 -0
- data/data/{particles.yml → nobility_particles.yml} +10 -1
- data/data/{prefixes.yml → nobility_prefixes.yml} +1 -1
- data/lib/noble_names.rb +35 -21
- data/lib/noble_names/config.rb +1 -1
- data/lib/noble_names/core_ext/string.rb +5 -1
- data/lib/noble_names/data.rb +8 -18
- data/lib/noble_names/match_index.rb +94 -0
- data/lib/noble_names/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6efdc8370018eb99c8e94a21abb59f7ea0d6b99a
|
|
4
|
+
data.tar.gz: 267a58e52253d8caab93db519568e16331365731
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 486061cc648a081981a0411443bcfc03ba9bbc7e2a81def3ecb079f2590409d8e2aba36e73688e1552678f1ee096d007730c876dc65ae1603fb0d2b8993043ac
|
|
7
|
+
data.tar.gz: b19cbeed4756a0ce0232427d5c3aecd8b585fd2dd45f85430527fd6a47b31ba3a8444204cbc784797d41a9d10c82c6559575cb7e27a0c1cbb462d827bef3ca75
|
data/README.md
CHANGED
|
@@ -39,7 +39,7 @@ my_string #=> "Joseph von und zu Reinbeck"
|
|
|
39
39
|
```
|
|
40
40
|
|
|
41
41
|
### Languages
|
|
42
|
-
So far English, German, French, Spanish and Portuguese are supported.
|
|
42
|
+
So far English, German, French, Spanish, Dutch and Portuguese are supported.
|
|
43
43
|
By default all available languages are used. If you want to configure which one
|
|
44
44
|
to use for your application, you can do it like so:
|
|
45
45
|
```ruby
|
|
@@ -52,6 +52,13 @@ This way other language particles will be ignored.
|
|
|
52
52
|
"joseph von und zu reinbeck".to_title #=> "Joseph Von Und Zu Reinbeck"
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
+
### Business Names
|
|
56
|
+
The current stance on business names is to only correct the business particle
|
|
57
|
+
if need be and to leave the rest of the name alone.
|
|
58
|
+
```ruby
|
|
59
|
+
"bauhelm co. kg".to_title #=> "bauhelm Co. KG"
|
|
60
|
+
```
|
|
61
|
+
|
|
55
62
|
## Development
|
|
56
63
|
|
|
57
64
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
business_particles:
|
|
2
|
+
german:
|
|
3
|
+
- OHG
|
|
4
|
+
- KG
|
|
5
|
+
- GmbH
|
|
6
|
+
- Co.
|
|
7
|
+
- GbR
|
|
8
|
+
- AG
|
|
9
|
+
- PartG
|
|
10
|
+
- PartGmbBH
|
|
11
|
+
- e.V.
|
|
12
|
+
- r.V.
|
|
13
|
+
- KGaA
|
|
14
|
+
english:
|
|
15
|
+
- Ltd.
|
|
16
|
+
- CIC
|
|
17
|
+
- CIO
|
|
18
|
+
- LLP
|
|
19
|
+
- LP
|
|
20
|
+
- plc
|
|
21
|
+
- Ccc
|
|
22
|
+
- LLLP
|
|
23
|
+
- LP
|
|
24
|
+
- LLC
|
|
25
|
+
- LC
|
|
26
|
+
- PLLC
|
|
27
|
+
- Corp.
|
|
28
|
+
- Inc.
|
|
29
|
+
dutch:
|
|
30
|
+
- Mts
|
|
31
|
+
- vof
|
|
32
|
+
- cv
|
|
33
|
+
- bv
|
|
34
|
+
- nv
|
|
35
|
+
french:
|
|
36
|
+
- EI
|
|
37
|
+
- EURL
|
|
38
|
+
- SASU
|
|
39
|
+
- FCP
|
|
40
|
+
- SICAV
|
|
41
|
+
- GIE
|
|
42
|
+
- SEP
|
|
43
|
+
- SNC
|
|
44
|
+
- SCS
|
|
45
|
+
- SCA
|
|
46
|
+
- SARL
|
|
47
|
+
- SA
|
|
48
|
+
- SCOP
|
|
49
|
+
- SEM
|
|
50
|
+
- SAS
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
nobility_particles:
|
|
2
2
|
german:
|
|
3
3
|
- 'von'
|
|
4
4
|
- 'und'
|
|
@@ -6,8 +6,13 @@ particles:
|
|
|
6
6
|
- 'v.'
|
|
7
7
|
- 'vom'
|
|
8
8
|
- 'zum'
|
|
9
|
+
- 'zur'
|
|
9
10
|
- 'der'
|
|
10
11
|
- 'dem'
|
|
12
|
+
- 'den'
|
|
13
|
+
- 'in'
|
|
14
|
+
- 'v.d.'
|
|
15
|
+
- 'geb.'
|
|
11
16
|
english:
|
|
12
17
|
- 'of'
|
|
13
18
|
- 'de'
|
|
@@ -22,5 +27,9 @@ particles:
|
|
|
22
27
|
- 'de'
|
|
23
28
|
- 'da'
|
|
24
29
|
- 'du'
|
|
30
|
+
dutch:
|
|
31
|
+
- 'van'
|
|
32
|
+
- 'de'
|
|
33
|
+
- 'ter'
|
|
25
34
|
|
|
26
35
|
|
data/lib/noble_names.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# Coding: UTF-8
|
|
2
|
+
require 'noble_names/match_index'
|
|
2
3
|
require 'noble_names/version'
|
|
3
4
|
require 'noble_names/config'
|
|
4
5
|
require 'noble_names/initializer'
|
|
@@ -11,8 +12,8 @@ module NobleNames
|
|
|
11
12
|
# @param [String] word the word that needs to be capitalized.
|
|
12
13
|
# @return [String] word the word either capitalized or not.
|
|
13
14
|
def self.noble_capitalize(word)
|
|
14
|
-
prefix = prefix?(word)
|
|
15
|
-
if in_particle_list?(word)
|
|
15
|
+
prefix = Data.nobility_prefixes.prefix?(word)
|
|
16
|
+
if Data.nobility_particles.in_particle_list?(word)
|
|
16
17
|
word.downcase
|
|
17
18
|
elsif prefix
|
|
18
19
|
capitalize(prefix) + capitalize(word.gsub(prefix, ''))
|
|
@@ -23,15 +24,23 @@ module NobleNames
|
|
|
23
24
|
|
|
24
25
|
# Upcases the first small letters in each word,
|
|
25
26
|
# separated by hyphens.
|
|
27
|
+
# The word is also not capitalized if it already contains
|
|
28
|
+
# a capitalized letter. This is to allow Business Names
|
|
29
|
+
# to have custom capitalization.
|
|
26
30
|
# But beware, words separated by spaces stay small.
|
|
27
31
|
# @return [String] the capitalized word.
|
|
28
32
|
# @example
|
|
29
33
|
# capitalize('hans-ebert') #=> 'Hans-Ebert'
|
|
30
34
|
# capitalize('john') #=> 'John'
|
|
31
35
|
# capitalize('john james') #=> 'John james'
|
|
36
|
+
# capitalize('eBase') #=> 'eBase'
|
|
32
37
|
def self.capitalize(word)
|
|
33
|
-
word
|
|
34
|
-
|
|
38
|
+
if word =~ /[A-Z]|Ä|Ö|Ü/
|
|
39
|
+
word
|
|
40
|
+
else
|
|
41
|
+
word.gsub first_small_letters do |letter|
|
|
42
|
+
upcase(letter)
|
|
43
|
+
end
|
|
35
44
|
end
|
|
36
45
|
end
|
|
37
46
|
|
|
@@ -58,26 +67,31 @@ module NobleNames
|
|
|
58
67
|
/((\A.|(?<=\-).))/
|
|
59
68
|
end
|
|
60
69
|
|
|
61
|
-
# Checks weither a word is in the
|
|
62
|
-
# @param [String] word
|
|
63
|
-
# @return [Boolean] `true` if `word` is
|
|
64
|
-
# `false` otherwise
|
|
65
|
-
def self.
|
|
66
|
-
Data.
|
|
70
|
+
# Checks whether a word is in the business particle list
|
|
71
|
+
# @param [String] word The word in question.
|
|
72
|
+
# @return [Boolean] result `true` if `word` is a business-particle
|
|
73
|
+
# `false` otherwise
|
|
74
|
+
def self.business_particle?(word)
|
|
75
|
+
Data.business_particles.in_particle_list? word
|
|
67
76
|
end
|
|
68
77
|
|
|
69
|
-
#
|
|
70
|
-
#
|
|
71
|
-
# @param [
|
|
72
|
-
# @return [
|
|
73
|
-
#
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
78
|
+
# Corrects only the business particle and leaves the
|
|
79
|
+
# other words alone.
|
|
80
|
+
# @param [Array] words An array of words to be checked.
|
|
81
|
+
# @return [Array] words An array of corrected words.
|
|
82
|
+
# @example A Business Name
|
|
83
|
+
# correct_business_particles([
|
|
84
|
+
# 'cool', 'and', 'hip', 'gmbh'
|
|
85
|
+
# ]) #=> ['cool', 'and', 'hip', 'GmbH']
|
|
86
|
+
def self.correct_business_particles(words)
|
|
87
|
+
words.map! do |word|
|
|
88
|
+
if business_particle?(word)
|
|
89
|
+
word
|
|
90
|
+
.replace(Data.business_particles.particles[word.downcase])
|
|
91
|
+
else
|
|
92
|
+
word
|
|
93
|
+
end
|
|
79
94
|
end
|
|
80
|
-
nil
|
|
81
95
|
end
|
|
82
96
|
|
|
83
97
|
# Applies the core extension
|
data/lib/noble_names/config.rb
CHANGED
|
@@ -21,7 +21,11 @@ module NobleNames
|
|
|
21
21
|
# str #=> 'Jamie of Windsor'
|
|
22
22
|
def to_title!
|
|
23
23
|
words = split(/\s+/)
|
|
24
|
-
words.
|
|
24
|
+
if words.any? { |w| NobleNames.business_particle? w }
|
|
25
|
+
NobleNames.correct_business_particles words
|
|
26
|
+
else
|
|
27
|
+
words.map! { |w| NobleNames.noble_capitalize(w) }
|
|
28
|
+
end
|
|
25
29
|
replace(words * ' ')
|
|
26
30
|
end
|
|
27
31
|
end
|
data/lib/noble_names/data.rb
CHANGED
|
@@ -1,28 +1,18 @@
|
|
|
1
|
-
require 'yaml'
|
|
2
|
-
|
|
3
1
|
module NobleNames
|
|
4
2
|
# The module responsible for maintaining and delivering
|
|
5
3
|
# the match data as defined in the `data` directory.
|
|
6
4
|
module Data
|
|
7
5
|
DATA_PATH = File.expand_path('../../../data/', __FILE__).freeze
|
|
8
|
-
MATCH_DATA = Hash[Dir.glob(DATA_PATH + '/*.yml').collect do |f|
|
|
9
|
-
yaml = YAML.load_file(f)
|
|
10
|
-
yaml.first
|
|
11
|
-
end]
|
|
12
|
-
|
|
13
|
-
def self.particles
|
|
14
|
-
select_languages(MATCH_DATA['particles'])
|
|
15
|
-
end
|
|
16
6
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
7
|
+
@nobility_particles = MatchIndex.new('nobility_particles.yml')
|
|
8
|
+
@nobility_prefixes = MatchIndex.new('nobility_prefixes.yml')
|
|
9
|
+
@business_particles = MatchIndex.new('business_particles.yml')
|
|
20
10
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
11
|
+
# This returns an instance variable if it exists.
|
|
12
|
+
# Otherwise it calls super.
|
|
13
|
+
def self.method_missing(method, *args, &block)
|
|
14
|
+
var = instance_variable_get("@#{method}")
|
|
15
|
+
var ? var : super(method, args, block)
|
|
26
16
|
end
|
|
27
17
|
end
|
|
28
18
|
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
|
|
3
|
+
module NobleNames
|
|
4
|
+
# A {MatchIndex} holds the data necessary for finding
|
|
5
|
+
# prefixes and particles in Strings and checks them for it.
|
|
6
|
+
# {MatchIndex}s use Hashes for finding particles to guarantee
|
|
7
|
+
# constant performance in big particle lists.
|
|
8
|
+
# A {MatchIndex} has a lot of mutable state to cache as much matching
|
|
9
|
+
# information as possible.
|
|
10
|
+
class MatchIndex
|
|
11
|
+
attr_accessor :data
|
|
12
|
+
|
|
13
|
+
# Takes either a String or any Object and tries to
|
|
14
|
+
# convert it to a hash.
|
|
15
|
+
# @return [MatchIndex] match_index a new {MatchIndex}
|
|
16
|
+
# @param [String, Object] list if this is a string, it will
|
|
17
|
+
# be treated as a file-name in the `data/` folder from
|
|
18
|
+
# which a Hash will be extracted, containing
|
|
19
|
+
# language-keys and data.
|
|
20
|
+
# @example A correct data list
|
|
21
|
+
# MatchIndex.new({
|
|
22
|
+
# 'german' => ['von', 'zu'],
|
|
23
|
+
# 'english' => ['of']
|
|
24
|
+
# })
|
|
25
|
+
def initialize(list)
|
|
26
|
+
case list
|
|
27
|
+
when String
|
|
28
|
+
@data = YAML.load_file(File.expand_path(list, Data::DATA_PATH))
|
|
29
|
+
@data = @data.values.first
|
|
30
|
+
else
|
|
31
|
+
@data = Hash[list]
|
|
32
|
+
end
|
|
33
|
+
@lanugages = NobleNames.configuration.languages
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Returns and caches the particles of a MatchIndex.
|
|
37
|
+
# @return [Hash] particles A Hash containing particles to
|
|
38
|
+
# match against.
|
|
39
|
+
# @example A particle hash
|
|
40
|
+
# MatchIndex.new('nobility_particles.yml')
|
|
41
|
+
# .particles.has_key?('von') #=> true
|
|
42
|
+
# MatchIndex.new('nobility_particles.yml')
|
|
43
|
+
# .particles['von'] #=> 'von'
|
|
44
|
+
def particles
|
|
45
|
+
@particles ||= Hash[selected_data.collect { |v| [v.downcase, v] }]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Checks whether a word is in the nobility particle list.
|
|
49
|
+
# @param [String] word the word that is checked.
|
|
50
|
+
# @return [Boolean] `true` if `word` is in the particle_list,
|
|
51
|
+
# `false` otherwise.
|
|
52
|
+
def in_particle_list?(word)
|
|
53
|
+
reindex if @languages != NobleNames.configuration.languages
|
|
54
|
+
particles.key? word
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Caches the particles or prefixes by the languages selected
|
|
58
|
+
# in the config.
|
|
59
|
+
# @return [Array] selected_data The data filtered by
|
|
60
|
+
# used languages.
|
|
61
|
+
def selected_data
|
|
62
|
+
@selected_data ||=
|
|
63
|
+
@data
|
|
64
|
+
.select { |l| @languages.include? l.to_sym }
|
|
65
|
+
.values
|
|
66
|
+
.flatten
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Checks whether a word has a prefix as defined in
|
|
70
|
+
# the MatchIndex's data and returns it.
|
|
71
|
+
# @param [String] word the word that needs to be checked.
|
|
72
|
+
# @return [String] pre the Prefix of the word. `nil` if
|
|
73
|
+
# it has none.
|
|
74
|
+
# @example
|
|
75
|
+
# prefix?('mcdormer') #=> 'mc'
|
|
76
|
+
def prefix?(word)
|
|
77
|
+
reindex if @languages != NobleNames.configuration.languages
|
|
78
|
+
prefixes.each do |pre|
|
|
79
|
+
return pre if (word =~ Regexp.new(pre)) == 0
|
|
80
|
+
end
|
|
81
|
+
nil
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
alias prefixes selected_data
|
|
85
|
+
|
|
86
|
+
# Resets the state of the MatchIndex
|
|
87
|
+
def reindex
|
|
88
|
+
@languages = NobleNames.configuration.languages
|
|
89
|
+
@selected_data = nil
|
|
90
|
+
@prefixes = nil
|
|
91
|
+
@particles = nil
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
data/lib/noble_names/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: noble_names
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Paul Martensen
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-05-
|
|
11
|
+
date: 2016-05-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -100,13 +100,15 @@ files:
|
|
|
100
100
|
- bin/build.sh
|
|
101
101
|
- bin/console
|
|
102
102
|
- bin/setup
|
|
103
|
-
- data/
|
|
104
|
-
- data/
|
|
103
|
+
- data/business_particles.yml
|
|
104
|
+
- data/nobility_particles.yml
|
|
105
|
+
- data/nobility_prefixes.yml
|
|
105
106
|
- lib/noble_names.rb
|
|
106
107
|
- lib/noble_names/config.rb
|
|
107
108
|
- lib/noble_names/core_ext/string.rb
|
|
108
109
|
- lib/noble_names/data.rb
|
|
109
110
|
- lib/noble_names/initializer.rb
|
|
111
|
+
- lib/noble_names/match_index.rb
|
|
110
112
|
- lib/noble_names/version.rb
|
|
111
113
|
homepage: https://github.com/Haniyya/noble_names
|
|
112
114
|
licenses:
|