consistent_company_ruby 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/consistent_company_ruby.rb +7 -0
- data/lib/consistent_company_ruby/instance_methods.rb +225 -0
- data/test/test_consistent_company_ruby.rb +62 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2fc44a535ad1632ed8dad8dd11b4341e8c1598c5
|
4
|
+
data.tar.gz: 6a02550a697d5edd52451a04358ee267426ff181
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 088800d076d8456185fd3b4e5e3b267f0369922197af06bc7733ded382cf3719666e041c184fd5ed405b01907905b029120e6bd36c17b7e6207a363ecfd60a93
|
7
|
+
data.tar.gz: 69ae7fdade375d865311a8e92a45c2160bd286761f8466772659c98d836f90b42d09ae5129ce194265c00874f0e023d5999b3093ed1e361b011b8ed01a9c1317
|
@@ -0,0 +1,225 @@
|
|
1
|
+
module ConsistentCompanyRuby
  # Helpers that reduce a company name to a compact, comparable key
  # (upper-case, no spaces, common words abbreviated, trailing company
  # suffixes such as "INC" or "COMPANY" removed).
  #
  # The methods call String methods (+upcase+, +strip+, ...) on +self+, so the
  # module is meant to be mixed into String-like objects.
  module InstanceMethods
    # Trailing words that carry no identifying information and are dropped
    # from the end of a name (at most two of them).
    COMPANY_WORDS = %w[
      ADV ADVERTISING AGCY AGENCY AGY ASC ASS ASSN ASSOC ASSOCIAT ASSOCIATES
      ASSOCIATION ATTORNEY ATTRNY ATTY ATY AUTO CO COMP COMPANIES COMPANY CORP
      CORPORATION CT CONTRA DEPARTMENT DEPT DIR DIRECT DIV DIVISION GROUP
      HOLDINGS INC INCORPORATED INT LIMITED LLC LLP LOCAL LTD PC PLC PROD PRODS
      PRODUCT PRODUCTIONS PRODUCTS TR TRADE
    ].freeze

    # Whole-word substitutions applied by #transform_company, in order.
    # Each entry is a [" WORD ", " REPLACEMENT "] pair; the surrounding spaces
    # make sure only complete words are matched.
    WORD_REPLACEMENTS = [[' THE ', ' ']].concat(
      {
        'ONE' => '1', 'TWO' => '2', 'TO' => '2', 'THREE' => '3',
        'FOUR' => '4', 'FOR' => '4', 'FIVE' => '5', 'SIX' => '6',
        'SEVEN' => '7', 'EIGHT' => '8', 'NINE' => '9', 'TEN' => '10',
        'ELEVEN' => '11',
        'FIRST' => '1ST', 'SECOND' => '2ND', 'THIRD' => '3RD',
        'FOURTH' => '4TH', 'FIFTH' => '5TH', 'SIXTH' => '6TH',
        'SEVENTH' => '7TH', 'EIGHTH' => '8TH', 'NINTH' => '9TH',
        'TENTH' => '10TH',
        'CENTRE' => 'CTR', 'CENTER' => 'CTR', 'CNTR' => 'CTR',
        'CENT' => 'CTR', 'CENTR' => 'CTR',
        'AUTOMOTIVE' => 'AUTO', 'AUTOMOBILE' => 'AUTO', 'AUTOS' => 'AUTO',
        'AVENUE' => 'AVE', 'DRIVE' => 'DR', 'PHOTOGRAPHY' => 'PHOTO',
        'BROTHERS' => 'BROS', 'TECHNOLOGY' => 'TEC', 'TECH' => 'TEC',
        'TELEVISION' => 'TV', 'INFORMATION' => 'INFO', 'SOCIETY' => 'SOC',
        'DEPARTMENT' => 'DEPT', 'REGIONAL' => 'REG', 'REGION' => 'REG',
        'AUTHORITY' => 'AUTH', 'NATIONAL' => 'NATL',
        'INTERNATIONAL' => 'INT', 'INTERNATION' => 'INT', 'INTL' => 'INT',
        'MARKETING' => 'MKT', 'MKTG' => 'MKT',
        'MANAGEMENT' => 'MGT', 'MGMT' => 'MGT'
      }.map { |word, short| [" #{word} ", " #{short} "] }
    ).freeze

    private_constant :COMPANY_WORDS, :WORD_REPLACEMENTS

    # Returns the normalized form of the receiver (a company name).
    #
    # Steps: upper-case and trim; drop up to two parenthesized groups; keep
    # only A-Z, 0-9 and characters above code point 128; turn "&"/"+" into
    # "&" (or "A+" into "A PLUS"); abbreviate common words; drop a leading
    # "A"/"THE" and trailing company words; finally remove every space.
    #
    # The receiver itself is never modified. An empty receiver is returned
    # as-is; a receiver with no usable characters yields "".
    def namer # company
      return self if self == ''

      # No explicit padding of "&" and "+" is needed: the tokenizer below
      # already surrounds the words it emits for them with single spaces.
      in_string = namer_strip_parentheses(upcase.strip)
      tokens = namer_tokenize(in_string)
      return '' if tokens.empty?

      str_replace(tokens, ' AND ', ' & ')
      # After tokenizing, the only whitespace left is the plain space.
      transform_company(tokens.strip).delete(' ')
    end

    # Abbreviates common words in +res+ and strips noise words.
    #
    # +res+ is expected to be upper-case with single spaces between words
    # (the shape #namer produces). The argument is not modified; a new string
    # is returned.
    def transform_company(res)
      res = " #{res} "
      WORD_REPLACEMENTS.each { |from, to| str_replace(res, from, to) }
      res.strip!

      return res unless res.include?(' ') && res.size > 3

      # Drop a leading article "A" unless the next word is "&", "PLUS" or a
      # single character. For example "A C & R" keeps its "A", while
      # "A TOUCH OF CLASS" loses it. Only the leading "A " is removed; later
      # words that merely end in "A" (e.g. "PIZZA") are left intact.
      if res.start_with?('A ')
        rest = res[2..-1]
        res = rest unless rest.start_with?('&', 'PLUS') || rest[1] == ' '
      end

      return res unless res.include?(' ')

      # Remove up to two trailing company words, then a trailing "&".
      words = res.split(' ')
      if is_company_word(words.last)
        words.pop
        words.pop if is_company_word(words.last) # look at the new last word
        res = words.join(' ')
      end
      res = res[0..res.size - 2] if res.end_with?('&')

      res
    end

    # Returns true when +in_word+ is one of the generic company words
    # (exact, case-sensitive match), false otherwise.
    def is_company_word(in_word)
      COMPANY_WORDS.include?(in_word)
    end

    # Replaces +rep+ with +with+ inside +orig+, in place, and returns +orig+.
    #
    # It's more than a gsub: the replacement is repeated until no occurrence
    # is left, so matches that share a character are all handled
    # (" AND AND " with " AND " => " & " becomes " & & ").
    #
    # An empty +rep+ leaves +orig+ untouched, and when +with+ itself contains
    # +rep+ a single pass is made; repeating in either case would never finish.
    def str_replace(orig, rep, with)
      return orig if rep.empty?

      if with.include?(rep)
        orig.gsub!(rep, with)
        return orig
      end

      orig.sub!(rep, with) while orig.include?(rep)
      orig
    end

    private

    # Removes the text inside the first one or two pairs of parentheses.
    # An unclosed "(" swallows everything up to the end of the string.
    # Strings that start with "(" or contain more than two "(" are returned
    # unchanged.
    def namer_strip_parentheses(in_string)
      num_lefts = 0
      num_rights = 0
      left1 = right1 = left2 = right2 = -1

      in_string.each_char.with_index do |ch, i|
        if ch == '('
          num_lefts += 1
          left1 = i if num_lefts == 1
          left2 = i if num_lefts == 2
        elsif ch == ')'
          num_rights += 1
          right1 = i if num_rights == 1
          right2 = i if num_rights == 2
        end
      end

      return in_string if num_lefts == 0 || in_string[0] == '('

      head = in_string[0...left1]
      case num_lefts
      when 1
        # ..(xx).. -> ....   or   ..(xx -> ..
        right1 > left1 ? head + in_string[(right1 + 1)..-1] : head
      when 2
        if left1 < right1 && right1 < left2 && left2 < right2
          # ..(xx)..(xx).. -> ......
          head + in_string[(right1 + 1)...left2] + in_string[(right2 + 1)..-1]
        elsif left1 < left2 && left2 < right1 && right1 < right2
          # ..(xx(xx)xx).. -> ....
          head + in_string[(right2 + 1)..-1]
        elsif left1 < right1 && right1 < left2 && right2 == -1
          # ..(xx)..(xx -> ....
          head + in_string[(right1 + 1)...left2]
        elsif (left1 < left2 && left2 < right1 && right2 == -1) ||
              (right1 == -1 && right2 == -1)
          # ..(xx(xx)xx -> ..   or   ..(xx(xx -> ..
          head
        else
          in_string
        end
      else
        in_string
      end
    end

    # Rebuilds +in_string+ keeping A-Z, 0-9 and characters above code point
    # 128. Apostrophes vanish, "&" and "+" become the word "AND" ("A+" becomes
    # "A PLUS"), and every other character collapses into a single space.
    # The result never starts with a space.
    def namer_tokenize(in_string)
      out = ''.dup

      in_string.each_char do |ch|
        asc = ch.ord

        if (asc >= 65 && asc <= 90) || (asc >= 48 && asc <= 57) || asc > 128
          out << ch
        elsif asc == 39 # '
          # not keeping it
        elsif asc == 38 && !out.empty? # &
          out << (out.end_with?(' ') ? 'AND ' : ' AND ')
        elsif asc == 43 # +
          if out == 'A' || out == 'A '
            out.replace('A PLUS ')
          elsif !out.empty?
            out << (out.end_with?(' ') ? 'AND ' : ' AND ')
          end
        elsif !out.empty? && !out.end_with?(' ')
          out << ' '
        end
      end

      out
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'consistent_company_ruby'
|
3
|
+
|
4
|
+
# Exercises String#namer. Each concern lives in its own test method so that a
# failure in one group does not hide the results of the others.
class ConsistentCompanyRubyTest < Minitest::Test
  # don't change the calling string
  def test_namer_leaves_receiver_untouched
    str = ' my test '
    _company = str.namer
    assert_equal(' my test ', str)
  end

  # empty name
  def test_namer_empty_names
    assert_equal("", "".namer)
    assert_equal("", " ".namer)
    assert_equal("", "___".namer)
  end

  def test_namer_removes_spaces
    # remove leading and trailing space
    assert_equal('TEST', " test ".namer)
    # remove embedded space
    assert_equal('TEST', " te st ".namer)
  end

  # remove Company
  def test_namer_removes_trailing_company_words
    assert_equal("MYTEST", "My Test Company".namer)
    assert_equal("MYCOMPANYTEST", ("MY COMPANY TEST").namer)
    assert_equal("MYTEST", ("MY TEST COMPANY COMP").namer)
  end

  # remove leading The
  def test_namer_removes_leading_the
    assert_equal("AAA", "The AAA Company".namer)
  end

  # remove punctuation
  def test_namer_removes_punctuation
    assert_equal("TESTERS", %q{The, ?%^* tester's company!}.namer)
  end

  # a very long name
  def test_namer_very_long_name
    assert_equal("A"*1000+"NAMEISHERE", (" A"*1000 + 'NAME IS HERE ').namer)
  end

  # parenthesis matching
  def test_namer_parentheses
    assert_equal("BBEE", ("BB(xx)EE").namer)
    assert_equal("BE", ("B(xx)E").namer)
    assert_equal("XX", ("(xx)").namer)
    assert_equal("BB", ("BB(xx").namer)
    assert_equal("XX", ("(xx").namer)
    assert_equal("BBMMEE", ("BB(xx)MM(xx)EE").namer)
    assert_equal("BBEE", ("BB(xx(xx)xx)EE").namer)
    assert_equal("BBMM", ("BB(xx)MM(xx").namer)
    assert_equal("BB", ("BB(xx(xx)xx").namer)
    assert_equal("BB", ("BB(xx(xx").namer)
  end

  # handle and &
  def test_namer_and_ampersand
    assert_equal("PRE&POST", ("pre and post").namer)
    assert_equal("PRE&POST", ("pre & post").namer)
    assert_equal("PRE&POST", ("&pre and post&").namer)
    assert_equal("PRE&POST", ("& pre and post &").namer)
    assert_equal("ANDPRE&POSTAND", ("and pre and post and").namer)
  end

  # leading A
  def test_namer_leading_a
    assert_equal("ABTEST", ("A B TEST").namer)
    assert_equal("BTEST", ("A BTEST").namer)
    assert_equal("APLUSTEST", ("A PLUS TEST").namer)
    assert_equal("APLUSTEST", ("A + TEST").namer)
    assert_equal("APLUSTEST", ("A+ TEST").namer)
  end

  # common name shortening
  def test_namer_common_shortening
    assert_equal("TESTCTRCTRCTR", ("Test Center Center Center").namer)
    assert_equal("My Test Advertising Co".namer, "MY TEST ADV COMPANY".namer)
  end

  # rigorous checking
  def test_namer_rigorous
    assert_equal "ABA123&&MCCDAFDS&&B", "aba 123 ~!@\#$%^& *()_+ <>? ,./ {} [] ;' : 'mccdafd s & and b".namer
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: consistent_company_ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gene Wu
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-08-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Written in Ruby. To normalize a company name.
|
14
|
+
email: genehk@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/consistent_company_ruby.rb
|
20
|
+
- lib/consistent_company_ruby/instance_methods.rb
|
21
|
+
- test/test_consistent_company_ruby.rb
|
22
|
+
homepage: http://github.com/gwu1/consistent_company_ruby
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.5.1
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Normalize company name
|
46
|
+
test_files:
|
47
|
+
- test/test_consistent_company_ruby.rb
|