consistent_company_ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2fc44a535ad1632ed8dad8dd11b4341e8c1598c5
4
+ data.tar.gz: 6a02550a697d5edd52451a04358ee267426ff181
5
+ SHA512:
6
+ metadata.gz: 088800d076d8456185fd3b4e5e3b267f0369922197af06bc7733ded382cf3719666e041c184fd5ed405b01907905b029120e6bd36c17b7e6207a363ecfd60a93
7
+ data.tar.gz: 69ae7fdade375d865311a8e92a45c2160bd286761f8466772659c98d836f90b42d09ae5129ce194265c00874f0e023d5999b3093ed1e361b011b8ed01a9c1317
@@ -0,0 +1,7 @@
1
+ require File.expand_path('consistent_company_ruby/instance_methods', File.dirname(__FILE__))
2
+ #
3
+ # String class extension
4
+ #
5
# Mix the company-name helpers into every String instance.
# `send` is used so this also works where Module#include is private.
String.send(:include, ConsistentCompanyRuby::InstanceMethods)
@@ -0,0 +1,225 @@
1
module ConsistentCompanyRuby
  # Trailing words that identify a legal form or generic business type
  # rather than the company itself. Compared against upper-cased words.
  COMPANY_WORDS = %w[
    ADV ADVERTISING AGCY AGENCY AGY ASC ASS ASSN ASSOC ASSOCIAT ASSOCIATES
    ASSOCIATION ATTORNEY ATTRNY ATTY ATY AUTO CO COMP COMPANIES COMPANY CORP
    CORPORATION CT CONTRA DEPARTMENT DEPT DIR DIRECT DIV DIVISION GROUP
    HOLDINGS INC INCORPORATED INT LIMITED LLC LLP LOCAL LTD PC PLC PROD PRODS
    PRODUCT PRODUCTIONS PRODUCTS TR TRADE
  ].freeze

  # Ordered whole-word substitutions applied by #transform_company.
  # Both sides keep their surrounding spaces so only complete words match.
  WORD_REPLACEMENTS = [
    [' THE ', ' '],
    [' ONE ', ' 1 '],
    [' TWO ', ' 2 '],
    [' TO ', ' 2 '],
    [' THREE ', ' 3 '],
    [' FOUR ', ' 4 '],
    [' FOR ', ' 4 '],
    [' FIVE ', ' 5 '],
    [' SIX ', ' 6 '],
    [' SEVEN ', ' 7 '],
    [' EIGHT ', ' 8 '],
    [' NINE ', ' 9 '],
    [' TEN ', ' 10 '],
    [' ELEVEN ', ' 11 '],
    [' FIRST ', ' 1ST '],
    [' SECOND ', ' 2ND '],
    [' THIRD ', ' 3RD '],
    [' FOURTH ', ' 4TH '],
    [' FIFTH ', ' 5TH '],
    [' SIXTH ', ' 6TH '],
    [' SEVENTH ', ' 7TH '],
    [' EIGHTH ', ' 8TH '],
    [' NINTH ', ' 9TH '],
    [' TENTH ', ' 10TH '],
    [' CENTRE ', ' CTR '],
    [' CENTER ', ' CTR '],
    [' CNTR ', ' CTR '],
    [' CENT ', ' CTR '],
    [' CENTR ', ' CTR '],
    [' AUTOMOTIVE ', ' AUTO '],
    [' AUTOMOBILE ', ' AUTO '],
    [' AUTOS ', ' AUTO '],
    [' AVENUE ', ' AVE '],
    [' DRIVE ', ' DR '],
    [' PHOTOGRAPHY ', ' PHOTO '],
    [' BROTHERS ', ' BROS '],
    [' TECHNOLOGY ', ' TEC '],
    [' TECH ', ' TEC '],
    [' TELEVISION ', ' TV '],
    [' INFORMATION ', ' INFO '],
    [' SOCIETY ', ' SOC '],
    [' DEPARTMENT ', ' DEPT '],
    [' REGIONAL ', ' REG '],
    [' REGION ', ' REG '],
    [' AUTHORITY ', ' AUTH '],
    [' NATIONAL ', ' NATL '],
    [' INTERNATIONAL ', ' INT '],
    [' INTERNATION ', ' INT '],
    [' INTL ', ' INT '],
    [' MARKETING ', ' MKT '],
    [' MKTG ', ' MKT '],
    [' MANAGEMENT ', ' MGT '],
    [' MGMT ', ' MGT ']
  ].freeze

  # Company-name normalisation helpers, written to be mixed into String.
  module InstanceMethods
    # Returns a normalised, space-free, upper-case key for the company name
    # held in the receiver. The receiver itself is never modified.
    #
    # Steps: upcase and trim, drop parenthesised text, reduce punctuation to
    # single spaces ("&" and "+" become AND, "A+" becomes "A PLUS"), fold
    # " AND " to " & ", apply #transform_company, then delete all spaces.
    #
    # @return [String] the normalised name; '' when nothing usable remains
    def namer
      return self if self == ''

      cleaned = normalize_characters(remove_parenthesized_text(upcase.strip))
      return '' if cleaned.empty?

      str_replace cleaned, ' AND ', ' & '
      cleaned.strip!

      transform_company(cleaned).strip.delete(' ')
    end

    # Applies the word-level rules to an upper-cased, single-spaced name:
    # whole-word abbreviations, removal of a leading article "A", removal of
    # up to two trailing company words, and removal of a trailing "&".
    #
    # @param res [String] the name to transform (not modified)
    # @return [String] the transformed name, words separated by spaces
    def transform_company(res)
      # Pad with spaces so every word, including first and last, is matched
      # by the space-delimited patterns. This also copies the argument.
      res = " #{res} "
      WORD_REPLACEMENTS.each { |from, to| str_replace res, from, to }
      res.strip!

      return res unless res.include?(' ') && res.size > 3

      # Drop "A" only when it is the first word and the second word is not an
      # initial, "&" or "PLUS": "A C & R" keeps it, "A TOUCH OF CLASS" loses it.
      # Only the leading article is removed; later "A " sequences such as the
      # one inside "PIZZA PLACE" must survive.
      rest = res[2..-1]
      if res.start_with?('A ') &&
         !rest.start_with?('&') &&
         res[3] != ' ' &&
         !rest.start_with?('PLUS')
        res = rest
      end

      if res.include?(' ')
        words = res.split(' ')
        if is_company_word(words.last)
          words.pop
          # Look at the new last word as well ("... COMPANY INC").
          words.pop if is_company_word(words.last)
          res = words.join(' ')
        end

        # Exclusive range so that a lone "&" is removed as well.
        res = res[0...-1] if res.end_with?('&')
      end

      res
    end

    # Tells whether a word is a generic company suffix.
    #
    # @param in_word [String, nil] an upper-cased word
    # @return [Boolean] true when the word is listed in COMPANY_WORDS
    def is_company_word(in_word)
      COMPANY_WORDS.include?(in_word)
    end

    # Replaces +rep+ with +with+ inside +orig+ in place, repeating until no
    # occurrence is left. Unlike a single gsub this also catches matches that
    # share a boundary character (" AND AND " contains " AND " twice).
    #
    # @param orig [String] string to modify in place
    # @param rep [String] text to search for
    # @param with [String] replacement text
    # @return [String] +orig+, after modification
    def str_replace(orig, rep, with)
      # An empty pattern always matches; repeating would never terminate.
      return orig if rep.empty?

      if with.include?(rep)
        # Each replacement would reintroduce the pattern, so do one pass only.
        orig.gsub!(rep, with)
      else
        orig.sub!(rep, with) while orig.include?(rep)
      end
      orig
    end

    private

    # Removes parenthesised text when the name holds one or two "(" and does
    # not start with one. Unclosed groups are cut through to the end of the
    # string. Any other arrangement is returned unchanged.
    def remove_parenthesized_text(text)
      opens = []
      closes = []
      text.each_char.with_index do |ch, idx|
        opens << idx if ch == '('
        closes << idx if ch == ')'
      end
      return text if opens.empty? || text[0] == '('

      left1, left2 = opens
      # -1 marks "no such parenthesis", matching the comparisons below.
      right1 = closes[0] || -1
      right2 = closes[1] || -1
      head = text[0...left1]

      case opens.size
      when 1
        # ..(xx).. -> ....   or   ..(xx -> ..
        right1 > left1 ? head + text[(right1 + 1)..-1] : head
      when 2
        if left1 < right1 && right1 < left2 && left2 < right2
          # ..(xx)..(xx).. -> ......
          head + text[(right1 + 1)...left2] + text[(right2 + 1)..-1]
        elsif left2 < right1 && right1 < right2
          # ..(xx(xx)xx).. -> ....
          head + text[(right2 + 1)..-1]
        elsif left1 < right1 && right1 < left2 && right2 == -1
          # ..(xx)..(xx -> ....
          head + text[(right1 + 1)...left2]
        elsif left2 < right1 && right2 == -1
          # ..(xx(xx)xx -> ..
          head
        elsif right1 == -1
          # ..(xx(xx -> ..
          head
        else
          text
        end
      else
        text
      end
    end

    # Keeps A-Z, 0-9 and characters with a code point above 128, drops
    # apostrophes, and turns every other character into a single separating
    # space. "&" and "+" become the word AND, except that "A+" at the very
    # start becomes "A PLUS".
    def normalize_characters(text)
      out = ''.dup
      text.each_char do |ch|
        code = ch.ord
        if (code >= 65 && code <= 90) || (code >= 48 && code <= 57) || code > 128
          out << ch
        elsif ch == "'"
          # Apostrophes vanish without leaving a space: TESTER'S -> TESTERS.
        elsif ch == '&' && !out.empty?
          out << (out.end_with?(' ') ? 'AND ' : ' AND ')
        elsif ch == '+'
          if out == 'A' || out == 'A '
            out.replace('A PLUS ')
          elsif !out.empty?
            out << (out.end_with?(' ') ? 'AND ' : ' AND ')
          end
        elsif !out.empty? && !out.end_with?(' ')
          out << ' '
        end
      end
      out
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'minitest/autorun'
2
+ require 'consistent_company_ruby'
3
+
4
# End-to-end checks for String#namer, grouped by the rule being exercised.
class ConsistentCompanyRubyTest < Minitest::Test
  def test_namer
    # The receiver must come back untouched.
    original = ' my test '
    _normalized = original.namer
    assert_equal ' my test ', original

    # Nothing usable in the input yields an empty name.
    assert_equal "", "".namer
    assert_equal "", " ".namer
    assert_equal "", "___".namer

    # Surrounding and embedded whitespace disappears.
    assert_equal 'TEST', " test ".namer
    assert_equal 'TEST', " te st ".namer

    # Company words are only stripped from the end.
    assert_equal "MYTEST", "My Test Company".namer
    assert_equal "MYCOMPANYTEST", "MY COMPANY TEST".namer
    assert_equal "MYTEST", "MY TEST COMPANY COMP".namer

    # "The" is dropped.
    assert_equal "AAA", "The AAA Company".namer

    # Punctuation is dropped.
    assert_equal "TESTERS", %q{The, ?%^* tester's company!}.namer

    # Very long input.
    assert_equal "A"*1000+"NAMEISHERE", (" A"*1000 + 'NAME IS HERE ').namer

    # Parenthesised text, balanced and unbalanced.
    assert_equal "BBEE", "BB(xx)EE".namer
    assert_equal "BE", "B(xx)E".namer
    assert_equal "XX", "(xx)".namer
    assert_equal "BB", "BB(xx".namer
    assert_equal "XX", "(xx".namer
    assert_equal "BBMMEE", "BB(xx)MM(xx)EE".namer
    assert_equal "BBEE", "BB(xx(xx)xx)EE".namer
    assert_equal "BBMM", "BB(xx)MM(xx".namer
    assert_equal "BB", "BB(xx(xx)xx".namer
    assert_equal "BB", "BB(xx(xx".namer

    # "and" and "&" are treated alike.
    assert_equal "PRE&POST", "pre and post".namer
    assert_equal "PRE&POST", "pre & post".namer
    assert_equal "PRE&POST", "&pre and post&".namer
    assert_equal "PRE&POST", "& pre and post &".namer
    assert_equal "ANDPRE&POSTAND", "and pre and post and".namer

    # Leading "A": kept before an initial or PLUS, dropped otherwise.
    assert_equal "ABTEST", "A B TEST".namer
    assert_equal "BTEST", "A BTEST".namer
    assert_equal "APLUSTEST", "A PLUS TEST".namer
    assert_equal "APLUSTEST", "A + TEST".namer
    assert_equal "APLUSTEST", "A+ TEST".namer

    # Common words are abbreviated consistently.
    assert_equal "TESTCTRCTRCTR", "Test Center Center Center".namer
    assert_equal "My Test Advertising Co".namer, "MY TEST ADV COMPANY".namer

    # Everything at once.
    assert_equal "ABA123&&MCCDAFDS&&B", "aba 123 ~!@\#$%^& *()_+ <>? ,./ {} [] ;' : 'mccdafd s & and b".namer
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: consistent_company_ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gene Wu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-08-19 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Written in Ruby. To normalize a company name.
14
+ email: genehk@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/consistent_company_ruby.rb
20
+ - lib/consistent_company_ruby/instance_methods.rb
21
+ - test/test_consistent_company_ruby.rb
22
+ homepage: http://github.com/gwu1/consistent_company_ruby
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.5.1
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Normalize company name
46
+ test_files:
47
+ - test/test_consistent_company_ruby.rb