consistent_company 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.4
|
1
|
+
0.5.0
|
data/consistent_company.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{consistent_company}
|
8
|
-
s.version = "0.0.4"
|
8
|
+
s.version = "0.0.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = [%q{Doug Cleven}]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-10-06}
|
13
13
|
s.description = %q{Ruby C Extension to normalize a company name. Useful when company names come from various sources.}
|
14
14
|
s.email = %q{dcleven@marketron.com}
|
15
15
|
s.extensions = [%q{ext/consistent_company/extconf.rb}]
|
@@ -6,9 +6,8 @@
|
|
6
6
|
|
7
7
|
char * TransformCompany(char * inString);
|
8
8
|
static int IsCompanyWord(char * inWord);
|
9
|
-
char *trimwhitespace(char *str);
|
10
|
-
char *trimsuffix(char *str, const char *suffix);
|
11
|
-
char *str_replace(char *orig, const char *rep, const char *with);
|
9
|
+
char * trimwhitespace(char *str);
|
10
|
+
char * str_replace(char *orig, const char *rep, const char *with);
|
12
11
|
|
13
12
|
static VALUE rb_ConsistentCompany_Init(VALUE self)
|
14
13
|
{
|
@@ -19,10 +18,12 @@ static VALUE rb_ConsistentCompany_Init(VALUE self)
|
|
19
18
|
static VALUE rb_CompanyNamer(VALUE self)
|
20
19
|
{
|
21
20
|
char * pSelf = RSTRING_PTR(self);
|
22
|
-
int selfLen = strlen(pSelf)+2;
|
23
|
-
int workLen = selfLen;
|
21
|
+
int selfLen = (int)strlen(pSelf)+2;
|
22
|
+
int workLen = (int)selfLen;
|
24
23
|
char * s = pSelf;
|
25
|
-
|
24
|
+
if (*pSelf == '\0')
|
25
|
+
return self;
|
26
|
+
|
26
27
|
// calc size of work strings
|
27
28
|
// while processing we turn & = AND, + = PLUS
|
28
29
|
// and we add space at front and back
|
@@ -31,7 +32,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
31
32
|
workLen +=3; // worst case we add 3 chars
|
32
33
|
s++;
|
33
34
|
}
|
34
|
-
workLen +=
|
35
|
+
workLen += 90; // add space front and back
|
35
36
|
//////////////
|
36
37
|
|
37
38
|
// for company only
|
@@ -50,7 +51,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
50
51
|
inString[i] = toupper( inString[i] );
|
51
52
|
|
52
53
|
inString = trimwhitespace(inString);
|
53
|
-
|
54
|
+
int len = (int)strlen(inString);
|
54
55
|
for (i = 0; i < len; i++)
|
55
56
|
{
|
56
57
|
if (inString[i] == '(')
|
@@ -79,7 +80,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
79
80
|
{
|
80
81
|
// ..(xx)..
|
81
82
|
inString[left1++] = ' ';
|
82
|
-
|
83
|
+
memmove(&inString[left1], &inString[right1+1], strlen(inString+right1+1)+1);
|
83
84
|
}
|
84
85
|
else
|
85
86
|
// ..(xx
|
@@ -91,21 +92,21 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
91
92
|
{
|
92
93
|
// ..(xx)..(xx)..
|
93
94
|
inString[left1] = ' ';
|
94
|
-
|
95
|
+
memmove(inString + left1 + 1, inString + right1 + 1, left2-right1-1);
|
95
96
|
inString[left1+1+left2-right1-1] = ' ';
|
96
|
-
|
97
|
+
memmove(inString+left1+1+left2-right1, inString + right2+1, strlen(inString+right2+1)+1);
|
97
98
|
}
|
98
99
|
else if ((left1 < left2) && (left2 < right1) && (right1 < right2))
|
99
100
|
{
|
100
101
|
// ..(xx(xx)xx)..
|
101
102
|
inString[left1] = ' ';
|
102
|
-
|
103
|
+
memmove(inString+left1+1, inString+right2+1, strlen(inString+right2+1)+1);
|
103
104
|
}
|
104
105
|
else if ((left1 < right1) && (right1 < left2) && (right2 == -1))
|
105
106
|
{
|
106
107
|
// ..(xx)..(xx
|
107
108
|
inString[left1] = ' ';
|
108
|
-
|
109
|
+
memmove(inString+left1+1, inString+right1+1, left2-right1-1);
|
109
110
|
inString[left1+1+left2-right1] = '\0';
|
110
111
|
}
|
111
112
|
else if ((left1 < left2) && (left2 < right1) && (right2 == -1))
|
@@ -122,7 +123,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
122
123
|
char singleCharStr[2];
|
123
124
|
singleCharStr[1] = '\0';
|
124
125
|
returnString[0] = '\0';
|
125
|
-
for (i = 0; i < strlen(inString); i++)
|
126
|
+
for (i = 0; i < (int)strlen(inString); i++)
|
126
127
|
{
|
127
128
|
ch = inString[i];
|
128
129
|
asc = (int)ch;
|
@@ -161,19 +162,11 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
161
162
|
strcat(returnString, " ");
|
162
163
|
}
|
163
164
|
}
|
164
|
-
|
165
|
-
// {
|
166
|
-
// char buff[200];
|
167
|
-
// sprintf(buff, "workLen %d %s workString %d returnString %d %s", workLen, pSelf, strlen(workString), strlen(returnString), returnString);
|
168
|
-
// return rb_str_new2(trimwhitespace(buff));
|
169
|
-
// }
|
170
|
-
char * p;
|
165
|
+
|
171
166
|
str_replace(returnString, " AND ", " & ");
|
172
167
|
|
173
|
-
int oldLen = strlen(returnString);
|
174
|
-
// returnString = trimsuffix(returnString, "s");
|
175
168
|
returnString = trimwhitespace(returnString);
|
176
|
-
|
169
|
+
returnString = TransformCompany(returnString);
|
177
170
|
VALUE return_value = rb_str_new2(trimwhitespace(returnString));
|
178
171
|
free(returnString);
|
179
172
|
free(workString);
|
@@ -190,16 +183,15 @@ FIRST FEDERAL SAVINGS becomes 1ST FEDERAL SAVINGS
|
|
190
183
|
char * TransformCompany(char * resultString)
|
191
184
|
{
|
192
185
|
// resultString should have been allocated with 2 extra char for our padding here
|
193
|
-
char * buf = malloc(strlen(resultString)+
|
186
|
+
char * buf = malloc(strlen(resultString)+30);
|
194
187
|
strcpy(buf, " ");
|
195
188
|
strcat(buf,resultString);
|
196
189
|
strcat(buf, " ");
|
197
190
|
strcpy(resultString, buf);
|
198
191
|
free(buf);
|
199
|
-
|
192
|
+
|
200
193
|
char * spaceLoc;
|
201
194
|
char * s = resultString;
|
202
|
-
|
203
195
|
str_replace(s, " THE ", " ");
|
204
196
|
str_replace(s, " ONE ", " 1 ");
|
205
197
|
str_replace(s, " TWO ", " 2 ");
|
@@ -228,7 +220,6 @@ char * TransformCompany(char * resultString)
|
|
228
220
|
str_replace(s, " CENTRE ", " CTR ");
|
229
221
|
str_replace(s, " CENTER ", " CTR ");
|
230
222
|
str_replace(s, " CNTR ", " CTR ");
|
231
|
-
str_replace(s, " CTR ", " CTR ");
|
232
223
|
str_replace(s, " CENT ", " CTR ");
|
233
224
|
str_replace(s, " CENTR ", " CTR ");
|
234
225
|
str_replace(s, " AUTOMOTIVE ", " AUTO ");
|
@@ -251,11 +242,12 @@ char * TransformCompany(char * resultString)
|
|
251
242
|
str_replace(s, " INTERNATIONAL ", " INT ");
|
252
243
|
str_replace(s, " INTERNATION ", " INT ");
|
253
244
|
str_replace(s, " INTL ", " INT ");
|
254
|
-
str_replace(s, " MARKETING ", "
|
245
|
+
str_replace(s, " MARKETING ", " MKT ");
|
246
|
+
str_replace(s, " MKTG ", " MKT ");
|
255
247
|
str_replace(s, " MANAGEMENT ", " MGT ");
|
256
248
|
str_replace(s, " MGMT ", " MGT ");
|
257
249
|
|
258
|
-
s = trimwhitespace(s);
|
250
|
+
s = trimwhitespace(s);
|
259
251
|
spaceLoc = strstr(s, " ");
|
260
252
|
//spaceLoc = resultString.IndexOf(" ");
|
261
253
|
if (spaceLoc && strlen(s) > 3) // More than one word and more than 3 chars
|
@@ -268,7 +260,7 @@ char * TransformCompany(char * resultString)
|
|
268
260
|
strncmp(s+3, " ", 1) != 0 &&
|
269
261
|
strstr(s, "PLUS") != s + 2)
|
270
262
|
{
|
271
|
-
|
263
|
+
memmove(s, s+2, strlen(s+2)+1);
|
272
264
|
}
|
273
265
|
|
274
266
|
spaceLoc = strrchr(s, ' ');
|
@@ -283,6 +275,8 @@ char * TransformCompany(char * resultString)
|
|
283
275
|
spaceLoc = strrchr(s, ' ');
|
284
276
|
if (spaceLoc) // Look at the new last word
|
285
277
|
{
|
278
|
+
free(lastWord);
|
279
|
+
lastWord = malloc(strlen(spaceLoc)+1);
|
286
280
|
strcpy(lastWord, spaceLoc + 1);
|
287
281
|
if (IsCompanyWord(lastWord))
|
288
282
|
{
|
@@ -295,8 +289,7 @@ char * TransformCompany(char * resultString)
|
|
295
289
|
if (s[strlen(s)-1] == '&')
|
296
290
|
s[strlen(s)-1] = '\0';
|
297
291
|
}
|
298
|
-
|
299
|
-
str_replace(s, " ", "");
|
292
|
+
s = str_replace(s, " ", "");
|
300
293
|
return s;
|
301
294
|
}
|
302
295
|
|
@@ -367,73 +360,41 @@ int IsCompanyWord(char * inWord)
|
|
367
360
|
|
368
361
|
/*
|
369
362
|
Trim whitespace from front and back of string
|
363
|
+
return the same ptr as received, move the non-whitespace chars
|
364
|
+
to the front and trim the end with \0
|
370
365
|
*/
|
371
|
-
char *trimwhitespace(char *str)
|
366
|
+
char * trimwhitespace(char *str)
|
372
367
|
{
|
373
|
-
|
368
|
+
char *end;
|
369
|
+
char *start = str;
|
370
|
+
// Trim leading space
|
371
|
+
while(isspace(*str)) str++;
|
374
372
|
|
375
|
-
//
|
376
|
-
while(isspace(*str)) str++;
|
377
|
-
|
378
|
-
if(*str == 0) // All spaces?
|
379
|
-
return str;
|
380
|
-
|
381
|
-
// Trim trailing space
|
382
|
-
end = str + strlen(str) - 1;
|
383
|
-
while(end > str && isspace(*end)) end--;
|
384
|
-
|
385
|
-
// Write new null terminator
|
386
|
-
*(end+1) = 0;
|
387
|
-
|
388
|
-
return str;
|
389
|
-
}
|
390
|
-
|
391
|
-
char *trimsuffix(char *str, const char *suffix)
|
392
|
-
{
|
393
|
-
char delims[] = " ";
|
394
|
-
char *result = NULL;
|
395
|
-
char *workString = malloc(strlen(str)+3);
|
396
|
-
char *workBuffer = malloc(strlen(str)+3);
|
397
|
-
strcpy(workString, str);
|
398
|
-
str[0] = '\0';
|
399
|
-
result = strtok(workString, delims);
|
400
|
-
while(result != NULL)
|
373
|
+
if(*str == 0) // All spaces?
|
401
374
|
{
|
402
|
-
|
403
|
-
|
404
|
-
if (len > 3)
|
405
|
-
{
|
406
|
-
if (workBuffer[len-1] == 'S')
|
407
|
-
{
|
408
|
-
char * p = strstr(workBuffer, "IES");
|
409
|
-
if (p && p[3] == '\0' && strcmp(workBuffer, "SERIES") != 0)
|
410
|
-
{
|
411
|
-
*p = 'Y';
|
412
|
-
*++p = '\0';
|
413
|
-
}
|
414
|
-
if (strcmp(workBuffer, "PLUS") != 0)
|
415
|
-
workBuffer[len-1] = '\0';
|
416
|
-
}
|
417
|
-
}
|
418
|
-
strcat(str, workBuffer);
|
419
|
-
result = strtok( NULL, delims );
|
420
|
-
if (result)
|
421
|
-
strcat(str, " ");
|
375
|
+
*start = '\0';
|
376
|
+
return start;
|
422
377
|
}
|
423
|
-
|
424
|
-
|
425
|
-
|
378
|
+
// Trim trailing space
|
379
|
+
end = str + strlen(str) - 1;
|
380
|
+
while(end > str && isspace(*end)) end--;
|
381
|
+
|
382
|
+
// Write new null terminator
|
383
|
+
*(end+1) = 0;
|
384
|
+
memmove(start, str, strlen(str)+1);
|
385
|
+
|
386
|
+
return start;
|
426
387
|
}
|
427
388
|
|
428
389
|
|
429
390
|
// !!!! This ONLY works where rep is longer than with
|
430
|
-
char *str_replace(char *orig, const char *rep, const char *with)
|
391
|
+
char * str_replace(char *orig, const char *rep, const char *with)
|
431
392
|
{
|
432
393
|
char * s = orig;
|
433
394
|
while (s=strstr(s, rep))
|
434
395
|
{
|
435
|
-
|
436
|
-
|
396
|
+
memmove(s, with, strlen(with));
|
397
|
+
memmove(s+strlen(with), s+strlen(rep), strlen(s+strlen(rep))+1);
|
437
398
|
s = s + strlen(with)-1;
|
438
399
|
}
|
439
400
|
return orig;
|
Binary file
|
@@ -9,6 +9,10 @@ class TestConsistentCompany < Test::Unit::TestCase
|
|
9
9
|
str = ' my test '
|
10
10
|
company = str.company_namer
|
11
11
|
assert_equal(' my test ', str)
|
12
|
+
# empty name
|
13
|
+
assert_equal("", "".company_namer)
|
14
|
+
assert_equal("", " ".company_namer)
|
15
|
+
assert_equal("", "___".company_namer)
|
12
16
|
# remove leading and trailing space
|
13
17
|
assert_equal('TEST', " test ".company_namer)
|
14
18
|
# remove embedded space
|
@@ -21,8 +25,6 @@ class TestConsistentCompany < Test::Unit::TestCase
|
|
21
25
|
assert_equal("AAA", "The AAA Company".company_namer)
|
22
26
|
# remove punctuation
|
23
27
|
assert_equal("TESTERS", %q{The, ?%^* tester's company!}.company_namer)
|
24
|
-
# empty name
|
25
|
-
assert_equal("", "".company_namer)
|
26
28
|
# a very long name
|
27
29
|
assert_equal("A"*1000+"NAMEISHERE", (" A"*1000 + 'NAME IS HERE ').company_namer)
|
28
30
|
# parenthesis matching
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: consistent_company
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-10-06 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: shoulda
|
16
|
-
requirement: &
|
16
|
+
requirement: &70092755072840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70092755072840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bundler
|
27
|
-
requirement: &
|
27
|
+
requirement: &70092755071920 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.18
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70092755071920
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: jeweler
|
38
|
-
requirement: &
|
38
|
+
requirement: &70092755070940 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.6.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70092755070940
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rcov
|
49
|
-
requirement: &
|
49
|
+
requirement: &70092755070060 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70092755070060
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: pry
|
60
|
-
requirement: &
|
60
|
+
requirement: &70092755068760 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70092755068760
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rake-compiler
|
71
|
-
requirement: &
|
71
|
+
requirement: &70092755067580 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: 0.7.6
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70092755067580
|
80
80
|
description: Ruby C Extension to normalize a company name. Useful when company names
|
81
81
|
come from various sources.
|
82
82
|
email: dcleven@marketron.com
|
@@ -117,7 +117,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
segments:
|
119
119
|
- 0
|
120
|
-
hash:
|
120
|
+
hash: 3143384625833077520
|
121
121
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
122
|
none: false
|
123
123
|
requirements:
|