consistent_company 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/consistent_company.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{consistent_company}
|
8
|
-
s.version = "0.0.4"
|
8
|
+
s.version = "0.0.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = [%q{Doug Cleven}]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-10-06}
|
13
13
|
s.description = %q{Ruby C Extension to normalize a company name. Useful when company names come from various sources.}
|
14
14
|
s.email = %q{dcleven@marketron.com}
|
15
15
|
s.extensions = [%q{ext/consistent_company/extconf.rb}]
|
@@ -6,9 +6,8 @@
|
|
6
6
|
|
7
7
|
char * TransformCompany(char * inString);
|
8
8
|
static int IsCompanyWord(char * inWord);
|
9
|
-
char *trimwhitespace(char *str);
|
10
|
-
char *trimsuffix(char *str, const char *suffix);
|
11
|
-
char *str_replace(char *orig, const char *rep, const char *with);
|
9
|
+
char * trimwhitespace(char *str);
|
10
|
+
char * str_replace(char *orig, const char *rep, const char *with);
|
12
11
|
|
13
12
|
static VALUE rb_ConsistentCompany_Init(VALUE self)
|
14
13
|
{
|
@@ -19,10 +18,12 @@ static VALUE rb_ConsistentCompany_Init(VALUE self)
|
|
19
18
|
static VALUE rb_CompanyNamer(VALUE self)
|
20
19
|
{
|
21
20
|
char * pSelf = RSTRING_PTR(self);
|
22
|
-
int selfLen = strlen(pSelf)+2;
|
23
|
-
int workLen = selfLen;
|
21
|
+
int selfLen = (int)strlen(pSelf)+2;
|
22
|
+
int workLen = (int)selfLen;
|
24
23
|
char * s = pSelf;
|
25
|
-
|
24
|
+
if (*pSelf == '\0')
|
25
|
+
return self;
|
26
|
+
|
26
27
|
// calc size of work strings
|
27
28
|
// while processing we turn & = AND, + = PLUS
|
28
29
|
// and we add space at front and back
|
@@ -31,7 +32,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
31
32
|
workLen +=3; // worst case we add 3 chars
|
32
33
|
s++;
|
33
34
|
}
|
34
|
-
workLen +=
|
35
|
+
workLen += 90; // add space front and back
|
35
36
|
//////////////
|
36
37
|
|
37
38
|
// for company only
|
@@ -50,7 +51,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
50
51
|
inString[i] = toupper( inString[i] );
|
51
52
|
|
52
53
|
inString = trimwhitespace(inString);
|
53
|
-
|
54
|
+
int len = (int)strlen(inString);
|
54
55
|
for (i = 0; i < len; i++)
|
55
56
|
{
|
56
57
|
if (inString[i] == '(')
|
@@ -79,7 +80,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
79
80
|
{
|
80
81
|
// ..(xx)..
|
81
82
|
inString[left1++] = ' ';
|
82
|
-
|
83
|
+
memmove(&inString[left1], &inString[right1+1], strlen(inString+right1+1)+1);
|
83
84
|
}
|
84
85
|
else
|
85
86
|
// ..(xx
|
@@ -91,21 +92,21 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
91
92
|
{
|
92
93
|
// ..(xx)..(xx)..
|
93
94
|
inString[left1] = ' ';
|
94
|
-
|
95
|
+
memmove(inString + left1 + 1, inString + right1 + 1, left2-right1-1);
|
95
96
|
inString[left1+1+left2-right1-1] = ' ';
|
96
|
-
|
97
|
+
memmove(inString+left1+1+left2-right1, inString + right2+1, strlen(inString+right2+1)+1);
|
97
98
|
}
|
98
99
|
else if ((left1 < left2) && (left2 < right1) && (right1 < right2))
|
99
100
|
{
|
100
101
|
// ..(xx(xx)xx)..
|
101
102
|
inString[left1] = ' ';
|
102
|
-
|
103
|
+
memmove(inString+left1+1, inString+right2+1, strlen(inString+right2+1)+1);
|
103
104
|
}
|
104
105
|
else if ((left1 < right1) && (right1 < left2) && (right2 == -1))
|
105
106
|
{
|
106
107
|
// ..(xx)..(xx
|
107
108
|
inString[left1] = ' ';
|
108
|
-
|
109
|
+
memmove(inString+left1+1, inString+right1+1, left2-right1-1);
|
109
110
|
inString[left1+1+left2-right1] = '\0';
|
110
111
|
}
|
111
112
|
else if ((left1 < left2) && (left2 < right1) && (right2 == -1))
|
@@ -122,7 +123,7 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
122
123
|
char singleCharStr[2];
|
123
124
|
singleCharStr[1] = '\0';
|
124
125
|
returnString[0] = '\0';
|
125
|
-
for (i = 0; i < strlen(inString); i++)
|
126
|
+
for (i = 0; i < (int)strlen(inString); i++)
|
126
127
|
{
|
127
128
|
ch = inString[i];
|
128
129
|
asc = (int)ch;
|
@@ -161,19 +162,11 @@ static VALUE rb_CompanyNamer(VALUE self)
|
|
161
162
|
strcat(returnString, " ");
|
162
163
|
}
|
163
164
|
}
|
164
|
-
|
165
|
-
// {
|
166
|
-
// char buff[200];
|
167
|
-
// sprintf(buff, "workLen %d %s workString %d returnString %d %s", workLen, pSelf, strlen(workString), strlen(returnString), returnString);
|
168
|
-
// return rb_str_new2(trimwhitespace(buff));
|
169
|
-
// }
|
170
|
-
char * p;
|
165
|
+
|
171
166
|
str_replace(returnString, " AND ", " & ");
|
172
167
|
|
173
|
-
int oldLen = strlen(returnString);
|
174
|
-
// returnString = trimsuffix(returnString, "s");
|
175
168
|
returnString = trimwhitespace(returnString);
|
176
|
-
|
169
|
+
returnString = TransformCompany(returnString);
|
177
170
|
VALUE return_value = rb_str_new2(trimwhitespace(returnString));
|
178
171
|
free(returnString);
|
179
172
|
free(workString);
|
@@ -190,16 +183,15 @@ FIRST FEDERAL SAVINGS becomes 1ST FEDERAL SAVINGS
|
|
190
183
|
char * TransformCompany(char * resultString)
|
191
184
|
{
|
192
185
|
// resultString should have been allocated with 2 extra char for our padding here
|
193
|
-
char * buf = malloc(strlen(resultString)+
|
186
|
+
char * buf = malloc(strlen(resultString)+30);
|
194
187
|
strcpy(buf, " ");
|
195
188
|
strcat(buf,resultString);
|
196
189
|
strcat(buf, " ");
|
197
190
|
strcpy(resultString, buf);
|
198
191
|
free(buf);
|
199
|
-
|
192
|
+
|
200
193
|
char * spaceLoc;
|
201
194
|
char * s = resultString;
|
202
|
-
|
203
195
|
str_replace(s, " THE ", " ");
|
204
196
|
str_replace(s, " ONE ", " 1 ");
|
205
197
|
str_replace(s, " TWO ", " 2 ");
|
@@ -228,7 +220,6 @@ char * TransformCompany(char * resultString)
|
|
228
220
|
str_replace(s, " CENTRE ", " CTR ");
|
229
221
|
str_replace(s, " CENTER ", " CTR ");
|
230
222
|
str_replace(s, " CNTR ", " CTR ");
|
231
|
-
str_replace(s, " CTR ", " CTR ");
|
232
223
|
str_replace(s, " CENT ", " CTR ");
|
233
224
|
str_replace(s, " CENTR ", " CTR ");
|
234
225
|
str_replace(s, " AUTOMOTIVE ", " AUTO ");
|
@@ -251,11 +242,12 @@ char * TransformCompany(char * resultString)
|
|
251
242
|
str_replace(s, " INTERNATIONAL ", " INT ");
|
252
243
|
str_replace(s, " INTERNATION ", " INT ");
|
253
244
|
str_replace(s, " INTL ", " INT ");
|
254
|
-
str_replace(s, " MARKETING ", "
|
245
|
+
str_replace(s, " MARKETING ", " MKT ");
|
246
|
+
str_replace(s, " MKTG ", " MKT ");
|
255
247
|
str_replace(s, " MANAGEMENT ", " MGT ");
|
256
248
|
str_replace(s, " MGMT ", " MGT ");
|
257
249
|
|
258
|
-
s = trimwhitespace(s);
|
250
|
+
s = trimwhitespace(s);
|
259
251
|
spaceLoc = strstr(s, " ");
|
260
252
|
//spaceLoc = resultString.IndexOf(" ");
|
261
253
|
if (spaceLoc && strlen(s) > 3) // More than one word and more than 3 chars
|
@@ -268,7 +260,7 @@ char * TransformCompany(char * resultString)
|
|
268
260
|
strncmp(s+3, " ", 1) != 0 &&
|
269
261
|
strstr(s, "PLUS") != s + 2)
|
270
262
|
{
|
271
|
-
|
263
|
+
memmove(s, s+2, strlen(s+2)+1);
|
272
264
|
}
|
273
265
|
|
274
266
|
spaceLoc = strrchr(s, ' ');
|
@@ -283,6 +275,8 @@ char * TransformCompany(char * resultString)
|
|
283
275
|
spaceLoc = strrchr(s, ' ');
|
284
276
|
if (spaceLoc) // Look at the new last word
|
285
277
|
{
|
278
|
+
free(lastWord);
|
279
|
+
lastWord = malloc(strlen(spaceLoc)+1);
|
286
280
|
strcpy(lastWord, spaceLoc + 1);
|
287
281
|
if (IsCompanyWord(lastWord))
|
288
282
|
{
|
@@ -295,8 +289,7 @@ char * TransformCompany(char * resultString)
|
|
295
289
|
if (s[strlen(s)-1] == '&')
|
296
290
|
s[strlen(s)-1] = '\0';
|
297
291
|
}
|
298
|
-
|
299
|
-
str_replace(s, " ", "");
|
292
|
+
s = str_replace(s, " ", "");
|
300
293
|
return s;
|
301
294
|
}
|
302
295
|
|
@@ -367,73 +360,41 @@ int IsCompanyWord(char * inWord)
|
|
367
360
|
|
368
361
|
/*
|
369
362
|
Trim whitespace from front and back of string
|
363
|
+
return the same ptr as received, move the non-whitespace chars
|
364
|
+
to the front and trim the end with \0
|
370
365
|
*/
|
371
|
-
char *trimwhitespace(char *str)
|
366
|
+
char * trimwhitespace(char *str)
|
372
367
|
{
|
373
|
-
|
368
|
+
char *end;
|
369
|
+
char *start = str;
|
370
|
+
// Trim leading space
|
371
|
+
while(isspace(*str)) str++;
|
374
372
|
|
375
|
-
//
|
376
|
-
while(isspace(*str)) str++;
|
377
|
-
|
378
|
-
if(*str == 0) // All spaces?
|
379
|
-
return str;
|
380
|
-
|
381
|
-
// Trim trailing space
|
382
|
-
end = str + strlen(str) - 1;
|
383
|
-
while(end > str && isspace(*end)) end--;
|
384
|
-
|
385
|
-
// Write new null terminator
|
386
|
-
*(end+1) = 0;
|
387
|
-
|
388
|
-
return str;
|
389
|
-
}
|
390
|
-
|
391
|
-
char *trimsuffix(char *str, const char *suffix)
|
392
|
-
{
|
393
|
-
char delims[] = " ";
|
394
|
-
char *result = NULL;
|
395
|
-
char *workString = malloc(strlen(str)+3);
|
396
|
-
char *workBuffer = malloc(strlen(str)+3);
|
397
|
-
strcpy(workString, str);
|
398
|
-
str[0] = '\0';
|
399
|
-
result = strtok(workString, delims);
|
400
|
-
while(result != NULL)
|
373
|
+
if(*str == 0) // All spaces?
|
401
374
|
{
|
402
|
-
|
403
|
-
|
404
|
-
if (len > 3)
|
405
|
-
{
|
406
|
-
if (workBuffer[len-1] == 'S')
|
407
|
-
{
|
408
|
-
char * p = strstr(workBuffer, "IES");
|
409
|
-
if (p && p[3] == '\0' && strcmp(workBuffer, "SERIES") != 0)
|
410
|
-
{
|
411
|
-
*p = 'Y';
|
412
|
-
*++p = '\0';
|
413
|
-
}
|
414
|
-
if (strcmp(workBuffer, "PLUS") != 0)
|
415
|
-
workBuffer[len-1] = '\0';
|
416
|
-
}
|
417
|
-
}
|
418
|
-
strcat(str, workBuffer);
|
419
|
-
result = strtok( NULL, delims );
|
420
|
-
if (result)
|
421
|
-
strcat(str, " ");
|
375
|
+
*start = '\0';
|
376
|
+
return start;
|
422
377
|
}
|
423
|
-
|
424
|
-
|
425
|
-
|
378
|
+
// Trim trailing space
|
379
|
+
end = str + strlen(str) - 1;
|
380
|
+
while(end > str && isspace(*end)) end--;
|
381
|
+
|
382
|
+
// Write new null terminator
|
383
|
+
*(end+1) = 0;
|
384
|
+
memmove(start, str, strlen(str)+1);
|
385
|
+
|
386
|
+
return start;
|
426
387
|
}
|
427
388
|
|
428
389
|
|
429
390
|
// !!!! This ONLY works where rep is longer than with
|
430
|
-
char *str_replace(char *orig, const char *rep, const char *with)
|
391
|
+
char * str_replace(char *orig, const char *rep, const char *with)
|
431
392
|
{
|
432
393
|
char * s = orig;
|
433
394
|
while (s=strstr(s, rep))
|
434
395
|
{
|
435
|
-
|
436
|
-
|
396
|
+
memmove(s, with, strlen(with));
|
397
|
+
memmove(s+strlen(with), s+strlen(rep), strlen(s+strlen(rep))+1);
|
437
398
|
s = s + strlen(with)-1;
|
438
399
|
}
|
439
400
|
return orig;
|
Binary file
|
@@ -9,6 +9,10 @@ class TestConsistentCompany < Test::Unit::TestCase
|
|
9
9
|
str = ' my test '
|
10
10
|
company = str.company_namer
|
11
11
|
assert_equal(' my test ', str)
|
12
|
+
# empty name
|
13
|
+
assert_equal("", "".company_namer)
|
14
|
+
assert_equal("", " ".company_namer)
|
15
|
+
assert_equal("", "___".company_namer)
|
12
16
|
# remove leading and trailing space
|
13
17
|
assert_equal('TEST', " test ".company_namer)
|
14
18
|
# remove embedded space
|
@@ -21,8 +25,6 @@ class TestConsistentCompany < Test::Unit::TestCase
|
|
21
25
|
assert_equal("AAA", "The AAA Company".company_namer)
|
22
26
|
# remove punctuation
|
23
27
|
assert_equal("TESTERS", %q{The, ?%^* tester's company!}.company_namer)
|
24
|
-
# empty name
|
25
|
-
assert_equal("", "".company_namer)
|
26
28
|
# a very long name
|
27
29
|
assert_equal("A"*1000+"NAMEISHERE", (" A"*1000 + 'NAME IS HERE ').company_namer)
|
28
30
|
# parenthesis matching
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: consistent_company
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-10-06 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: shoulda
|
16
|
-
requirement: &
|
16
|
+
requirement: &70092755072840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70092755072840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bundler
|
27
|
-
requirement: &
|
27
|
+
requirement: &70092755071920 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.18
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70092755071920
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: jeweler
|
38
|
-
requirement: &
|
38
|
+
requirement: &70092755070940 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.6.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70092755070940
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rcov
|
49
|
-
requirement: &
|
49
|
+
requirement: &70092755070060 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70092755070060
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: pry
|
60
|
-
requirement: &
|
60
|
+
requirement: &70092755068760 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70092755068760
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rake-compiler
|
71
|
-
requirement: &
|
71
|
+
requirement: &70092755067580 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: 0.7.6
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70092755067580
|
80
80
|
description: Ruby C Extension to normalize a company name. Useful when company names
|
81
81
|
come from various sources.
|
82
82
|
email: dcleven@marketron.com
|
@@ -117,7 +117,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
segments:
|
119
119
|
- 0
|
120
|
-
hash:
|
120
|
+
hash: 3143384625833077520
|
121
121
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
122
|
none: false
|
123
123
|
requirements:
|