geo_coder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +12 -0
- data/Gemfile.lock +32 -0
- data/History.txt +6 -0
- data/Makefile +13 -0
- data/Manifest.txt +18 -0
- data/README.rdoc +197 -0
- data/Rakefile +53 -0
- data/TODO.txt +8 -0
- data/VERSION +1 -0
- data/bin/build_indexes +8 -0
- data/bin/rebuild_cluster +22 -0
- data/bin/rebuild_metaphones +23 -0
- data/bin/tiger_import +59 -0
- data/demos/demo/app/ext/geocodewrap.rb +84 -0
- data/demos/demo/app/views/index.builder +13 -0
- data/demos/demo/app/views/index.erb +71 -0
- data/demos/demo/config.ru +12 -0
- data/demos/demo/config/bootstraps.rb +130 -0
- data/demos/demo/config/geoenvironment.rb +25 -0
- data/demos/demo/geocoder_helper.rb +12 -0
- data/demos/demo/geocom_geocode.rb +10 -0
- data/demos/demo/main.rb +3 -0
- data/demos/demo/rakefile.rb +17 -0
- data/demos/demo/tmp/restart.txt +0 -0
- data/demos/simpledemo/views/index.builder +13 -0
- data/demos/simpledemo/views/index.erb +69 -0
- data/demos/simpledemo/ws.rb +83 -0
- data/doc/Makefile +7 -0
- data/doc/html4css1.css +279 -0
- data/doc/lookup.rst +193 -0
- data/doc/parsing.rst +125 -0
- data/doc/voidspace.css +147 -0
- data/geo_coder.gemspec +172 -0
- data/lib/geocoder/us.rb +21 -0
- data/lib/geocoder/us/address.rb +290 -0
- data/lib/geocoder/us/constants.rb +670 -0
- data/lib/geocoder/us/database.rb +745 -0
- data/lib/geocoder/us/import.rb +181 -0
- data/lib/geocoder/us/import/tiger.rb +13 -0
- data/lib/geocoder/us/numbers.rb +58 -0
- data/navteq/README +4 -0
- data/navteq/convert.sql +37 -0
- data/navteq/navteq_import +39 -0
- data/navteq/prepare.sql +92 -0
- data/sql/cluster.sql +16 -0
- data/sql/convert.sql +80 -0
- data/sql/create.sql +37 -0
- data/sql/index.sql +12 -0
- data/sql/place.csv +104944 -0
- data/sql/place.sql +104948 -0
- data/sql/setup.sql +78 -0
- data/src/Makefile +13 -0
- data/src/README +14 -0
- data/src/liblwgeom/Makefile +75 -0
- data/src/liblwgeom/box2d.c +54 -0
- data/src/liblwgeom/lex.yy.c +4799 -0
- data/src/liblwgeom/liblwgeom.h +1405 -0
- data/src/liblwgeom/lwalgorithm.c +946 -0
- data/src/liblwgeom/lwalgorithm.h +52 -0
- data/src/liblwgeom/lwcircstring.c +759 -0
- data/src/liblwgeom/lwcollection.c +541 -0
- data/src/liblwgeom/lwcompound.c +118 -0
- data/src/liblwgeom/lwcurvepoly.c +86 -0
- data/src/liblwgeom/lwgeom.c +886 -0
- data/src/liblwgeom/lwgeom_api.c +2201 -0
- data/src/liblwgeom/lwgparse.c +1219 -0
- data/src/liblwgeom/lwgunparse.c +1054 -0
- data/src/liblwgeom/lwline.c +525 -0
- data/src/liblwgeom/lwmcurve.c +125 -0
- data/src/liblwgeom/lwmline.c +137 -0
- data/src/liblwgeom/lwmpoint.c +138 -0
- data/src/liblwgeom/lwmpoly.c +141 -0
- data/src/liblwgeom/lwmsurface.c +129 -0
- data/src/liblwgeom/lwpoint.c +439 -0
- data/src/liblwgeom/lwpoly.c +579 -0
- data/src/liblwgeom/lwsegmentize.c +1047 -0
- data/src/liblwgeom/lwutil.c +369 -0
- data/src/liblwgeom/measures.c +861 -0
- data/src/liblwgeom/postgis_config.h +93 -0
- data/src/liblwgeom/ptarray.c +847 -0
- data/src/liblwgeom/vsprintf.c +179 -0
- data/src/liblwgeom/wktparse.h +126 -0
- data/src/liblwgeom/wktparse.lex +74 -0
- data/src/liblwgeom/wktparse.tab.c +2353 -0
- data/src/liblwgeom/wktparse.tab.h +145 -0
- data/src/liblwgeom/wktparse.y +385 -0
- data/src/libsqlite3_geocoder/Makefile +22 -0
- data/src/libsqlite3_geocoder/Makefile.nix +15 -0
- data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
- data/src/libsqlite3_geocoder/extension.c +121 -0
- data/src/libsqlite3_geocoder/extension.h +13 -0
- data/src/libsqlite3_geocoder/levenshtein.c +42 -0
- data/src/libsqlite3_geocoder/metaphon.c +278 -0
- data/src/libsqlite3_geocoder/util.c +37 -0
- data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
- data/src/metaphone/Makefile +7 -0
- data/src/metaphone/README +49 -0
- data/src/metaphone/extension.c +37 -0
- data/src/metaphone/metaphon.c +251 -0
- data/src/shp2sqlite/Makefile +37 -0
- data/src/shp2sqlite/Makefile.nix +36 -0
- data/src/shp2sqlite/Makefile.redhat +35 -0
- data/src/shp2sqlite/dbfopen.c +1595 -0
- data/src/shp2sqlite/getopt.c +695 -0
- data/src/shp2sqlite/getopt.h +127 -0
- data/src/shp2sqlite/shapefil.h +500 -0
- data/src/shp2sqlite/shp2sqlite.c +1974 -0
- data/src/shp2sqlite/shpopen.c +1894 -0
- data/tests/address.rb +236 -0
- data/tests/benchmark.rb +20 -0
- data/tests/constants.rb +57 -0
- data/tests/data/address-sample.csv +52 -0
- data/tests/data/db-test.csv +57 -0
- data/tests/data/locations.csv +4 -0
- data/tests/database.rb +137 -0
- data/tests/generate.rb +34 -0
- data/tests/numbers.rb +46 -0
- data/tests/run.rb +11 -0
- metadata +237 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
all: libsqlite3_geocoder.so
|
2
|
+
CC=gcc -fPIC
|
3
|
+
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o
|
4
|
+
$(CC) -lsqlite3 -lm -I/usr/include -shared $^ -o $@
|
5
|
+
test: wkb_compress.c
|
6
|
+
$(CC) -DTEST -o wkb_compress $^
|
7
|
+
clean:
|
8
|
+
rm -f *.o *.so wkb_compress
|
9
|
+
|
10
|
+
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
|
11
|
+
$(CC) -lsqlite3 -lm -I/usr/include -shared $^ -o $@
|
12
|
+
|
13
|
+
test: test_wkb_compress test_levenshtein
|
14
|
+
|
15
|
+
test_wkb_compress: wkb_compress.c
|
16
|
+
$(CC) -DTEST -o wkb_compress $^
|
17
|
+
|
18
|
+
test_levenshtein: levenshtein.c
|
19
|
+
$(CC) -DTEST -o levenshtein $^
|
20
|
+
|
21
|
+
clean:
|
22
|
+
rm -f *.o *.so wkb_compress levenshtein
|
@@ -0,0 +1,15 @@
|
|
1
|
+
all: libsqlite3_geocoder.so
|
2
|
+
|
3
|
+
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
|
4
|
+
$(CC) -shared $^ -o $@
|
5
|
+
|
6
|
+
test: test_wkb_compress test_levenshtein
|
7
|
+
|
8
|
+
test_wkb_compress: wkb_compress.c
|
9
|
+
$(CC) -DTEST -o wkb_compress $^
|
10
|
+
|
11
|
+
test_levenshtein: levenshtein.c
|
12
|
+
$(CC) -DTEST -o levenshtein $^
|
13
|
+
|
14
|
+
clean:
|
15
|
+
rm -f *.o *.so wkb_compress levenshtein
|
@@ -0,0 +1,15 @@
|
|
1
|
+
all: libsqlite3_geocoder.so
|
2
|
+
CFLAGS=-fPIC
|
3
|
+
libsqlite3_geocoder.so: extension.o wkb_compress.o util.o metaphon.o levenshtein.o
|
4
|
+
$(CC) $(CFLAGS) -shared $^ -o $@
|
5
|
+
|
6
|
+
test: test_wkb_compress test_levenshtein
|
7
|
+
|
8
|
+
test_wkb_compress: wkb_compress.c
|
9
|
+
$(CC) -DTEST -o wkb_compress $^
|
10
|
+
|
11
|
+
test_levenshtein: levenshtein.c
|
12
|
+
$(CC) -DTEST -o levenshtein $^
|
13
|
+
|
14
|
+
clean:
|
15
|
+
rm -f *.o *.so wkb_compress levenshtein
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# include <sqlite3ext.h>
|
2
|
+
# include <stdio.h>
|
3
|
+
# include <string.h>
|
4
|
+
# include <assert.h>
|
5
|
+
# include <math.h>
|
6
|
+
|
7
|
+
# include "extension.h"
|
8
|
+
|
9
|
+
static SQLITE_EXTENSION_INIT1;
|
10
|
+
|
11
|
+
static void
|
12
|
+
sqlite3_metaphone (sqlite3_context *context, int argc, sqlite3_value **argv) {
|
13
|
+
const unsigned char *input = sqlite3_value_text(argv[0]);
|
14
|
+
int max_phones = 0;
|
15
|
+
char *output;
|
16
|
+
int len;
|
17
|
+
if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
|
18
|
+
sqlite3_result_null(context);
|
19
|
+
return;
|
20
|
+
}
|
21
|
+
if (argc > 1)
|
22
|
+
max_phones = sqlite3_value_int(argv[1]);
|
23
|
+
if (max_phones <= 0)
|
24
|
+
max_phones = strlen(input);
|
25
|
+
output = sqlite3_malloc((max_phones+1)*sizeof(char));
|
26
|
+
len = metaphone(input, output, max_phones);
|
27
|
+
sqlite3_result_text(context, output, len, sqlite3_free);
|
28
|
+
}
|
29
|
+
|
30
|
+
static void
|
31
|
+
sqlite3_levenshtein (sqlite3_context *context, int argc, sqlite3_value **argv) {
|
32
|
+
const unsigned char *s1 = sqlite3_value_text(argv[0]),
|
33
|
+
*s2 = sqlite3_value_text(argv[1]);
|
34
|
+
double dist;
|
35
|
+
if (sqlite3_value_type(argv[0]) == SQLITE_NULL ||
|
36
|
+
sqlite3_value_type(argv[1]) == SQLITE_NULL) {
|
37
|
+
sqlite3_result_null(context);
|
38
|
+
return;
|
39
|
+
}
|
40
|
+
dist = levenshtein_distance(s1, s2);
|
41
|
+
sqlite3_result_double(context, dist);
|
42
|
+
}
|
43
|
+
|
44
|
+
static void
|
45
|
+
sqlite3_digit_suffix (sqlite3_context *context,
|
46
|
+
int argc, sqlite3_value **argv) {
|
47
|
+
if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
|
48
|
+
sqlite3_result_null(context);
|
49
|
+
return;
|
50
|
+
}
|
51
|
+
const unsigned char *input = sqlite3_value_text(argv[0]);
|
52
|
+
char *output = sqlite3_malloc((strlen(input)+1) * sizeof(char));
|
53
|
+
size_t len = digit_suffix(input, output);
|
54
|
+
sqlite3_result_text(context, output, len, sqlite3_free);
|
55
|
+
}
|
56
|
+
|
57
|
+
static void
|
58
|
+
sqlite3_nondigit_prefix (sqlite3_context *context,
|
59
|
+
int argc, sqlite3_value **argv) {
|
60
|
+
if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
|
61
|
+
sqlite3_result_null(context);
|
62
|
+
return;
|
63
|
+
}
|
64
|
+
const unsigned char *input = sqlite3_value_text(argv[0]);
|
65
|
+
char *output = sqlite3_malloc((strlen(input)+1) * sizeof(char));
|
66
|
+
size_t len = nondigit_prefix(input, output);
|
67
|
+
sqlite3_result_text(context, output, len, sqlite3_free);
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
static void
|
72
|
+
sqlite3_compress_wkb_line (sqlite3_context *context,
|
73
|
+
int argc, sqlite3_value **argv) {
|
74
|
+
if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
|
75
|
+
sqlite3_result_null(context);
|
76
|
+
return;
|
77
|
+
}
|
78
|
+
unsigned long input_len = sqlite3_value_bytes(argv[0]);
|
79
|
+
const void *input = sqlite3_value_blob(argv[0]);
|
80
|
+
unsigned long output_len = ceil((input_len-9)/8.0) * 4;
|
81
|
+
unsigned long len = 0;
|
82
|
+
void *output = sqlite3_malloc(output_len);
|
83
|
+
len = compress_wkb_line(output, input, input_len);
|
84
|
+
assert(len == output_len);
|
85
|
+
sqlite3_result_blob(context, output, len, sqlite3_free);
|
86
|
+
}
|
87
|
+
|
88
|
+
static void
|
89
|
+
sqlite3_uncompress_wkb_line (sqlite3_context *context,
|
90
|
+
int argc, sqlite3_value **argv) {
|
91
|
+
unsigned long input_len = sqlite3_value_bytes(argv[0]);
|
92
|
+
const void *input = sqlite3_value_blob(argv[0]);
|
93
|
+
unsigned long output_len = input_len*2+9;
|
94
|
+
unsigned long len = 0;
|
95
|
+
void *output = sqlite3_malloc(output_len);
|
96
|
+
len = uncompress_wkb_line(output, input, input_len);
|
97
|
+
assert(len == output_len);
|
98
|
+
sqlite3_result_blob(context, output, len, sqlite3_free);
|
99
|
+
}
|
100
|
+
|
101
|
+
int sqlite3_extension_init (sqlite3 * db, char **pzErrMsg,
|
102
|
+
const sqlite3_api_routines *pApi) {
|
103
|
+
SQLITE_EXTENSION_INIT2(pApi);
|
104
|
+
|
105
|
+
sqlite3_create_function(db, "metaphone", 1, SQLITE_ANY,
|
106
|
+
NULL, sqlite3_metaphone, NULL, NULL);
|
107
|
+
sqlite3_create_function(db, "metaphone", 2, SQLITE_ANY,
|
108
|
+
NULL, sqlite3_metaphone, NULL, NULL);
|
109
|
+
|
110
|
+
sqlite3_create_function(db, "levenshtein", 2, SQLITE_ANY,
|
111
|
+
NULL, sqlite3_levenshtein, NULL, NULL);
|
112
|
+
sqlite3_create_function(db, "compress_wkb_line", 1, SQLITE_ANY,
|
113
|
+
NULL, sqlite3_compress_wkb_line, NULL, NULL);
|
114
|
+
sqlite3_create_function(db, "uncompress_wkb_line", 1, SQLITE_ANY,
|
115
|
+
NULL, sqlite3_uncompress_wkb_line, NULL, NULL);
|
116
|
+
sqlite3_create_function(db, "digit_suffix", 1, SQLITE_ANY,
|
117
|
+
NULL, sqlite3_digit_suffix, NULL, NULL);
|
118
|
+
sqlite3_create_function(db, "nondigit_prefix", 1, SQLITE_ANY,
|
119
|
+
NULL, sqlite3_nondigit_prefix, NULL, NULL);
|
120
|
+
return 0;
|
121
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifndef SQLITE3_GEOCODER
|
2
|
+
#define SQLITE3_GEOCODER
|
3
|
+
|
4
|
+
#include <stdint.h>
|
5
|
+
|
6
|
+
int metaphone(const char *Word, char *Metaph, int max_phones);
|
7
|
+
double levenshtein_distance (const unsigned char *s1, const unsigned char *s2);
|
8
|
+
signed int rindex_nondigit (const char *string);
|
9
|
+
signed int nondigit_prefix (const char *input, char *output);
|
10
|
+
uint32_t compress_wkb_line (void *dest, const void *src, uint32_t len);
|
11
|
+
uint32_t uncompress_wkb_line (void *dest, const void *src, uint32_t len);
|
12
|
+
|
13
|
+
#endif
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# include <string.h>
|
2
|
+
# define STRLEN_MAX 256
|
3
|
+
# define min(x, y) ((x) < (y) ? (x) : (y))
|
4
|
+
# define max(x, y) ((x) > (y) ? (x) : (y))
|
5
|
+
# define NO_CASE (~(unsigned char)32)
|
6
|
+
# define eql(x, y) (((x) & NO_CASE) == ((y) & NO_CASE))
|
7
|
+
|
8
|
+
static int d[STRLEN_MAX][STRLEN_MAX]; // this isn't thread safe
|
9
|
+
|
10
|
+
double levenshtein_distance (const unsigned char *s1, const unsigned char *s2) {
|
11
|
+
const size_t len1 = min(strlen(s1), STRLEN_MAX-1),
|
12
|
+
len2 = min(strlen(s2), STRLEN_MAX-1);
|
13
|
+
int cost, i, j;
|
14
|
+
|
15
|
+
for (i = 1; i <= len1; ++i) d[i][0] = i;
|
16
|
+
for (i = 1; i <= len2; ++i) d[0][i] = i;
|
17
|
+
for (i = 1; i <= len1; ++i) {
|
18
|
+
for (j = 1; j <= len2; ++j) {
|
19
|
+
cost = (eql(s1[i-1], s2[j-1]) ? 0 : 1);
|
20
|
+
d[i][j] = min(min(
|
21
|
+
d[i-1][j ] + 1, /* deletion */
|
22
|
+
d[i ][j-1] + 1), /* insertion */
|
23
|
+
d[i-1][j-1] + cost); /* substitution */
|
24
|
+
if (i > 1 && j > 1 && eql(s1[i-1], s2[j-2]) && eql(s1[i-2], s2[j-1])) {
|
25
|
+
d[i][j] = min( d[i][j],
|
26
|
+
d[i-2][j-2] + cost ); /* transposition */
|
27
|
+
}
|
28
|
+
}
|
29
|
+
}
|
30
|
+
return (d[len1][len2] / (double) max(len1, len2));
|
31
|
+
}
|
32
|
+
|
33
|
+
#ifdef TEST
|
34
|
+
#include <stdio.h>
|
35
|
+
|
36
|
+
int main (int argc, char **argv) {
|
37
|
+
if (argc < 3) return -1;
|
38
|
+
printf("%.1f%%\n", levenshtein_distance(argv[1],argv[2]) * 100);
|
39
|
+
return 0;
|
40
|
+
}
|
41
|
+
|
42
|
+
#endif
|
@@ -0,0 +1,278 @@
|
|
1
|
+
/* +++Customized by SDE for sqlite3 use 09-Mar-2009 */
|
2
|
+
/* +++File obtained from http://www.shedai.net/c/new/METAPHON.C */
|
3
|
+
/* +++Date previously modified: 05-Jul-1997 */
|
4
|
+
|
5
|
+
/*
|
6
|
+
** METAPHON.C - Phonetic string matching
|
7
|
+
**
|
8
|
+
** The Metaphone algorithm was developed by Lawrence Phillips. Like the
|
9
|
+
** Soundex algorithm, it compares words that sound alike but are spelled
|
10
|
+
** differently. Metaphone was designed to overcome difficulties encountered
|
11
|
+
** with Soundex.
|
12
|
+
**
|
13
|
+
** This implementation was written by Gary A. Parker and originally published
|
14
|
+
** in the June/July, 1991 (vol. 5 nr. 4) issue of C Gazette. As published,
|
15
|
+
** this code was explicitly placed in the public domain by the author.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#include <ctype.h>
|
19
|
+
#include <string.h> /* strlen() */
|
20
|
+
#include <stdio.h>
|
21
|
+
#define malloc(x) sqlite3_malloc((x))
|
22
|
+
#define free(x) sqlite3_free((x))
|
23
|
+
|
24
|
+
/*
|
25
|
+
** Character coding array
|
26
|
+
*/
|
27
|
+
|
28
|
+
static char vsvfn[26] = {
|
29
|
+
1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};
|
30
|
+
/* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */
|
31
|
+
|
32
|
+
/*
|
33
|
+
** Macros to access the character coding array
|
34
|
+
*/
|
35
|
+
|
36
|
+
#define vowel(x) (vsvfn[(x) - 'A'] & 1) /* AEIOU */
|
37
|
+
#define same(x) (vsvfn[(x) - 'A'] & 2) /* FJLMNR */
|
38
|
+
#define varson(x) (vsvfn[(x) - 'A'] & 4) /* CGPST */
|
39
|
+
#define frontv(x) (vsvfn[(x) - 'A'] & 8) /* EIY */
|
40
|
+
#define noghf(x) (vsvfn[(x) - 'A'] & 16) /* BDH */
|
41
|
+
|
42
|
+
int metaphone(const char *Word, char *Metaph, int max_phones) {
|
43
|
+
char *n, *n_start, *n_end; /* Pointers to string */
|
44
|
+
char *metaph_start = Metaph, *metaph_end;
|
45
|
+
/* Pointers to metaph */
|
46
|
+
int ntrans_len = strlen(Word)+4;
|
47
|
+
char *ntrans = (char *)malloc(sizeof(char) * ntrans_len);
|
48
|
+
/* Word with uppercase letters */
|
49
|
+
int KSflag; /* State flag for X translation */
|
50
|
+
|
51
|
+
/* SDE -- special case: if the word starts with a number, just
|
52
|
+
* copy the leading digits and return. This means we don't
|
53
|
+
* metaphone cardinal number suffixes (i.e. "st","nd","rd") */
|
54
|
+
int leading_digit = isdigit(*Word);
|
55
|
+
/* SDE -- check for a leading semivowel. needed because
|
56
|
+
* the copy in ntrans gets destroyed by the metaphone process. */
|
57
|
+
char leading_semivowel = '\0';
|
58
|
+
|
59
|
+
/*
|
60
|
+
** Copy word to internal buffer, dropping non-alphabetic characters
|
61
|
+
** and converting to upper case.
|
62
|
+
*/
|
63
|
+
for (n = ntrans + 1, n_end = ntrans + ntrans_len - 2;
|
64
|
+
*Word && n < n_end; ++Word)
|
65
|
+
{
|
66
|
+
/* SDE -- see previous comment */
|
67
|
+
if (leading_digit && isalpha(*Word))
|
68
|
+
break;
|
69
|
+
/* SDE -- copy numbers as well, for geocoding street names */
|
70
|
+
/* was: if (isalpha(*Word)) */
|
71
|
+
if (isalnum(*Word))
|
72
|
+
*n++ = toupper(*Word);
|
73
|
+
}
|
74
|
+
|
75
|
+
if (n == ntrans + 1) {
|
76
|
+
free(ntrans);
|
77
|
+
Metaph[0]='\0';
|
78
|
+
return 0; /* Return if zero characters */
|
79
|
+
}
|
80
|
+
else n_end = n; /* Set end of string pointer */
|
81
|
+
|
82
|
+
/*
|
83
|
+
** Pad with '\0's, front and rear
|
84
|
+
*/
|
85
|
+
|
86
|
+
*n++ = '\0';
|
87
|
+
*n = '\0';
|
88
|
+
n = ntrans;
|
89
|
+
*n++ = '\0';
|
90
|
+
|
91
|
+
/* SDE: check for leading semivowel here */
|
92
|
+
if (ntrans[1] == 'W' || ntrans[1] == 'Y')
|
93
|
+
leading_semivowel = ntrans[1];
|
94
|
+
|
95
|
+
/*
|
96
|
+
** Check for PN, KN, GN, WR, WH, and X at start
|
97
|
+
*/
|
98
|
+
|
99
|
+
switch (*n)
|
100
|
+
{
|
101
|
+
case 'P':
|
102
|
+
case 'K':
|
103
|
+
case 'G':
|
104
|
+
if ('N' == *(n + 1))
|
105
|
+
*n++ = '\0';
|
106
|
+
break;
|
107
|
+
|
108
|
+
case 'A':
|
109
|
+
if ('E' == *(n + 1))
|
110
|
+
*n++ = '\0';
|
111
|
+
break;
|
112
|
+
|
113
|
+
case 'W':
|
114
|
+
if ('R' == *(n + 1))
|
115
|
+
*n++ = '\0';
|
116
|
+
else if ('H' == *(n + 1))
|
117
|
+
{
|
118
|
+
*(n + 1) = *n;
|
119
|
+
*n++ = '\0';
|
120
|
+
}
|
121
|
+
break;
|
122
|
+
|
123
|
+
case 'X':
|
124
|
+
*n = 'S';
|
125
|
+
break;
|
126
|
+
}
|
127
|
+
|
128
|
+
/*
|
129
|
+
** Now loop through the string, stopping at the end of the string
|
130
|
+
** or when the computed Metaphone code is max_phones characters long.
|
131
|
+
*/
|
132
|
+
|
133
|
+
KSflag = 0; /* State flag for KStranslation */
|
134
|
+
for (metaph_end = Metaph + max_phones, n_start = n;
|
135
|
+
n <= n_end && Metaph < metaph_end; ++n)
|
136
|
+
{
|
137
|
+
if (KSflag)
|
138
|
+
{
|
139
|
+
KSflag = 0;
|
140
|
+
*Metaph++ = *n;
|
141
|
+
}
|
142
|
+
else
|
143
|
+
{
|
144
|
+
/* SDE -- special case: copy numbers verbatim */
|
145
|
+
if (isdigit(*n)) {
|
146
|
+
*Metaph++ = *n;
|
147
|
+
continue;
|
148
|
+
}
|
149
|
+
|
150
|
+
/* Drop duplicates except for CC */
|
151
|
+
if (*(n - 1) == *n && *n != 'C')
|
152
|
+
continue;
|
153
|
+
|
154
|
+
/* Check for F J L M N R or first letter vowel */
|
155
|
+
|
156
|
+
if (same(*n) || (n == n_start && vowel(*n)))
|
157
|
+
*Metaph++ = *n;
|
158
|
+
else switch (*n)
|
159
|
+
{
|
160
|
+
case 'B':
|
161
|
+
if (n < n_end || *(n - 1) != 'M')
|
162
|
+
*Metaph++ = *n;
|
163
|
+
break;
|
164
|
+
|
165
|
+
case 'C':
|
166
|
+
if (*(n - 1) != 'S' || !frontv(*(n + 1)))
|
167
|
+
{
|
168
|
+
if ('I' == *(n + 1) && 'A' == *(n + 2))
|
169
|
+
*Metaph++ = 'X';
|
170
|
+
else if (frontv(*(n + 1)))
|
171
|
+
*Metaph++ = 'S';
|
172
|
+
else if ('H' == *(n + 1))
|
173
|
+
*Metaph++ = ((n == n_start &&
|
174
|
+
!vowel(*(n + 2))) ||
|
175
|
+
'S' == *(n - 1)) ? 'K' : 'X';
|
176
|
+
else *Metaph++ = 'K';
|
177
|
+
}
|
178
|
+
break;
|
179
|
+
|
180
|
+
case 'D':
|
181
|
+
*Metaph++ = ('G' == *(n + 1) && frontv(*(n + 2))) ?
|
182
|
+
'J' : 'T';
|
183
|
+
break;
|
184
|
+
|
185
|
+
case 'G':
|
186
|
+
if ((*(n + 1) != 'H' || vowel(*(n + 2))) &&
|
187
|
+
(*(n + 1) != 'N' || ((n + 1) < n_end &&
|
188
|
+
(*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
|
189
|
+
(*(n - 1) != 'D' || !frontv(*(n + 1))))
|
190
|
+
{
|
191
|
+
*Metaph++ = (frontv(*(n + 1)) &&
|
192
|
+
*(n + 2) != 'G') ? 'J' : 'K';
|
193
|
+
}
|
194
|
+
else if ('H' == *(n + 1) && !noghf(*(n - 3)) &&
|
195
|
+
*(n - 4) != 'H')
|
196
|
+
{
|
197
|
+
*Metaph++ = 'F';
|
198
|
+
}
|
199
|
+
break;
|
200
|
+
|
201
|
+
case 'H':
|
202
|
+
if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
|
203
|
+
vowel(*(n + 1))))
|
204
|
+
{
|
205
|
+
*Metaph++ = 'H';
|
206
|
+
}
|
207
|
+
break;
|
208
|
+
|
209
|
+
case 'K':
|
210
|
+
if (*(n - 1) != 'C')
|
211
|
+
*Metaph++ = 'K';
|
212
|
+
break;
|
213
|
+
|
214
|
+
case 'P':
|
215
|
+
*Metaph++ = ('H' == *(n + 1)) ? 'F' : 'P';
|
216
|
+
break;
|
217
|
+
|
218
|
+
case 'Q':
|
219
|
+
*Metaph++ = 'K';
|
220
|
+
break;
|
221
|
+
|
222
|
+
case 'S':
|
223
|
+
*Metaph++ = ('H' == *(n + 1) || ('I' == *(n + 1) &&
|
224
|
+
('O' == *(n + 2) || 'A' == *(n + 2)))) ?
|
225
|
+
'X' : 'S';
|
226
|
+
break;
|
227
|
+
|
228
|
+
case 'T':
|
229
|
+
if ('I' == *(n + 1) && ('O' == *(n + 2) ||
|
230
|
+
'A' == *(n + 2)))
|
231
|
+
{
|
232
|
+
*Metaph++ = 'X';
|
233
|
+
}
|
234
|
+
else if ('H' == *(n + 1))
|
235
|
+
/* SDE: was:
|
236
|
+
*Metaph++ = 'O';
|
237
|
+
but that's WRONG. */
|
238
|
+
*Metaph++ = '0';
|
239
|
+
else if (*(n + 1) != 'C' || *(n + 2) != 'H')
|
240
|
+
*Metaph++ = 'T';
|
241
|
+
break;
|
242
|
+
|
243
|
+
case 'V':
|
244
|
+
*Metaph++ = 'F';
|
245
|
+
break;
|
246
|
+
|
247
|
+
case 'W':
|
248
|
+
case 'Y':
|
249
|
+
if (vowel(*(n + 1)))
|
250
|
+
*Metaph++ = *n;
|
251
|
+
break;
|
252
|
+
|
253
|
+
case 'X':
|
254
|
+
if (n == n_start)
|
255
|
+
*Metaph++ = 'S';
|
256
|
+
else
|
257
|
+
{
|
258
|
+
*Metaph++ = 'K';
|
259
|
+
KSflag = 1;
|
260
|
+
}
|
261
|
+
break;
|
262
|
+
|
263
|
+
case 'Z':
|
264
|
+
*Metaph++ = 'S';
|
265
|
+
break;
|
266
|
+
}
|
267
|
+
}
|
268
|
+
}
|
269
|
+
|
270
|
+
/* SDE: special case: if word consists solely of W or Y, use that. */
|
271
|
+
if (Metaph == metaph_start && leading_semivowel)
|
272
|
+
*Metaph++ = leading_semivowel;
|
273
|
+
|
274
|
+
*Metaph = '\0';
|
275
|
+
free(ntrans);
|
276
|
+
return strlen(metaph_start);
|
277
|
+
}
|
278
|
+
|