geo_normalize 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,92 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <iconv.h>
|
4
|
+
#include <errno.h>
|
5
|
+
|
6
|
+
/*
 * Lookup table indexed by an ISO-8859-1 byte value (0x00-0xFF).
 *
 * Each entry is the lowercase ASCII replacement for that byte, or NULL when
 * the byte is dropped from the normalized output. Digits map to themselves,
 * ASCII letters map to their lowercase form, and accented Latin-1 letters map
 * to an unaccented ASCII spelling of one or two characters.
 *
 * Upper- and lowercase forms of the same letter must map to the same string,
 * otherwise normalization is no longer case-insensitive. In particular both
 * 0xD0 (uppercase eth) and 0xF0 (lowercase eth) map to "d".
 */
static char *geo_charmap[256] = {
    /* 0x00 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x40 */ NULL, "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x50 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", NULL, NULL, NULL, NULL, NULL,
    /* 0x60 */ NULL, "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", NULL, NULL, NULL, NULL, NULL,
    /* 0x80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xA0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xB0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xC0 */ "a", "a", "a", "a", "a", "a", "ae", "c", "e", "e", "e", "e", "i", "i", "i", "i",
    /* 0xD0 */ "d", "n", "o", "o", "o", "o", "o", "x", "o", "u", "u", "u", "u", "y", "th", "ss",
    /* 0xE0 */ "a", "a", "a", "a", "a", "a", "ae", "c", "e", "e", "e", "e", "i", "i", "i", "i",
    /* 0xF0 */ "d", "n", "o", "o", "o", "o", "o", NULL, "o", "u", "u", "u", "u", "y", "th", "y",
};
|
24
|
+
|
25
|
+
/*
|
26
|
+
* This takes a UTF-8 string like "Rivière-du-Loup" and, optionally,
|
27
|
+
* a pre-allocated UTF-8 to ISO-8859-1//IGNORE conversion descriptor,
|
28
|
+
* and returns a normalized ascii version "riviereduloup"
|
29
|
+
*
|
30
|
+
* It's indended for processing maxmind geo data as well as geo-targetting rules before
|
31
|
+
* doing strcmp(), to tolerate variations in accent/whitespace/punctuation/capitalization
|
32
|
+
*
|
33
|
+
* Returns a freshly-malloc()ed string on success, NULL on failure. Be sure to free()
|
34
|
+
* result when you no longer need it
|
35
|
+
*/
|
36
|
+
char *
|
37
|
+
geo_normalize(char *in, iconv_t cd)
|
38
|
+
{
|
39
|
+
iconv_t local_cd = NULL;
|
40
|
+
char *inptr, *out, *outptr, *latin, *latinptr;
|
41
|
+
char *c, *r;
|
42
|
+
size_t inlen, latinlen, conv;
|
43
|
+
|
44
|
+
if (in == NULL)
|
45
|
+
return NULL;
|
46
|
+
inlen = strlen(in);
|
47
|
+
|
48
|
+
if (cd == NULL) {
|
49
|
+
local_cd = iconv_open("ISO-8859-1//IGNORE", "UTF-8");
|
50
|
+
if (local_cd == (iconv_t) -1)
|
51
|
+
return NULL;
|
52
|
+
cd = local_cd;
|
53
|
+
}
|
54
|
+
|
55
|
+
// Do a single allocation to be used for:
|
56
|
+
// [normalized output]\x00[iconv output]\x00
|
57
|
+
out = malloc((inlen*4)+2);
|
58
|
+
if (out == NULL) {
|
59
|
+
if (local_cd != NULL)
|
60
|
+
iconv_close(local_cd);
|
61
|
+
return NULL;
|
62
|
+
}
|
63
|
+
latin = out + (inlen*2) + 1;
|
64
|
+
latinlen = (inlen*2) + 1;
|
65
|
+
|
66
|
+
inptr = in;
|
67
|
+
latinptr = latin;
|
68
|
+
conv = iconv(cd, &inptr, &inlen, &latinptr, &latinlen);
|
69
|
+
if (local_cd != NULL)
|
70
|
+
iconv_close(local_cd);
|
71
|
+
|
72
|
+
if (conv == (size_t) -1 && errno == E2BIG) {
|
73
|
+
// Failed
|
74
|
+
free(out);
|
75
|
+
return NULL;
|
76
|
+
}
|
77
|
+
|
78
|
+
// Scan latin and normalize into out
|
79
|
+
outptr = out;
|
80
|
+
for (c = latin; c < latinptr; c++) {
|
81
|
+
if ((r = geo_charmap[(unsigned char)*c]) != NULL) {
|
82
|
+
while (*r != 0) {
|
83
|
+
*outptr = *r;
|
84
|
+
r++;
|
85
|
+
outptr++;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
*outptr = 0;
|
90
|
+
|
91
|
+
return out;
|
92
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <iconv.h>
|
3
|
+
|
4
|
+
VALUE GeoNormalize = Qnil;
|
5
|
+
|
6
|
+
void Init_geo_normalize();
|
7
|
+
VALUE method_geo_normalize(VALUE self, VALUE str);
|
8
|
+
char *geo_normalize(char *str, iconv_t cd);
|
9
|
+
|
10
|
+
void Init_geo_normalize() {
|
11
|
+
GeoNormalize = rb_define_module("GeoNormalize");
|
12
|
+
rb_define_singleton_method(GeoNormalize, "normalize", method_geo_normalize, 1);
|
13
|
+
}
|
14
|
+
|
15
|
+
/*
 * Ruby-facing wrapper around geo_normalize().
 *
 * self - receiver (unused).
 * str  - must be a Ruby String; anything else raises ArgumentError.
 *
 * Returns a new Ruby String holding the normalized text. Raises
 * NoMemoryError (via rb_memerror) when the scratch buffer cannot be
 * allocated and StandardError when geo_normalize() returns NULL.
 *
 * The string's bytes are copied into a NUL-terminated scratch buffer because
 * geo_normalize() measures its input with strlen(); as a consequence, bytes
 * following an embedded NUL in str are ignored.
 */
VALUE method_geo_normalize(VALUE self, VALUE str) {
    if (TYPE(str) != T_STRING)
        rb_raise(rb_eArgError, "normalize needs a string parameter");

    size_t length = RSTRING_LEN(str);
    char *buffer = malloc(length + 1);
    /* Never hand a NULL destination to memcpy(): report the failure. */
    if (buffer == NULL)
        rb_memerror();

    memcpy(buffer, RSTRING_PTR(str), length);
    buffer[length] = 0;

    /* NULL descriptor: geo_normalize() opens and closes its own. */
    char *result = geo_normalize(buffer, NULL);

    /* Release the scratch copy before any raise below. */
    free(buffer);

    if (result == NULL)
        rb_raise(rb_eStandardError, "geo_normalize call failed");

    /* rb_str_new2 copies the C string, so result can be freed afterwards. */
    VALUE ret = rb_str_new2(result);

    free(result);

    return ret;
}
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: geo_normalize
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jeremie Lasalle Ratelle
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description:
|
31
|
+
email:
|
32
|
+
executables: []
|
33
|
+
extensions:
|
34
|
+
- ext/geo_normalize/extconf.rb
|
35
|
+
extra_rdoc_files: []
|
36
|
+
files:
|
37
|
+
- ext/geo_normalize/geo_normalize.c
|
38
|
+
- ext/geo_normalize/ruby_geo_normalize.c
|
39
|
+
- ext/geo_normalize/extconf.rb
|
40
|
+
homepage:
|
41
|
+
licenses: []
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.25
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: GeoIP city name normalization
|
64
|
+
test_files: []
|