geo_normalize 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <iconv.h>
|
4
|
+
#include <errno.h>
|
5
|
+
|
6
|
+
/*
 * Lookup table indexed by ISO-8859-1 byte value.  Each entry is the
 * lowercase ASCII replacement for that byte, or NULL when the byte is
 * dropped from the normalized output (control characters, whitespace,
 * punctuation, and the 0x80-0xBF range).
 *
 * Upper- and lowercase forms of a letter map to the same replacement so
 * that normalization is case-insensitive; this includes eth (0xD0 / 0xF0),
 * both of which map to "d".
 */
static const char *const geo_charmap[256] = {
    /* 0x00 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x40 */ NULL, "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x50 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", NULL, NULL, NULL, NULL, NULL,
    /* 0x60 */ NULL, "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", NULL, NULL, NULL, NULL, NULL,
    /* 0x80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xA0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xB0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xC0 */ "a", "a", "a", "a", "a", "a", "ae", "c", "e", "e", "e", "e", "i", "i", "i", "i",
    /* 0xD0 */ "d", "n", "o", "o", "o", "o", "o", "x", "o", "u", "u", "u", "u", "y", "th", "ss",
    /* 0xE0 */ "a", "a", "a", "a", "a", "a", "ae", "c", "e", "e", "e", "e", "i", "i", "i", "i",
    /* 0xF0 */ "d", "n", "o", "o", "o", "o", "o", NULL, "o", "u", "u", "u", "u", "y", "th", "y",
};

/*
 * This takes a UTF-8 string like "Rivière-du-Loup" and, optionally,
 * a pre-allocated UTF-8 to ISO-8859-1//IGNORE conversion descriptor,
 * and returns a normalized ASCII version "riviereduloup".
 *
 * It's intended for processing MaxMind geo data as well as geo-targeting
 * rules before doing strcmp(), to tolerate variations in
 * accent/whitespace/punctuation/capitalization.
 *
 * Parameters:
 *   in - NUL-terminated input string; NULL yields a NULL result.
 *   cd - conversion descriptor to use, or NULL to have one opened (and
 *        closed again) for the duration of this call.  A descriptor
 *        supplied by the caller is never closed here.
 *
 * Returns a freshly-malloc()ed string on success, NULL on failure
 * (NULL input, iconv_open() failure, allocation failure, or iconv()
 * reporting E2BIG).  Other iconv() errors are tolerated: whatever was
 * converted before the error is still normalized and returned.
 * Be sure to free() the result when you no longer need it.
 */
char *
geo_normalize(char *in, iconv_t cd)
{
    int owns_cd = 0;       /* non-zero when cd was opened by this call */
    int conv_errno = 0;    /* errno captured right after iconv() */
    char *inptr, *out, *outptr, *latin, *latinptr;
    const char *c, *r;
    size_t inlen, latinlen, conv;

    if (in == NULL)
        return NULL;
    inlen = strlen(in);

    /* Refuse inputs for which (inlen * 4) + 2 would wrap around size_t. */
    if (inlen > ((size_t) -1 - 2) / 4)
        return NULL;

    if (cd == NULL) {
        cd = iconv_open("ISO-8859-1//IGNORE", "UTF-8");
        if (cd == (iconv_t) -1)
            return NULL;
        owns_cd = 1;
    }

    /*
     * Do a single allocation to be used for:
     *   [normalized output]\x00[iconv output]\x00
     * Each Latin-1 byte expands to at most two output characters, so the
     * first region needs (inlen * 2) + 1 bytes; the second region gets the
     * same amount.
     */
    out = malloc((inlen * 4) + 2);
    if (out == NULL) {
        if (owns_cd)
            iconv_close(cd);
        return NULL;
    }
    latin = out + (inlen * 2) + 1;
    latinlen = (inlen * 2) + 1;

    inptr = in;
    latinptr = latin;
    conv = iconv(cd, &inptr, &inlen, &latinptr, &latinlen);

    /* Capture errno before iconv_close() gets a chance to overwrite it. */
    if (conv == (size_t) -1)
        conv_errno = errno;
    if (owns_cd)
        iconv_close(cd);

    if (conv == (size_t) -1 && conv_errno == E2BIG) {
        /* Output buffer too small: treat as failure. */
        free(out);
        return NULL;
    }

    /* Scan the Latin-1 bytes and append each mapped replacement to out. */
    outptr = out;
    for (c = latin; c < latinptr; c++) {
        r = geo_charmap[(unsigned char) *c];
        if (r == NULL)
            continue;    /* unmapped byte: dropped */
        while (*r != 0)
            *outptr++ = *r++;
    }
    *outptr = 0;

    return out;
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <iconv.h>
|
3
|
+
|
4
|
+
/* Implemented in geo_normalize.c: UTF-8 -> normalized lowercase ASCII. */
char *geo_normalize(char *str, iconv_t cd);

/* Extension entry point and the function backing GeoNormalize.normalize. */
void Init_geo_normalize();
VALUE method_geo_normalize(VALUE self, VALUE str);

/* Handle to the GeoNormalize module; assigned in Init_geo_normalize(). */
VALUE GeoNormalize = Qnil;
|
9
|
+
|
10
|
+
void Init_geo_normalize() {
|
11
|
+
GeoNormalize = rb_define_module("GeoNormalize");
|
12
|
+
rb_define_singleton_method(GeoNormalize, "normalize", method_geo_normalize, 1);
|
13
|
+
}
|
14
|
+
|
15
|
+
/*
 * GeoNormalize.normalize(str)
 *
 * Copies the bytes of the Ruby string into a NUL-terminated C buffer,
 * runs geo_normalize() on it with a conversion descriptor opened for this
 * call, and returns the result as a new Ruby string.
 *
 * Raises ArgumentError when str is not a String, NoMemoryError when the
 * temporary buffer cannot be allocated, and StandardError when
 * geo_normalize() returns NULL.
 */
VALUE method_geo_normalize(VALUE self, VALUE str) {
    if (TYPE(str) != T_STRING)
        rb_raise(rb_eArgError, "normalize needs a string parameter");

    /* Ruby strings are not guaranteed to be NUL-terminated: copy and terminate. */
    size_t length = RSTRING_LEN(str);
    char *buffer = malloc(length + 1);
    if (buffer == NULL)
        rb_raise(rb_eNoMemError, "geo_normalize: out of memory");
    memcpy(buffer, RSTRING_PTR(str), length);
    buffer[length] = 0;

    char *result = geo_normalize(buffer, NULL);

    /* The copy is no longer needed whether or not normalization succeeded. */
    free(buffer);

    if (result == NULL)
        rb_raise(rb_eStandardError, "geo_normalize call failed");

    VALUE ret = rb_str_new2(result);

    /* rb_str_new2 copied the bytes, so the malloc()ed result can be released. */
    free(result);

    return ret;
}
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: geo_normalize
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jeremie Lasalle Ratelle
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description:
|
31
|
+
email:
|
32
|
+
executables: []
|
33
|
+
extensions:
|
34
|
+
- ext/geo_normalize/extconf.rb
|
35
|
+
extra_rdoc_files: []
|
36
|
+
files:
|
37
|
+
- ext/geo_normalize/geo_normalize.c
|
38
|
+
- ext/geo_normalize/ruby_geo_normalize.c
|
39
|
+
- ext/geo_normalize/extconf.rb
|
40
|
+
homepage:
|
41
|
+
licenses: []
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.25
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: GeoIP city name normalization
|
64
|
+
test_files: []
|