geo_normalize 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
# Build configuration for the geo_normalize native extension.
require 'mkmf'

extension_name = 'geo_normalize'
dir_config(extension_name)

# The C sources call iconv_open()/iconv()/iconv_close(). Some platforms ship
# these inside libc, others in a separate libiconv, so probe for both and
# stop with a clear message instead of producing an extension that fails to
# link or load.
abort "#{extension_name}: iconv.h not found" unless have_header('iconv.h')
unless have_func('iconv_open', 'iconv.h') ||
       have_library('iconv', 'iconv_open', 'iconv.h')
  abort "#{extension_name}: no usable iconv implementation found"
end

create_makefile(extension_name)
@@ -0,0 +1,92 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <iconv.h>
4
+ #include <errno.h>
5
+
6
/*
 * Replacement table indexed by byte value: 256 entries, 16 per row.
 *
 * geo_normalize() looks up every byte of the ISO-8859-1 text here. A NULL
 * entry means the byte is dropped; any other entry is the lowercase ASCII
 * text written in its place (one or two characters).
 *
 * Upper- and lowercase forms of a letter map to the same replacement, so
 * 0xF0 (lowercase eth) yields "d" just like 0xD0 (uppercase eth).
 */
static char *geo_charmap[] = {
    /* 0x00 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x40 */ NULL, "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x50 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", NULL, NULL, NULL, NULL, NULL,
    /* 0x60 */ NULL, "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", NULL, NULL, NULL, NULL, NULL,
    /* 0x80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xA0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xB0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xC0 */ "a", "a", "a", "a", "a", "a", "ae", "c", "e", "e", "e", "e", "i", "i", "i", "i",
    /* 0xD0 */ "d", "n", "o", "o", "o", "o", "o", "x", "o", "u", "u", "u", "u", "y", "th", "ss",
    /* 0xE0 */ "a", "a", "a", "a", "a", "a", "ae", "c", "e", "e", "e", "e", "i", "i", "i", "i",
    /* 0xF0 */ "d", "n", "o", "o", "o", "o", "o", NULL, "o", "u", "u", "u", "u", "y", "th", "y",
};
24
+
25
+ /*
26
+ * This takes a UTF-8 string like "Rivière-du-Loup" and, optionally,
27
+ * a pre-allocated UTF-8 to ISO-8859-1//IGNORE conversion descriptor,
28
+ * and returns a normalized ascii version "riviereduloup"
29
+ *
30
+ * It's indended for processing maxmind geo data as well as geo-targetting rules before
31
+ * doing strcmp(), to tolerate variations in accent/whitespace/punctuation/capitalization
32
+ *
33
+ * Returns a freshly-malloc()ed string on success, NULL on failure. Be sure to free()
34
+ * result when you no longer need it
35
+ */
36
+ char *
37
+ geo_normalize(char *in, iconv_t cd)
38
+ {
39
+ iconv_t local_cd = NULL;
40
+ char *inptr, *out, *outptr, *latin, *latinptr;
41
+ char *c, *r;
42
+ size_t inlen, latinlen, conv;
43
+
44
+ if (in == NULL)
45
+ return NULL;
46
+ inlen = strlen(in);
47
+
48
+ if (cd == NULL) {
49
+ local_cd = iconv_open("ISO-8859-1//IGNORE", "UTF-8");
50
+ if (local_cd == (iconv_t) -1)
51
+ return NULL;
52
+ cd = local_cd;
53
+ }
54
+
55
+ // Do a single allocation to be used for:
56
+ // [normalized output]\x00[iconv output]\x00
57
+ out = malloc((inlen*4)+2);
58
+ if (out == NULL) {
59
+ if (local_cd != NULL)
60
+ iconv_close(local_cd);
61
+ return NULL;
62
+ }
63
+ latin = out + (inlen*2) + 1;
64
+ latinlen = (inlen*2) + 1;
65
+
66
+ inptr = in;
67
+ latinptr = latin;
68
+ conv = iconv(cd, &inptr, &inlen, &latinptr, &latinlen);
69
+ if (local_cd != NULL)
70
+ iconv_close(local_cd);
71
+
72
+ if (conv == (size_t) -1 && errno == E2BIG) {
73
+ // Failed
74
+ free(out);
75
+ return NULL;
76
+ }
77
+
78
+ // Scan latin and normalize into out
79
+ outptr = out;
80
+ for (c = latin; c < latinptr; c++) {
81
+ if ((r = geo_charmap[(unsigned char)*c]) != NULL) {
82
+ while (*r != 0) {
83
+ *outptr = *r;
84
+ r++;
85
+ outptr++;
86
+ }
87
+ }
88
+ }
89
+ *outptr = 0;
90
+
91
+ return out;
92
+ }
@@ -0,0 +1,36 @@
1
+ #include <ruby.h>
2
+ #include <iconv.h>
3
+
4
+ VALUE GeoNormalize = Qnil;
5
+
6
+ void Init_geo_normalize();
7
+ VALUE method_geo_normalize(VALUE self, VALUE str);
8
+ char *geo_normalize(char *str, iconv_t cd);
9
+
10
+ void Init_geo_normalize() {
11
+ GeoNormalize = rb_define_module("GeoNormalize");
12
+ rb_define_singleton_method(GeoNormalize, "normalize", method_geo_normalize, 1);
13
+ }
14
+
15
+ VALUE method_geo_normalize(VALUE self, VALUE str) {
16
+ if (TYPE(str) != T_STRING)
17
+ rb_raise(rb_eArgError, "normalize needs a string parameter");
18
+
19
+ size_t length = RSTRING_LEN(str);
20
+ char *buffer = malloc(length+1);
21
+ memcpy(buffer, RSTRING_PTR(str), length);
22
+ buffer[length] = 0;
23
+
24
+ char *result = geo_normalize(buffer, NULL);
25
+
26
+ free(buffer);
27
+
28
+ if (result == NULL)
29
+ rb_raise(rb_eStandardError, "geo_normalize call failed");
30
+
31
+ VALUE ret = rb_str_new2(result);
32
+
33
+ free(result);
34
+
35
+ return ret;
36
+ }
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: geo_normalize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jeremie Lasalle Ratelle
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake-compiler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description:
31
+ email:
32
+ executables: []
33
+ extensions:
34
+ - ext/geo_normalize/extconf.rb
35
+ extra_rdoc_files: []
36
+ files:
37
+ - ext/geo_normalize/geo_normalize.c
38
+ - ext/geo_normalize/ruby_geo_normalize.c
39
+ - ext/geo_normalize/extconf.rb
40
+ homepage:
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.25
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: GeoIP city name normalization
64
+ test_files: []