geo_normalize 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
# Build configuration for the geo_normalize C extension.
require 'mkmf'

extension_name = 'geo_normalize'
dir_config(extension_name)

# The C sources include <iconv.h>; fail early with a clear message if it is missing.
abort 'geo_normalize: iconv.h not found' unless have_header('iconv.h')

# Some C libraries provide iconv themselves; on others it lives in a separate
# libiconv that must be linked explicitly. Probe libc first, then libiconv.
unless have_func('iconv_open', 'iconv.h') || have_library('iconv', 'iconv_open', 'iconv.h')
  abort 'geo_normalize: no usable iconv implementation found'
end

create_makefile(extension_name)
@@ -0,0 +1,92 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <iconv.h>
4
+ #include <errno.h>
5
+
6
/*
 * Replacement table indexed by ISO-8859-1 byte value.
 *
 * Each entry is the lower-case ASCII text emitted for that byte, or NULL when
 * the byte is dropped from the output (controls, whitespace, punctuation and
 * symbols). Digits map to themselves, ASCII letters are folded to lower case,
 * and accented Latin-1 letters map to their unaccented lower-case form; a few
 * expand to two characters ("ae", "th", "ss").
 *
 * The explicit size of 256 guarantees that any unsigned char index is in bounds.
 */
static const char *const geo_charmap[256] = {
    /* 0x00 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x30 */ "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8",  "9",  NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x40 */ NULL, "a",  "b",  "c",  "d",  "e",  "f",  "g",  "h",  "i",  "j",  "k",  "l",  "m",  "n",  "o",
    /* 0x50 */ "p",  "q",  "r",  "s",  "t",  "u",  "v",  "w",  "x",  "y",  "z",  NULL, NULL, NULL, NULL, NULL,
    /* 0x60 */ NULL, "a",  "b",  "c",  "d",  "e",  "f",  "g",  "h",  "i",  "j",  "k",  "l",  "m",  "n",  "o",
    /* 0x70 */ "p",  "q",  "r",  "s",  "t",  "u",  "v",  "w",  "x",  "y",  "z",  NULL, NULL, NULL, NULL, NULL,
    /* 0x80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0x90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xA0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xB0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 0xC0 */ "a",  "a",  "a",  "a",  "a",  "a",  "ae", "c",  "e",  "e",  "e",  "e",  "i",  "i",  "i",  "i",
    /* 0xD0 */ "d",  "n",  "o",  "o",  "o",  "o",  "o",  "x",  "o",  "u",  "u",  "u",  "u",  "y",  "th", "ss",
    /* 0xE0 */ "a",  "a",  "a",  "a",  "a",  "a",  "ae", "c",  "e",  "e",  "e",  "e",  "i",  "i",  "i",  "i",
    /* 0xF0: lower-case eth maps to "d" so it agrees with upper-case eth at 0xD0 */
    /* 0xF0 */ "d",  "n",  "o",  "o",  "o",  "o",  "o",  NULL, "o",  "u",  "u",  "u",  "u",  "y",  "th", "y",
};

/*
 * This takes a UTF-8 string like "Rivière-du-Loup" and, optionally,
 * a pre-allocated UTF-8 to ISO-8859-1//IGNORE conversion descriptor,
 * and returns a normalized ASCII version: "riviereduloup".
 *
 * It's intended for processing maxmind geo data as well as geo-targeting rules
 * before doing strcmp(), to tolerate variations in
 * accent/whitespace/punctuation/capitalization.
 *
 * Parameters:
 *   in  NUL-terminated input string; it is only read, never modified.
 *   cd  conversion descriptor to reuse, or NULL to have one opened and closed
 *       internally. A caller-supplied descriptor is reset to its initial
 *       state before use and is never closed here.
 *
 * Returns a freshly-malloc()ed string on success, NULL on failure. Be sure to
 * free() the result when you no longer need it.
 *
 * NULL is returned when `in` is NULL, when the input is too long for the
 * working buffer size to be computed, when no descriptor can be opened, when
 * allocation fails, or when iconv() reports E2BIG. Any other iconv() error is
 * tolerated: whatever was converted before the error is normalized and
 * returned.
 */
char *
geo_normalize(char *in, iconv_t cd)
{
    int own_cd = 0;
    int conv_errno;
    char *inptr, *out, *outptr, *latin, *latinptr;
    const char *c, *r;
    size_t inlen, latinlen, conv;

    if (in == NULL) {
        return NULL;
    }
    inlen = strlen(in);

    /* The working buffer is inlen*4 + 2 bytes; refuse lengths where that wraps. */
    if (inlen > ((size_t) -1 - 2) / 4) {
        return NULL;
    }

    if (cd == NULL) {
        cd = iconv_open("ISO-8859-1//IGNORE", "UTF-8");
        if (cd == (iconv_t) -1) {
            return NULL;
        }
        own_cd = 1;
    } else {
        /* Drop any state left in a shared descriptor by an earlier conversion. */
        iconv(cd, NULL, NULL, NULL, NULL);
    }

    /*
     * Do a single allocation to be used for:
     *   [normalized output]\x00[iconv output]\x00
     * Each half is inlen*2 + 1 bytes: every Latin-1 byte expands to at most
     * two output characters.
     */
    out = malloc((inlen * 4) + 2);
    if (out == NULL) {
        if (own_cd) {
            iconv_close(cd);
        }
        return NULL;
    }
    latin = out + (inlen * 2) + 1;
    latinlen = (inlen * 2) + 1;

    inptr = in;
    latinptr = latin;
    conv = iconv(cd, &inptr, &inlen, &latinptr, &latinlen);
    /* Capture errno now: iconv_close() below is allowed to overwrite it. */
    conv_errno = errno;
    if (own_cd) {
        iconv_close(cd);
    }

    if (conv == (size_t) -1 && conv_errno == E2BIG) {
        /* Output area exhausted: treat as failure. */
        free(out);
        return NULL;
    }

    /*
     * Scan the Latin-1 bytes and write their replacements to the front of the
     * buffer. The write position can never overtake the read position because
     * the Latin-1 area starts inlen*2 + 1 bytes in.
     */
    outptr = out;
    for (c = latin; c < latinptr; c++) {
        r = geo_charmap[(unsigned char) *c];
        if (r == NULL) {
            continue;
        }
        while (*r != '\0') {
            *outptr++ = *r++;
        }
    }
    *outptr = '\0';

    return out;
}
@@ -0,0 +1,36 @@
1
+ #include <ruby.h>
2
+ #include <iconv.h>
3
+
4
+ VALUE GeoNormalize = Qnil;
5
+
6
+ void Init_geo_normalize();
7
+ VALUE method_geo_normalize(VALUE self, VALUE str);
8
+ char *geo_normalize(char *str, iconv_t cd);
9
+
10
+ void Init_geo_normalize() {
11
+ GeoNormalize = rb_define_module("GeoNormalize");
12
+ rb_define_singleton_method(GeoNormalize, "normalize", method_geo_normalize, 1);
13
+ }
14
+
15
+ VALUE method_geo_normalize(VALUE self, VALUE str) {
16
+ if (TYPE(str) != T_STRING)
17
+ rb_raise(rb_eArgError, "normalize needs a string parameter");
18
+
19
+ size_t length = RSTRING_LEN(str);
20
+ char *buffer = malloc(length+1);
21
+ memcpy(buffer, RSTRING_PTR(str), length);
22
+ buffer[length] = 0;
23
+
24
+ char *result = geo_normalize(buffer, NULL);
25
+
26
+ free(buffer);
27
+
28
+ if (result == NULL)
29
+ rb_raise(rb_eStandardError, "geo_normalize call failed");
30
+
31
+ VALUE ret = rb_str_new2(result);
32
+
33
+ free(result);
34
+
35
+ return ret;
36
+ }
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: geo_normalize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jeremie Lasalle Ratelle
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake-compiler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description:
31
+ email:
32
+ executables: []
33
+ extensions:
34
+ - ext/geo_normalize/extconf.rb
35
+ extra_rdoc_files: []
36
+ files:
37
+ - ext/geo_normalize/geo_normalize.c
38
+ - ext/geo_normalize/ruby_geo_normalize.c
39
+ - ext/geo_normalize/extconf.rb
40
+ homepage:
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.25
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: GeoIP city name normalization
64
+ test_files: []