geoipdb 0.5.5-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +18 -0
- data/.jrubyrc +12 -0
- data/.rspec +1 -0
- data/.rvmrc +1 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +20 -0
- data/README.markdown +18 -0
- data/Rakefile +8 -0
- data/ext/geoipdb/extconf.rb +3 -0
- data/ext/geoipdb/geoipdb.c +107 -0
- data/ext/geoipdb/ipdb.c +668 -0
- data/ext/geoipdb/ipdb.h +84 -0
- data/ext/geoipdb/src/City.java +120 -0
- data/ext/geoipdb/src/CsvReader.java +29 -0
- data/ext/geoipdb/src/GeoIpDb.java +101 -0
- data/ext/geoipdb/src/IpRange.java +110 -0
- data/geoipdb.gemspec +28 -0
- data/lib/geoipdb.jar +0 -0
- data/lib/geoipdb.rb +8 -0
- data/lib/ip_information.rb +27 -0
- data/lib/jgeoipdb.rb +43 -0
- data/sample_data/cities.csv +7 -0
- data/sample_data/citiess_corrupt.csv +8 -0
- data/sample_data/ip_ranges.csv +11 -0
- data/sample_data/ip_ranges_corrupt.csv +20 -0
- data/spec/geoipdb_spec.rb +63 -0
- data/spec/spec_helper.rb +12 -0
- data/tasks/compile.rake +8 -0
- data/tasks/rspec.rake +7 -0
- data/tasks/yard.rake +5 -0
- metadata +110 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
*.class
|
2
|
+
*build
|
3
|
+
.bundle
|
4
|
+
.yardoc
|
5
|
+
Gemfile.lock
|
6
|
+
coverage
|
7
|
+
doc
|
8
|
+
ext/Makefile
|
9
|
+
ext/geoipdb/Makefile
|
10
|
+
ext/geoipdb/geoipdb.bundle
|
11
|
+
ext/geoipdb/geoipdb.o
|
12
|
+
ext/geoipdb/ipdb.o
|
13
|
+
ext/geoipdb/test
|
14
|
+
ext/geoipdb/test.o
|
15
|
+
pkg
|
16
|
+
rdoc
|
17
|
+
tags
|
18
|
+
tmp/
|
data/.jrubyrc
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use --create jruby-1.7.2@geoipdb
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 madvertise GmbH
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# geoipdb: fast (in memory!) geo location db.
|
2
|
+
|
3
|
+
Fast (>3 million queries/sec) GeoIpDb implementation for Ruby using C/Java extensions.
|
4
|
+
|
5
|
+
* Returns a GeoLocation and additional information for a given IP.
|
6
|
+
* Reads Data from CSV-Files and uses internal binary caching.
|
7
|
+
|
8
|
+
## Usage
|
9
|
+
|
10
|
+
db = GeoIpDb.init "city_codes.csv", "ip_city.txt", "ip_city.cache"
|
11
|
+
ip_info = db.information_for_ip("178.0.0.1")
|
12
|
+
ip_info.inspect
|
13
|
+
=> #<IpInformation:0x101385c78 @city_name="eschborn", @city_code="ax5", @lng=8.55, @country_iso_code="de", @lat=50.133333, @is_mobile=true>
|
14
|
+
|
15
|
+
## Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 madvertise GmbH. See LICENSE.txt for
|
18
|
+
further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "ipdb.h"
|
3
|
+
|
4
|
+
/**
|
5
|
+
Ruby Wrapper
|
6
|
+
*/
|
7
|
+
|
8
|
+
typedef struct geipdb {
|
9
|
+
IPDB *db;
|
10
|
+
} geoipdb;
|
11
|
+
|
12
|
+
static VALUE cIpDb;
|
13
|
+
|
14
|
+
// free the memory used by the db, called by the Ruby-GC
|
15
|
+
void geoipdb_free(geoipdb *gi) {
|
16
|
+
if(gi == NULL)
|
17
|
+
return;
|
18
|
+
if (gi->db != NULL){
|
19
|
+
if(gi->db->cities != NULL ){
|
20
|
+
free(gi->db->cities);
|
21
|
+
gi->db->cities = NULL;
|
22
|
+
}
|
23
|
+
if(gi->db->ranges != NULL ){
|
24
|
+
free(gi->db->ranges);
|
25
|
+
gi->db->ranges = NULL;
|
26
|
+
}
|
27
|
+
if(gi->db != NULL){
|
28
|
+
free(gi->db);
|
29
|
+
}
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
|
34
|
+
/*
 * GeoIpDb.init(cities_file_name, ranges_file_name, cache_file_name)
 *
 * Loads the database through init_db() and wraps it in a Ruby object whose
 * memory is released by geoipdb_free().
 *
 * All three arguments must be Strings (TypeError otherwise). Returns the
 * wrapped database, or nil when init_db() could not build one.
 */
VALUE ipdb_init(VALUE self, VALUE cities_file_name, VALUE ranges_file_name, VALUE cache_file_name) {
  geoipdb *gi;
  IPDB *db;
  char *cities_csv_file;
  char *ranges_csv_file;
  char *cache_file;

  Check_Type(cities_file_name, T_STRING);
  Check_Type(ranges_file_name, T_STRING);
  Check_Type(cache_file_name, T_STRING);

  /* StringValueCStr yields a NUL-terminated buffer, which fopen() needs;
     RSTRING_PTR alone does not promise a terminator. */
  cities_csv_file = StringValueCStr(cities_file_name);
  ranges_csv_file = StringValueCStr(ranges_file_name);
  cache_file = StringValueCStr(cache_file_name);

  /* Build the database before allocating the wrapper so that a failed
     init leaves nothing behind to clean up. */
  db = init_db(cities_csv_file, ranges_csv_file, cache_file);
  if (db == NULL) {
    if (DEBUG)
      printf("Could not init DB!\n");
    return Qnil;
  }

  if (DEBUG)
    printf("\nDB Init completed!\n");

  gi = ALLOC(geoipdb);
  gi->db = db;
  return Data_Wrap_Struct(cIpDb, 0, geoipdb_free, gi);
}
|
63
|
+
|
64
|
+
|
65
|
+
/*
 * Creates a new IpInformation instance (looked up as a top-level constant)
 * and fills its instance variables from the matched range, city and ISP.
 *
 * range and city must be non-NULL; isp may be NULL, in which case
 * @isp_name is set to nil. A non-NULL isp is stored as a Symbol.
 */
VALUE build_ip_information_object(IpRange *range, City *city, char* isp) {
  VALUE klass = rb_const_get(rb_cObject, rb_intern("IpInformation"));
  VALUE info = rb_funcall(klass, rb_intern("new"), 0);
  VALUE isp_value = (isp == NULL) ? Qnil : ID2SYM(rb_intern(isp));
  VALUE mobile_value = (range->is_mobile == 1) ? Qtrue : Qfalse;

  rb_ivar_set(info, rb_intern("@country_iso_code"), rb_str_new2(city->country_iso2));
  rb_ivar_set(info, rb_intern("@city_name"), rb_str_new2(city->name));
  /* INT2NUM range-checks and falls back to Bignum; INT2FIX would silently
     corrupt codes that do not fit into a Fixnum. */
  rb_ivar_set(info, rb_intern("@city_code"), INT2NUM(city->city_code));
  rb_ivar_set(info, rb_intern("@lng"), rb_float_new(city->lng));
  rb_ivar_set(info, rb_intern("@lat"), rb_float_new(city->lat));
  rb_ivar_set(info, rb_intern("@is_mobile"), mobile_value);
  rb_ivar_set(info, rb_intern("@isp_name"), isp_value);

  return info;
}
|
81
|
+
|
82
|
+
/*
 * GeoIpDb#information_for_ip(ip_string)
 *
 * Looks up the range containing the dotted-quad address, then its city and
 * ISP, and returns them as an IpInformation object. Returns nil when no
 * range or city matches. Raises TypeError when ip_string is not a String.
 */
VALUE ipdb_information_for_ip(VALUE self, VALUE ip_string){
  geoipdb *gi;
  char *ip;
  IpRange *ip_range;
  City *city;
  char *isp;

  /* Reject non-strings up front: RSTRING_PTR on an arbitrary VALUE reads
     through a bogus pointer. */
  Check_Type(ip_string, T_STRING);
  ip = StringValueCStr(ip_string);

  Data_Get_Struct(self, geoipdb, gi);
  if (gi == NULL || gi->db == NULL)
    return Qnil;

  ip_range = find_range_for_ip(gi->db, ip);
  if (!ip_range)
    return Qnil;

  city = find_city_for_ip_range(gi->db, ip_range);
  if (!city)
    return Qnil;

  isp = find_isp_for_ip_range(gi->db, ip_range);

  return build_ip_information_object(ip_range, city, isp);
}
|
101
|
+
|
102
|
+
void Init_geoipdb(void)
|
103
|
+
{
|
104
|
+
cIpDb = rb_define_class( "GeoIpDb", rb_cObject);
|
105
|
+
rb_define_singleton_method( cIpDb, "init", ipdb_init, 3);
|
106
|
+
rb_define_method( cIpDb, "information_for_ip", ipdb_information_for_ip, 1);
|
107
|
+
}
|
data/ext/geoipdb/ipdb.c
ADDED
@@ -0,0 +1,668 @@
|
|
1
|
+
#include "ipdb.h"
|
2
|
+
|
3
|
+
#include <search.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <stdlib.h>
|
7
|
+
#include <sys/time.h>
|
8
|
+
|
9
|
+
const char country_iso2_codes[253][3] = { "--","ap","eu","ad","ae","af","ag","ai","al","am","an",
|
10
|
+
"ao","aq","ar","as","at","au","aw","az","ba","bb",
|
11
|
+
"bd","be","bf","bg","bh","bi","bj","bm","bn","bo",
|
12
|
+
"br","bs","bt","bv","bw","by","bz","ca","cc","cd",
|
13
|
+
"cf","cg","ch","ci","ck","cl","cm","cn","co","cr",
|
14
|
+
"cu","cv","cx","cy","cz","de","dj","dk","dm","do",
|
15
|
+
"dz","ec","ee","eg","eh","er","es","et","fi","fj",
|
16
|
+
"fk","fm","fo","fr","fx","ga","gb","gd","ge","gf",
|
17
|
+
"gh","gi","gl","gm","gn","gp","gq","gr","gs","gt",
|
18
|
+
"gu","gw","gy","hk","hm","hn","hr","ht","hu","id",
|
19
|
+
"ie","il","in","io","iq","ir","is","it","jm","jo",
|
20
|
+
"jp","ke","kg","kh","ki","km","kn","kp","kr","kw",
|
21
|
+
"ky","kz","la","lb","lc","li","lk","lr","ls","lt",
|
22
|
+
"lu","lv","ly","ma","mc","md","mg","mh","mk","ml",
|
23
|
+
"mm","mn","mo","mp","mq","mr","ms","mt","mu","mv",
|
24
|
+
"mw","mx","my","mz","na","nc","ne","nf","ng","ni",
|
25
|
+
"nl","no","np","nr","nu","nz","om","pa","pe","pf",
|
26
|
+
"pg","ph","pk","pl","pm","pn","pr","ps","pt","pw",
|
27
|
+
"py","qa","re","ro","ru","rw","sa","sb","sc","sd",
|
28
|
+
"se","sg","sh","si","sj","sk","sl","sm","sn","so",
|
29
|
+
"sr","st","sv","sy","sz","tc","td","tf","tg","th",
|
30
|
+
"tj","tk","tm","tn","to","tl","tr","tt","tv","tw",
|
31
|
+
"tz","ua","ug","um","us","uy","uz","va","vc","ve",
|
32
|
+
"vg","vi","vn","vu","wf","ws","ye","yt","rs","za",
|
33
|
+
"zm","me","zw","a1","a2","o1","ax","gg","im","je",
|
34
|
+
"bl","mf"};
|
35
|
+
|
36
|
+
static const unsigned num_countries = (unsigned)(sizeof(country_iso2_codes)/sizeof(country_iso2_codes[0]));
|
37
|
+
|
38
|
+
const char country_iso3_codes[253][4] = { "--","ap","eu","and","are","afg","atg","aia","alb","arm","ant",
|
39
|
+
"ago","aq","arg","asm","aut","aus","abw","aze","bih","brb",
|
40
|
+
"bgd","bel","bfa","bgr","bhr","bdi","ben","bmu","brn","bol",
|
41
|
+
"bra","bhs","btn","bv","bwa","blr","blz","can","cc","cod",
|
42
|
+
"caf","cog","che","civ","cok","chl","cmr","chn","col","cri",
|
43
|
+
"cub","cpv","cx","cyp","cze","deu","dji","dnk","dma","dom",
|
44
|
+
"dza","ecu","est","egy","esh","eri","esp","eth","fin","fji",
|
45
|
+
"flk","fsm","fro","fra","fx","gab","gbr","grd","geo","guf",
|
46
|
+
"gha","gib","grl","gmb","gin","glp","gnq","grc","gs","gtm",
|
47
|
+
"gum","gnb","guy","hkg","hm","hnd","hrv","hti","hun","idn",
|
48
|
+
"irl","isr","ind","io","irq","irn","isl","ita","jam","jor",
|
49
|
+
"jpn","ken","kgz","khm","kir","com","kna","prk","kor","kwt",
|
50
|
+
"cym","kaz","lao","lbn","lca","lie","lka","lbr","lso","ltu",
|
51
|
+
"lux","lva","lby","mar","mco","mda","mdg","mhl","mkd","mli",
|
52
|
+
"mmr","mng","mac","mnp","mtq","mrt","msr","mlt","mus","mdv",
|
53
|
+
"mwi","mex","mys","moz","nam","ncl","ner","nfk","nga","nic",
|
54
|
+
"nld","nor","npl","nru","niu","nzl","omn","pan","per","pyf",
|
55
|
+
"png","phl","pak","pol","spm","pcn","pri","pse","prt","plw",
|
56
|
+
"pry","qat","reu","rou","rus","rwa","sau","slb","syc","sdn",
|
57
|
+
"swe","sgp","shn","svn","sjm","svk","sle","smr","sen","som",
|
58
|
+
"sur","stp","slv","syr","swz","tca","tcd","tf","tgo","tha",
|
59
|
+
"tjk","tkl","tkm","tun","ton","tls","tur","tto","tuv","twn",
|
60
|
+
"tza","ukr","uga","um","usa","ury","uzb","vat","vct","ven",
|
61
|
+
"vgb","vir","vnm","vut","wlf","wsm","yem","yt","srb","zaf",
|
62
|
+
"zmb","mne","zwe","a1","a2","o1","ala","ggy","imn","jey",
|
63
|
+
"blm","maf"};
|
64
|
+
|
65
|
+
|
66
|
+
void
|
67
|
+
print_range(const IpRange* e){
|
68
|
+
printf( "from: %lu, to:%lu ->City-idx: %i \n",e->from, e->to,e->city_index );
|
69
|
+
}
|
70
|
+
|
71
|
+
void
|
72
|
+
print_ranges(IPDB * db){
|
73
|
+
int i;
|
74
|
+
for(i = 0; i < db->ranges_count; ++i)
|
75
|
+
{
|
76
|
+
print_range(&(db->ranges[i]));
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
void
|
81
|
+
print_city(const City * e){
|
82
|
+
if(e == NULL)
|
83
|
+
{
|
84
|
+
return;
|
85
|
+
}
|
86
|
+
printf( "City: code:%i, name:%s, country: %s, lat: %10.7f, lng: %10.7f \n",e->city_code, e->name, e->country_iso3, e->lat, e->lng );
|
87
|
+
}
|
88
|
+
|
89
|
+
void
|
90
|
+
print_cities(IPDB * db){
|
91
|
+
int i;
|
92
|
+
for(i = 0; i < db->cities_count; ++i)
|
93
|
+
{
|
94
|
+
print_city(&(db->cities[i]));
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
/* Prints the number of cities and ranges currently held by db. */
void print_stats(IPDB * db){
  fputs("DB STATS: \n", stdout);
  fprintf(stdout, "\tCities: %i\n", db->cities_count);
  fprintf(stdout, "\tRanges: %i\n", db->ranges_count);
}
|
103
|
+
|
104
|
+
/*
 * Stores the current wall-clock time in *tim via gettimeofday() and
 * returns it as seconds with a fractional microsecond part.
 */
double
get_time(struct timeval *tim){
  double fraction;

  gettimeofday(tim, NULL);
  fraction = tim->tv_usec / 1000000.0;
  return tim->tv_sec + fraction;
}
|
109
|
+
|
110
|
+
|
111
|
+
/*
 * Converts a dotted-quad IPv4 string ("a.b.c.d") to its numeric value.
 *
 * Returns 0 for NULL, for any character other than a digit or '.', for an
 * octet above 255, and for anything that does not contain exactly three
 * dots. Note that 0 is therefore also the result for "0.0.0.0".
 */
unsigned long
ip_to_int(const char *addr){
  unsigned long ipnum = 0;
  unsigned int octet = 0;
  unsigned int digit;
  unsigned int c;
  int dots_left = 3;

  if (addr == NULL)
    return 0;

  while ((c = (unsigned char)*addr++) != 0) {
    if (c == '.') {
      ipnum = (ipnum << 8) + octet;
      dots_left--;
      octet = 0;
      continue;
    }

    digit = c - '0';
    if (digit > 9)
      return 0;

    octet = octet * 10 + digit;
    /* Check after every digit: checking only at the separator lets a long
       digit run wrap the accumulator back into the 0..255 range. */
    if (octet > 255)
      return 0;
  }

  if (dots_left != 0)
    return 0;

  return (ipnum << 8) + octet;
}
|
142
|
+
|
143
|
+
|
144
|
+
/*
 * Maps a connection-type field to the is_mobile flag.
 *
 * Values listed by the original author: ?, dialup, broadband, cable, xdsl,
 * mobile, t1, t3, oc3, oc12, satellite, wireless. Only a value starting
 * with 'm' yields 1; everything else, including NULL and the empty
 * string, yields 0.
 */
unsigned char con_type_to_int(char* con_type) {
  /* strtok() hands back NULL when a line has too few fields. */
  if (con_type == NULL)
    return 0;

  return (con_type[0] == 'm') ? 1 : 0;
}
|
162
|
+
|
163
|
+
// Function to compare
|
164
|
+
// - either two ip-ranges: i.e.: a(from...to) <=> b(from...to)
|
165
|
+
// - or a ip(i.e. range without to) and an ip-range: i.e. a(from...NULL) <=> b(from...to); a(from...to) <=> b(from ... NULL)
|
166
|
+
int compare_ranges(const void *fa, const void *fb) {
|
167
|
+
if(fa == NULL)
|
168
|
+
{
|
169
|
+
if(DEBUG){printf("FA IS NULL!!!\n");}
|
170
|
+
return 0;
|
171
|
+
}
|
172
|
+
if(fb == NULL)
|
173
|
+
{
|
174
|
+
if(DEBUG){printf("FB IS NULL!!!\n");}
|
175
|
+
return 0;
|
176
|
+
}
|
177
|
+
|
178
|
+
|
179
|
+
const IpRange *a = (IpRange *) fa;
|
180
|
+
const IpRange *b = (IpRange *) fb;
|
181
|
+
|
182
|
+
if(a->from>0 && a->to>0 && b->from>0 && b->to>0){ //regular case: both entries are ranges
|
183
|
+
if(a->to < b->from) {
|
184
|
+
return -1;
|
185
|
+
}else if(a->from > b->to){
|
186
|
+
return +1;
|
187
|
+
}else{
|
188
|
+
return 0;
|
189
|
+
}
|
190
|
+
}else if(a->to == 0 && b->to>0){//a is a search_object
|
191
|
+
if(a->from < b->from) {
|
192
|
+
return -1;
|
193
|
+
}else if(a->from > b->to){
|
194
|
+
return +1;
|
195
|
+
}else{
|
196
|
+
return 0;
|
197
|
+
}
|
198
|
+
}else if(b->to == 0 && a->to>0){//b is a search_object
|
199
|
+
if(b->from < a->from){
|
200
|
+
return -1;
|
201
|
+
}else if(b->from > a->to){
|
202
|
+
return +1;
|
203
|
+
}else{
|
204
|
+
return 0;
|
205
|
+
}
|
206
|
+
}else if(a->to == 0 && b->to == 0){ //both are search objects - this should not happen!
|
207
|
+
return a->from - b->from;
|
208
|
+
}
|
209
|
+
return 0;
|
210
|
+
}
|
211
|
+
|
212
|
+
|
213
|
+
int
|
214
|
+
compare_cities(const void *a, const void *b){
|
215
|
+
const City city_a = *(City*)a;
|
216
|
+
const City city_b = * (City*) b;
|
217
|
+
// sort cities by city_code
|
218
|
+
return city_a.city_code - city_b.city_code;
|
219
|
+
}
|
220
|
+
|
221
|
+
void
|
222
|
+
sort_cities(IPDB * db){
|
223
|
+
if(DEBUG)
|
224
|
+
printf("Sorting %i Cities in db...\n", db->cities_count);
|
225
|
+
|
226
|
+
struct timeval tim;
|
227
|
+
double t1 = get_time(&tim);
|
228
|
+
|
229
|
+
qsort(db->cities,db->cities_count,sizeof(City), compare_cities);
|
230
|
+
if(DEBUG)
|
231
|
+
printf("\n Sorting cities needed %.6lf seconds\n", get_time(&tim)-t1);
|
232
|
+
}
|
233
|
+
|
234
|
+
|
235
|
+
int // returns a city-index
|
236
|
+
city_index_by_code(IPDB * db, int city_code){
|
237
|
+
City *search, *result;
|
238
|
+
search = malloc(sizeof(City));
|
239
|
+
search->city_code = city_code;
|
240
|
+
result = (City*) bsearch(search, db->cities, db->cities_count, sizeof(City), compare_cities);
|
241
|
+
|
242
|
+
if(search != NULL)
|
243
|
+
free(search);
|
244
|
+
|
245
|
+
if(result == NULL)
|
246
|
+
{
|
247
|
+
if(DEBUG)
|
248
|
+
printf("Could not find searched city with code: %i \n", city_code);
|
249
|
+
return -1;
|
250
|
+
}else{
|
251
|
+
int index;
|
252
|
+
index = (result - db->cities);
|
253
|
+
return index;
|
254
|
+
}
|
255
|
+
}
|
256
|
+
|
257
|
+
|
258
|
+
/*
 * Finds the range that contains the dotted-quad address `ip`.
 *
 * The lookup is a bsearch() over db->ranges with compare_ranges(), so the
 * table must be ordered by address. Returns a pointer into db->ranges, or
 * NULL when db/ip is NULL, the table is empty, or no range matches.
 */
IpRange* find_range_for_ip(IPDB *db, char *ip) {
  IpRange key;
  IpRange *hit;

  if(db == NULL)
  {
    if(DEBUG){printf("ERROR: DB ist NULL! \n");}
    return NULL;
  }

  if(db->ranges == NULL || db->ranges_count == 0)
  {
    if(DEBUG){printf("ERROR: DB has no Ranges Data. Can not search!\n");}
    return NULL;
  }

  if(ip == NULL)
    return NULL;

  /* The key lives on the stack: the former heap key leaked on both early
     returns above and was used without checking malloc(). to == 0 marks
     it as a search object for compare_ranges(). */
  memset(&key, 0, sizeof key);
  key.from = ip_to_int(ip);
  key.to = 0;
  key.city_index = 0;

  if(DEBUG)
    printf("Searching for: ip=%s, ipnum=%lu \n", ip, key.from);

  hit = bsearch(&key, db->ranges, db->ranges_count, sizeof(IpRange), compare_ranges);
  if(hit == NULL)
  {
    if(DEBUG)
      printf("ERROR: Could not find the IP: %s! THIS SHOULD NOT HAPPEN!\n", ip);
    return NULL;
  }

  if(DEBUG) {
    printf("Found Range: \t");
    print_range(hit);
  }
  return hit;
}
|
297
|
+
|
298
|
+
/*
 * Returns the city that `range` points at through its city_index, or NULL
 * when db/range is NULL, the city table is empty, or the index lies
 * outside the table.
 */
City * find_city_for_ip_range(IPDB * db, IpRange* range)
{
  if(!db || !range)
    return NULL;

  if(db->cities == NULL || db->cities_count == 0)
  {
    if(DEBUG)
      printf("ERROR: DB has no City Data. Can not search!\n");
    return NULL;
  }

  /* Index 0 is a legitimate slot (the first city after sorting), so only
     negative and too-large indices are rejected. The previous code logged
     the error but then indexed the table anyway. */
  if( range->city_index < 0 || range->city_index >= db->cities_count )
  {
    if(DEBUG)
      printf("ERROR: Could not find city with index: %i - THIS SHOULD NOT HAPPEN!\n", range->city_index);
    return NULL;
  }

  return &(db->cities[range->city_index]);
}
|
318
|
+
|
319
|
+
/*
 * Returns the ISP name stored for `range`, or NULL when db/range is NULL
 * or the range carries no valid ISP index (negative, or not below
 * db->isps_count).
 */
char* find_isp_for_ip_range(IPDB * db, IpRange* range)
{
  /* Checked separately from the index test: the debug message below reads
     range->isp_index and must never run with range == NULL. */
  if( db == NULL || range == NULL )
    return NULL;

  if( range->isp_index < 0 || range->isp_index >= db->isps_count ){
    if(DEBUG){printf("Could not find isp for isp_index=%i\n", range->isp_index);}
    return NULL;
  }

  return db->isps[range->isp_index];
}
|
327
|
+
|
328
|
+
int16
|
329
|
+
isp_index_by_name(IPDB * db, char* isp_name){
|
330
|
+
if(isp_name == NULL || isp_name == "")
|
331
|
+
return -1;
|
332
|
+
if( db->isps_count > 0){
|
333
|
+
int16 i = 0;
|
334
|
+
for( i = 0; i < db->isps_count; i++)
|
335
|
+
{
|
336
|
+
if( strcmp(db->isps[i], isp_name)==0)
|
337
|
+
{
|
338
|
+
return i;
|
339
|
+
}
|
340
|
+
}
|
341
|
+
}
|
342
|
+
// add new isp
|
343
|
+
if(db->isps_count < MAX_ISPS_COUNT){
|
344
|
+
int16 new_index = db->isps_count;
|
345
|
+
strncpy(db->isps[new_index], isp_name, MAX_ISP_NAME_LENGTH);
|
346
|
+
db->isps_count++;
|
347
|
+
return new_index;
|
348
|
+
}else{
|
349
|
+
if(DEBUG){printf("ERROR: MAX_ISPS_COUNT = %i limit reached - this should not happen!\n", MAX_ISPS_COUNT);}
|
350
|
+
return -1;
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
|
355
|
+
|
356
|
+
// read ip-ranges from csv file, of format:
|
357
|
+
// from_ip|to_ip|contype|city_code
|
358
|
+
void
|
359
|
+
read_ranges_csv(IPDB * db){
|
360
|
+
struct timeval tim;
|
361
|
+
double t1 = get_time(&tim);
|
362
|
+
|
363
|
+
db->ranges = malloc(sizeof(IpRange) * db->max_ranges_count);
|
364
|
+
|
365
|
+
if(DEBUG)
|
366
|
+
printf("Parsing RANGES-CSV-file: %s\n", db->ranges_csv_file);
|
367
|
+
FILE * f = fopen(db->ranges_csv_file, "rt");
|
368
|
+
if(f == NULL)
|
369
|
+
{
|
370
|
+
if(DEBUG)
|
371
|
+
printf("Could not open the CSV-file: %s\n", db->ranges_csv_file);
|
372
|
+
return;
|
373
|
+
}
|
374
|
+
char line[256];
|
375
|
+
char* from;
|
376
|
+
char* to;
|
377
|
+
char* city_code;
|
378
|
+
int city_index;
|
379
|
+
|
380
|
+
char* con_type;
|
381
|
+
char* isp_name;
|
382
|
+
uint16 isp_index;
|
383
|
+
|
384
|
+
int invalid_cities_count = 0;
|
385
|
+
|
386
|
+
IpRange* entry;
|
387
|
+
db->ranges_count = 0;
|
388
|
+
while (fgets(line, sizeof(line) ,f) && db->ranges_count < db->max_ranges_count){
|
389
|
+
from = NULL;
|
390
|
+
to = NULL;
|
391
|
+
city_code = NULL;
|
392
|
+
city_index = 0;
|
393
|
+
|
394
|
+
con_type = NULL;
|
395
|
+
isp_name = NULL;
|
396
|
+
int16 isp_index = -1;
|
397
|
+
|
398
|
+
if(DEBUG && db->ranges_count % 1000000 == 0)
|
399
|
+
printf("Worked lines: %i\n", db->ranges_count);
|
400
|
+
|
401
|
+
from = strtok(line, RANGES_DELIM);
|
402
|
+
to = strtok(NULL, RANGES_DELIM);
|
403
|
+
con_type = strtok(NULL, RANGES_DELIM);
|
404
|
+
city_code = strtok(NULL, RANGES_DELIM);
|
405
|
+
isp_name = strtok(NULL, RANGES_DELIM);
|
406
|
+
|
407
|
+
city_index = city_index_by_code(db, atoi(city_code));
|
408
|
+
isp_index = isp_index_by_name(db, isp_name);
|
409
|
+
|
410
|
+
if(city_index < 0)
|
411
|
+
{
|
412
|
+
if(DEBUG)
|
413
|
+
printf("Could not find city for code: %i\n", atoi(city_code));
|
414
|
+
invalid_cities_count ++;
|
415
|
+
continue;
|
416
|
+
}else{
|
417
|
+
entry = &(db->ranges[db->ranges_count]);
|
418
|
+
entry->from = ip_to_int(from);
|
419
|
+
entry->to = ip_to_int(to);
|
420
|
+
entry->is_mobile = con_type_to_int(con_type);
|
421
|
+
entry->city_index = city_index;
|
422
|
+
entry->isp_index = isp_index;
|
423
|
+
|
424
|
+
db->ranges_count++;
|
425
|
+
}
|
426
|
+
}
|
427
|
+
if(DEBUG)
|
428
|
+
{
|
429
|
+
if(invalid_cities_count ){printf("Found invalid cities: %i\n", invalid_cities_count);}
|
430
|
+
printf("\n Parsing of %i records needed %.6lf seconds\n", db->ranges_count, get_time(&tim)-t1);
|
431
|
+
}
|
432
|
+
}
|
433
|
+
|
434
|
+
|
435
|
+
|
436
|
+
//translate country iso3 to iso2
|
437
|
+
char *
|
438
|
+
iso2_code(char* iso3){
|
439
|
+
int i = 0;
|
440
|
+
for( i = 0; i < num_countries; i++)
|
441
|
+
{
|
442
|
+
if( strcmp(country_iso3_codes[i],iso3)==0)
|
443
|
+
{
|
444
|
+
return (char*) country_iso2_codes[i];
|
445
|
+
}
|
446
|
+
}
|
447
|
+
return (char*) country_iso2_codes[0];
|
448
|
+
}
|
449
|
+
|
450
|
+
//read city-data from csv-file of format:
|
451
|
+
// COUNTRY,REGION,CITY-NAME,METRO-CODE,CITY-CODE,LATITUDE,LONGITUDE
|
452
|
+
void
|
453
|
+
read_cities_csv(IPDB * db){
|
454
|
+
struct timeval tim;
|
455
|
+
double t1 = get_time(&tim);
|
456
|
+
|
457
|
+
db->cities_count = 0;
|
458
|
+
db->cities = malloc(sizeof(City) * db->max_cities_count);
|
459
|
+
|
460
|
+
if(DEBUG)
|
461
|
+
printf("Parsing Cities-CSV-file: %s\n", db->cities_csv_file);
|
462
|
+
FILE * f = fopen(db->cities_csv_file, "rt");
|
463
|
+
if(f == NULL)
|
464
|
+
{
|
465
|
+
if(DEBUG)
|
466
|
+
printf("Could not open the Cities-CSV-file: %s\n", db->cities_csv_file);
|
467
|
+
return;
|
468
|
+
}
|
469
|
+
char line[256];
|
470
|
+
char *country, *region, *name,*metro_code,*city_code,*lat,*lng ;
|
471
|
+
int i = 0;
|
472
|
+
City* entry;
|
473
|
+
|
474
|
+
while (fgets(line,sizeof(line),f) && db->cities_count < db->max_cities_count){
|
475
|
+
i++;
|
476
|
+
if(i == 1)
|
477
|
+
continue;//skip the header
|
478
|
+
|
479
|
+
if(DEBUG && i % 1000000 == 0)
|
480
|
+
{
|
481
|
+
printf("Worked lines: %i\n", i);
|
482
|
+
}
|
483
|
+
// COUNTRY,REGION,CITY-NAME,METRO-CODE,CITY-CODE,LATITUDE,LONGITUDE
|
484
|
+
country = strtok(line, CITIES_DELIM);
|
485
|
+
region = strtok(NULL, CITIES_DELIM);
|
486
|
+
name = strtok(NULL, CITIES_DELIM);
|
487
|
+
metro_code = strtok(NULL, CITIES_DELIM);
|
488
|
+
city_code = strtok(NULL, CITIES_DELIM);
|
489
|
+
lat = strtok(NULL, CITIES_DELIM);
|
490
|
+
lng = strtok(NULL, CITIES_DELIM);
|
491
|
+
|
492
|
+
entry = &(db->cities[db->cities_count]);
|
493
|
+
|
494
|
+
strncpy(entry->country_iso3, country, strlen(country));
|
495
|
+
|
496
|
+
strncpy(entry->country_iso2, iso2_code(country), 2);
|
497
|
+
strncpy(entry->name, name, strlen(name));
|
498
|
+
|
499
|
+
entry->city_code = atoi(city_code);
|
500
|
+
entry->lat = atof(lat);
|
501
|
+
entry->lng = atof(lng);
|
502
|
+
db->cities_count++;
|
503
|
+
}
|
504
|
+
if(DEBUG)
|
505
|
+
printf("\n Parsing of %i records needed %.6lf seconds\n", db->cities_count, get_time(&tim)-t1);
|
506
|
+
}
|
507
|
+
|
508
|
+
/**
|
509
|
+
cache-file is an exact binary copy of the ranges+cities-arrays from memory,
|
510
|
+
the layout goes like this:
|
511
|
+
db->cities_count [4 Bytes]
|
512
|
+
db->ranges_count [4 Bytes]
|
513
|
+
|
514
|
+
db->cities [sizeof(City)=24 x db->ranges_count Bytes]
|
515
|
+
db->ranges [sizeof(IpRange)=24 x db->ranges_count Bytes]
|
516
|
+
*/
|
517
|
+
void
|
518
|
+
write_cache_file(IPDB * db){
|
519
|
+
struct timeval tim;
|
520
|
+
double t1 = get_time(&tim);
|
521
|
+
int objects_written;
|
522
|
+
|
523
|
+
FILE * f;
|
524
|
+
f = fopen(db->cache_file_name, "w");
|
525
|
+
if(f==NULL){
|
526
|
+
if(DEBUG)
|
527
|
+
printf("Could not open Cache-File: %s\n", db->cache_file_name);
|
528
|
+
return;
|
529
|
+
}
|
530
|
+
if(DEBUG){
|
531
|
+
printf("Dumping %i records to cache-file: %s\n\n", db->ranges_count, db->cache_file_name);
|
532
|
+
|
533
|
+
//write the record length at file header
|
534
|
+
printf("Writing DB-Header of length: %li\n",sizeof(db->ranges_count));
|
535
|
+
|
536
|
+
printf("RecordLength: %li\n",sizeof(IpRange));
|
537
|
+
printf("FieldLength: %li\n",sizeof(db->ranges[0].from));
|
538
|
+
}
|
539
|
+
//write the header: i.e.: numbers of records
|
540
|
+
fwrite(&(db->cities_count), sizeof(db->cities_count),1,f);
|
541
|
+
fwrite(&(db->isps_count), sizeof(db->isps_count),1,f);
|
542
|
+
fwrite(&(db->ranges_count), sizeof(db->ranges_count),1,f);
|
543
|
+
|
544
|
+
if(DEBUG)
|
545
|
+
printf("Writing Contents with %i cities, a %li bytes each, should = %li \n", db->cities_count, sizeof(City), db->cities_count * sizeof(City));
|
546
|
+
//write the actual data: all the ranges-array-buffer:
|
547
|
+
objects_written = fwrite(db->cities, sizeof(City), db->cities_count, f);
|
548
|
+
|
549
|
+
if(DEBUG)
|
550
|
+
printf("Writing Contents with %i isps, a %i bytes each, should = %i \n", db->isps_count, MAX_ISP_NAME_LENGTH, db->isps_count * MAX_ISP_NAME_LENGTH);
|
551
|
+
//write the actual data: all the ranges-array-buffer:
|
552
|
+
objects_written += fwrite(db->isps, MAX_ISP_NAME_LENGTH, db->isps_count, f);
|
553
|
+
|
554
|
+
if(DEBUG)
|
555
|
+
printf("Writing Contents with %i ranges, a %li bytes each, should = %li \n", db->ranges_count, sizeof(IpRange), db->ranges_count * sizeof(IpRange));
|
556
|
+
//write the actual data: all the ranges-array-buffer:
|
557
|
+
objects_written += fwrite(db->ranges, sizeof(IpRange), db->ranges_count, f);
|
558
|
+
|
559
|
+
|
560
|
+
fclose(f);
|
561
|
+
if(DEBUG)
|
562
|
+
printf("\n Writing CacheFile of %i objects needed %.6lf seconds\n", objects_written, get_time(&tim)-t1);
|
563
|
+
}
|
564
|
+
|
565
|
+
int
|
566
|
+
read_cache_file(IPDB * db){
|
567
|
+
struct timeval tim;
|
568
|
+
double t1 = get_time(&tim);
|
569
|
+
FILE * f;
|
570
|
+
f = fopen(db->cache_file_name, "r");
|
571
|
+
if(f==NULL){
|
572
|
+
if(DEBUG)
|
573
|
+
printf("Could not open Cache-File: %s\n", db->cache_file_name);
|
574
|
+
return 0;
|
575
|
+
}
|
576
|
+
int cities_header_read = fread(&(db->cities_count), sizeof(db->cities_count),1,f);
|
577
|
+
int isps_header_read = fread(&(db->isps_count), sizeof(db->isps_count),1,f);
|
578
|
+
int ranges_header_read = fread(&(db->ranges_count), sizeof(db->ranges_count),1,f);
|
579
|
+
|
580
|
+
|
581
|
+
if(cities_header_read == 0 || isps_header_read == 0 || ranges_header_read == 0 || db->cities_count == 0 || db->isps_count ==0 || db->ranges_count ==0)
|
582
|
+
{
|
583
|
+
if(DEBUG){printf("Could not read Cities-Header from Cache-File: %s\n", db->cache_file_name);}
|
584
|
+
return 0;
|
585
|
+
}
|
586
|
+
if(DEBUG)
|
587
|
+
printf("Reading DB-Header from Cache-File: %s, with %i cities, %iisps and %i ranges\n",db->cache_file_name, db->cities_count, db->isps_count, db->ranges_count);
|
588
|
+
|
589
|
+
int objects_read = 0;
|
590
|
+
if(DEBUG)
|
591
|
+
printf("Allocating: %lu for cities-array \n", sizeof(City)*(db->cities_count));
|
592
|
+
db->cities = malloc(sizeof(City) * db->cities_count);
|
593
|
+
objects_read += fread(db->cities, sizeof(City),db->cities_count,f);
|
594
|
+
|
595
|
+
if(DEBUG)
|
596
|
+
printf("Reading in the isps into preallocated buffer of size: %lu\n", sizeof(db->isps));
|
597
|
+
objects_read += fread(db->isps, MAX_ISP_NAME_LENGTH, db->isps_count,f);
|
598
|
+
|
599
|
+
if(DEBUG)
|
600
|
+
printf("Allocating: %lu for ranges-array \n", sizeof(IpRange)*(db->ranges_count));
|
601
|
+
db->ranges = malloc(sizeof(IpRange) * db->ranges_count);
|
602
|
+
objects_read += fread(db->ranges, sizeof(IpRange),db->ranges_count,f);
|
603
|
+
|
604
|
+
|
605
|
+
fclose(f);
|
606
|
+
if(DEBUG)
|
607
|
+
printf("Reading cacheFile of %i objects needed %.6lf seconds\n", objects_read, get_time(&tim)-t1);
|
608
|
+
return objects_read;
|
609
|
+
}
|
610
|
+
|
611
|
+
void
|
612
|
+
benchmark_search(IPDB * db,int count){
|
613
|
+
printf("(Naiv) benchmark of the City-Search-Function with %i counts \n", count);
|
614
|
+
struct timeval tim;
|
615
|
+
double t1 = get_time(&tim);
|
616
|
+
int i;
|
617
|
+
City * city;
|
618
|
+
|
619
|
+
for(i=0;i<count; i++){
|
620
|
+
IpRange* range = find_range_for_ip(db,"278.50.47.0");
|
621
|
+
City* city = find_city_for_ip_range(db,range);
|
622
|
+
}
|
623
|
+
double delta = get_time(&tim)-t1;
|
624
|
+
|
625
|
+
printf("\n\nSearch: %.6lf seconds elapsed, i.e. %.6lf Ops/Second \n", delta, count / delta);
|
626
|
+
}
|
627
|
+
|
628
|
+
IPDB * init_db(char * cities_csv_file, char * ranges_csv_file, char * cache_file_name){
|
629
|
+
if(DEBUG)
|
630
|
+
printf("Initializing db\n");
|
631
|
+
IPDB *db;
|
632
|
+
db = (IPDB*)malloc(sizeof(IPDB));
|
633
|
+
if (db == NULL) //no memory left
|
634
|
+
return NULL;
|
635
|
+
db->cities = NULL;
|
636
|
+
db->ranges = NULL;
|
637
|
+
db->cache_file_name = cache_file_name;
|
638
|
+
|
639
|
+
db->cities_csv_file = cities_csv_file;
|
640
|
+
db->max_cities_count = MAX_CITIES_COUNT;
|
641
|
+
db->ranges_csv_file = ranges_csv_file;
|
642
|
+
db->max_ranges_count = MAX_RANGES_COUNT;
|
643
|
+
|
644
|
+
db->isps_count = 0;
|
645
|
+
|
646
|
+
if(USE_CACHE && read_cache_file(db) > 0){
|
647
|
+
if(DEBUG)
|
648
|
+
printf("Loaded DB from Cache-File with %i records \n", db->ranges_count);
|
649
|
+
}else{
|
650
|
+
if(DEBUG)
|
651
|
+
printf("Initializing IPDB from CSV-file: %s \n", db->ranges_csv_file);
|
652
|
+
read_cities_csv(db);
|
653
|
+
if(db->cities_count == 0)
|
654
|
+
{
|
655
|
+
return NULL;
|
656
|
+
}
|
657
|
+
sort_cities(db);
|
658
|
+
read_ranges_csv(db);
|
659
|
+
if(db!=NULL && db->ranges_count > 0 && USE_CACHE)
|
660
|
+
{
|
661
|
+
if(DEBUG)
|
662
|
+
printf("Got %i records from CSV-file, writing to cache...\n", db->ranges_count);
|
663
|
+
write_cache_file(db);
|
664
|
+
}
|
665
|
+
}
|
666
|
+
return db;
|
667
|
+
}
|
668
|
+
|