maxmind-geoip2 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +7 -0
  3. data/Gemfile.lock +5 -3
  4. data/README.md +69 -0
  5. data/lib/maxmind/geoip2.rb +0 -13
  6. data/lib/maxmind/geoip2/model/abstract.rb +27 -0
  7. data/lib/maxmind/geoip2/model/anonymous_ip.rb +63 -0
  8. data/lib/maxmind/geoip2/model/asn.rb +39 -0
  9. data/lib/maxmind/geoip2/model/city.rb +28 -13
  10. data/lib/maxmind/geoip2/model/connection_type.rb +32 -0
  11. data/lib/maxmind/geoip2/model/country.rb +17 -9
  12. data/lib/maxmind/geoip2/model/domain.rb +32 -0
  13. data/lib/maxmind/geoip2/model/enterprise.rb +1 -1
  14. data/lib/maxmind/geoip2/model/isp.rb +53 -0
  15. data/lib/maxmind/geoip2/reader.rb +176 -59
  16. data/lib/maxmind/geoip2/record/abstract.rb +2 -1
  17. data/lib/maxmind/geoip2/record/city.rb +11 -6
  18. data/lib/maxmind/geoip2/record/continent.rb +11 -6
  19. data/lib/maxmind/geoip2/record/country.rb +17 -8
  20. data/lib/maxmind/geoip2/record/location.rb +18 -8
  21. data/lib/maxmind/geoip2/record/place.rb +5 -3
  22. data/lib/maxmind/geoip2/record/postal.rb +6 -2
  23. data/lib/maxmind/geoip2/record/represented_country.rb +4 -2
  24. data/lib/maxmind/geoip2/record/subdivision.rb +14 -10
  25. data/lib/maxmind/geoip2/record/traits.rb +52 -21
  26. data/maxmind-geoip2.gemspec +3 -1
  27. data/test/data/LICENSE +4 -0
  28. data/test/data/MaxMind-DB-spec.md +570 -0
  29. data/test/data/MaxMind-DB-test-metadata-pointers.mmdb +0 -0
  30. data/test/data/README.md +4 -0
  31. data/test/data/bad-data/README.md +7 -0
  32. data/test/data/bad-data/libmaxminddb/libmaxminddb-offset-integer-overflow.mmdb +0 -0
  33. data/test/data/bad-data/maxminddb-golang/cyclic-data-structure.mmdb +0 -0
  34. data/test/data/bad-data/maxminddb-golang/invalid-bytes-length.mmdb +1 -0
  35. data/test/data/bad-data/maxminddb-golang/invalid-data-record-offset.mmdb +0 -0
  36. data/test/data/bad-data/maxminddb-golang/invalid-map-key-length.mmdb +0 -0
  37. data/test/data/bad-data/maxminddb-golang/invalid-string-length.mmdb +1 -0
  38. data/test/data/bad-data/maxminddb-golang/metadata-is-an-uint128.mmdb +1 -0
  39. data/test/data/bad-data/maxminddb-golang/unexpected-bytes.mmdb +0 -0
  40. data/test/data/perltidyrc +12 -0
  41. data/test/data/source-data/GeoIP2-Anonymous-IP-Test.json +48 -0
  42. data/test/data/source-data/GeoIP2-City-Test.json +12852 -0
  43. data/test/data/source-data/GeoIP2-Connection-Type-Test.json +102 -0
  44. data/test/data/source-data/GeoIP2-Country-Test.json +15916 -0
  45. data/test/data/source-data/GeoIP2-DensityIncome-Test.json +14 -0
  46. data/test/data/source-data/GeoIP2-Domain-Test.json +452 -0
  47. data/test/data/source-data/GeoIP2-Enterprise-Test.json +687 -0
  48. data/test/data/source-data/GeoIP2-ISP-Test.json +12593 -0
  49. data/test/data/source-data/GeoIP2-Precision-Enterprise-Test.json +2061 -0
  50. data/test/data/source-data/GeoIP2-Static-IP-Score-Test.json +2132 -0
  51. data/test/data/source-data/GeoIP2-User-Count-Test.json +2837 -0
  52. data/test/data/source-data/GeoLite2-ASN-Test.json +37 -0
  53. data/test/data/source-data/README +15 -0
  54. data/test/data/test-data/GeoIP2-Anonymous-IP-Test.mmdb +0 -0
  55. data/test/data/test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb +0 -0
  56. data/test/data/test-data/GeoIP2-City-Test-Invalid-Node-Count.mmdb +0 -0
  57. data/test/data/test-data/GeoIP2-City-Test.mmdb +0 -0
  58. data/test/data/test-data/GeoIP2-Connection-Type-Test.mmdb +0 -0
  59. data/test/data/test-data/GeoIP2-Country-Test.mmdb +0 -0
  60. data/test/data/test-data/GeoIP2-DensityIncome-Test.mmdb +0 -0
  61. data/test/data/test-data/GeoIP2-Domain-Test.mmdb +0 -0
  62. data/test/data/test-data/GeoIP2-Enterprise-Test.mmdb +0 -0
  63. data/test/data/test-data/GeoIP2-ISP-Test.mmdb +0 -0
  64. data/test/data/test-data/GeoIP2-Precision-Enterprise-Test.mmdb +0 -0
  65. data/test/data/test-data/GeoIP2-Static-IP-Score-Test.mmdb +0 -0
  66. data/test/data/test-data/GeoIP2-User-Count-Test.mmdb +0 -0
  67. data/test/data/test-data/GeoLite2-ASN-Test.mmdb +0 -0
  68. data/test/data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb +0 -0
  69. data/test/data/test-data/MaxMind-DB-string-value-entries.mmdb +0 -0
  70. data/test/data/test-data/MaxMind-DB-test-broken-pointers-24.mmdb +0 -0
  71. data/test/data/test-data/MaxMind-DB-test-broken-search-tree-24.mmdb +0 -0
  72. data/test/data/test-data/MaxMind-DB-test-decoder.mmdb +0 -0
  73. data/test/data/test-data/MaxMind-DB-test-ipv4-24.mmdb +0 -0
  74. data/test/data/test-data/MaxMind-DB-test-ipv4-28.mmdb +0 -0
  75. data/test/data/test-data/MaxMind-DB-test-ipv4-32.mmdb +0 -0
  76. data/test/data/test-data/MaxMind-DB-test-ipv6-24.mmdb +0 -0
  77. data/test/data/test-data/MaxMind-DB-test-ipv6-28.mmdb +0 -0
  78. data/test/data/test-data/MaxMind-DB-test-ipv6-32.mmdb +0 -0
  79. data/test/data/test-data/MaxMind-DB-test-metadata-pointers.mmdb +0 -0
  80. data/test/data/test-data/MaxMind-DB-test-mixed-24.mmdb +0 -0
  81. data/test/data/test-data/MaxMind-DB-test-mixed-28.mmdb +0 -0
  82. data/test/data/test-data/MaxMind-DB-test-mixed-32.mmdb +0 -0
  83. data/test/data/test-data/MaxMind-DB-test-nested.mmdb +0 -0
  84. data/test/data/test-data/README.md +26 -0
  85. data/test/data/test-data/maps-with-pointers.raw +0 -0
  86. data/test/data/test-data/write-test-data.pl +641 -0
  87. data/test/data/tidyall.ini +5 -0
  88. data/test/test_model_country.rb +1 -1
  89. data/test/test_model_names.rb +1 -1
  90. data/test/test_reader.rb +79 -1
  91. metadata +85 -4
@@ -7,37 +7,46 @@ module MaxMind::GeoIP2::Record
7
7
  #
8
8
  # This record is returned by all location services and databases.
9
9
  #
10
- # See Place for inherited methods.
10
+ # See {MaxMind::GeoIP2::Record::Place} for inherited methods.
11
11
  class Country < Place
12
12
  # A value from 0-100 indicating MaxMind's confidence that the country is
13
13
  # correct. This attribute is only available from the Insights service and
14
- # the GeoIP2 Enterprise database. Integer but may be nil.
14
+ # the GeoIP2 Enterprise database.
15
+ #
16
+ # @return [Integer, nil]
15
17
  def confidence
16
18
  get('confidence')
17
19
  end
18
20
 
19
21
  # The GeoName ID for the country. This attribute is returned by all
20
- # location services and databases. Integer but may be nil.
22
+ # location services and databases.
23
+ #
24
+ # @return [Integer, nil]
21
25
  def geoname_id
22
26
  get('geoname_id')
23
27
  end
24
28
 
25
29
  # This is true if the country is a member state of the European Union. This
26
- # attribute is returned by all location services and databases. Boolean.
30
+ # attribute is returned by all location services and databases.
31
+ #
32
+ # @return [Boolean]
27
33
  def in_european_union?
28
34
  get('is_in_european_union')
29
35
  end
30
36
 
31
37
  # The two-character ISO 3166-1 alpha code for the country. See
32
38
  # https://en.wikipedia.org/wiki/ISO_3166-1. This attribute is returned by
33
- # all location services and databases. String but may be nil.
39
+ # all location services and databases.
40
+ #
41
+ # @return [String, nil]
34
42
  def iso_code
35
43
  get('iso_code')
36
44
  end
37
45
 
38
- # A Hash where the keys are locale codes (Strings) and the values are names
39
- # (Strings). This attribute is returned by all location services and
40
- # databases. Hash but may be nil.
46
+ # A Hash where the keys are locale codes and the values are names. This
47
+ # attribute is returned by all location services and databases.
48
+ #
49
+ # @return [Hash<String, String>, nil]
41
50
  def names
42
51
  get('names')
43
52
  end
@@ -11,29 +11,35 @@ module MaxMind::GeoIP2::Record
11
11
  # The approximate accuracy radius in kilometers around the latitude and
12
12
  # longitude for the IP address. This is the radius where we have a 67%
13
13
  # confidence that the device using the IP address resides within the circle
14
- # centered at the latitude and longitude with the provided radius. Integer
15
- # but may be nil.
14
+ # centered at the latitude and longitude with the provided radius.
15
+ #
16
+ # @return [Integer, nil]
16
17
  def accuracy_radius
17
18
  get('accuracy_radius')
18
19
  end
19
20
 
20
21
  # The average income in US dollars associated with the requested IP
21
22
  # address. This attribute is only available from the Insights service.
22
- # Integer but may be nil.
23
+ #
24
+ # @return [Integer, nil]
23
25
  def average_income
24
26
  get('average_income')
25
27
  end
26
28
 
27
29
  # The approximate latitude of the location associated with the IP address.
28
30
  # This value is not precise and should not be used to identify a particular
29
- # address or household. Float but may be nil.
31
+ # address or household.
32
+ #
33
+ # @return [Float, nil]
30
34
  def latitude
31
35
  get('latitude')
32
36
  end
33
37
 
34
38
  # The approximate longitude of the location associated with the IP address.
35
39
  # This value is not precise and should not be used to identify a particular
36
- # address or household. Float but may be nil.
40
+ # address or household.
41
+ #
42
+ # @return [Float, nil]
37
43
  def longitude
38
44
  get('longitude')
39
45
  end
@@ -41,21 +47,25 @@ module MaxMind::GeoIP2::Record
41
47
  # The metro code of the location if the location is in the US. MaxMind
42
48
  # returns the same metro codes as the Google AdWords API. See
43
49
  # https://developers.google.com/adwords/api/docs/appendix/cities-DMAregions.
44
- # Integer but may be nil.
50
+ #
51
+ # @return [Integer, nil]
45
52
  def metro_code
46
53
  get('metro_code')
47
54
  end
48
55
 
49
56
  # The estimated population per square kilometer associated with the IP
50
57
  # address. This attribute is only available from the Insights service.
51
- # Integer but may be nil.
58
+ #
59
+ # @return [Integer, nil]
52
60
  def population_density
53
61
  get('population_density')
54
62
  end
55
63
 
56
64
  # The time zone associated with location, as specified by the IANA Time
57
65
  # Zone Database, e.g., "America/New_York". See
58
- # https://www.iana.org/time-zones. String but may be nil.
66
+ # https://www.iana.org/time-zones.
67
+ #
68
+ # @return [String, nil]
59
69
  def time_zone
60
70
  get('time_zone')
61
71
  end
@@ -5,13 +5,15 @@ require 'maxmind/geoip2/record/abstract'
5
5
  module MaxMind::GeoIP2::Record
6
6
  # Location data common to different location types.
7
7
  class Place < Abstract
8
- def initialize(record, locales) # :nodoc:
8
+ # @!visibility private
9
+ def initialize(record, locales)
9
10
  super(record)
10
11
  @locales = locales
11
12
  end
12
13
 
13
- # A string containing the first available localized name in order of
14
- # preference.
14
+ # The first available localized name in order of preference.
15
+ #
16
+ # @return [String, nil]
15
17
  def name
16
18
  n = names
17
19
  return nil if n.nil?
@@ -11,14 +11,18 @@ module MaxMind::GeoIP2::Record
11
11
  # The postal code of the location. Postal codes are not available for all
12
12
  # countries. In some countries, this will only contain part of the postal
13
13
  # code. This attribute is returned by all location databases and services
14
- # besides Country. String but may be nil.
14
+ # besides Country.
15
+ #
16
+ # @return [String, nil]
15
17
  def code
16
18
  get('code')
17
19
  end
18
20
 
19
21
  # A value from 0-100 indicating MaxMind's confidence that the postal code
20
22
  # is correct. This attribute is only available from the Insights service
21
- # and the GeoIP2 Enterprise database. Integer but may be nil.
23
+ # and the GeoIP2 Enterprise database.
24
+ #
25
+ # @return [Integer, nil]
22
26
  def confidence
23
27
  get('confidence')
24
28
  end
@@ -9,11 +9,13 @@ module MaxMind::GeoIP2::Record
9
9
  # for the IP's represented country. The represented country is the country
10
10
  # represented by something like a military base.
11
11
  #
12
- # See Country for inherited methods.
12
+ # See {MaxMind::GeoIP2::Record::Country} for inherited methods.
13
13
  class RepresentedCountry < Country
14
14
  # A string indicating the type of entity that is representing the country.
15
15
  # Currently we only return +military+ but this could expand to include
16
- # other types in the future. String but may be nil.
16
+ # other types in the future.
17
+ #
18
+ # @return [String, nil]
17
19
  def type
18
20
  get('type')
19
21
  end
@@ -8,19 +8,21 @@ module MaxMind::GeoIP2::Record
8
8
  # This record is returned by all location databases and services besides
9
9
  # Country.
10
10
  #
11
- # See Place for inherited methods.
11
+ # See {MaxMind::GeoIP2::Record::Place} for inherited methods.
12
12
  class Subdivision < Place
13
13
  # This is a value from 0-100 indicating MaxMind's confidence that the
14
14
  # subdivision is correct. This attribute is only available from the
15
- # Insights service and the GeoIP2 Enterprise database. Integer but may be
16
- # nil.
15
+ # Insights service and the GeoIP2 Enterprise database.
16
+ #
17
+ # @return [Integer, nil]
17
18
  def confidence
18
19
  get('confidence')
19
20
  end
20
21
 
21
22
  # This is a GeoName ID for the subdivision. This attribute is returned by
22
- # all location databases and services besides Country. Integer but may be
23
- # nil.
23
+ # all location databases and services besides Country.
24
+ #
25
+ # @return [Integer, nil]
24
26
  def geoname_id
25
27
  get('geoname_id')
26
28
  end
@@ -28,15 +30,17 @@ module MaxMind::GeoIP2::Record
28
30
  # This is a string up to three characters long containing the subdivision
29
31
  # portion of the ISO 3166-2 code. See
30
32
  # https://en.wikipedia.org/wiki/ISO_3166-2. This attribute is returned by
31
- # all location databases and services except Country. String but may be
32
- # nil.
33
+ # all location databases and services except Country.
34
+ #
35
+ # @return [String, nil]
33
36
  def iso_code
34
37
  get('iso_code')
35
38
  end
36
39
 
37
- # A Hash where the keys are locale codes (Strings) and the values are names
38
- # (Strings). This attribute is returned by all location services and
39
- # databases besides country. Hash but may be nil.
40
+ # A Hash where the keys are locale codes and the values are names. This attribute is returned by all location services and
41
+ # databases besides Country.
42
+ #
43
+ # @return [Hash<String, String>, nil]
40
44
  def names
41
45
  get('names')
42
46
  end
@@ -8,13 +8,12 @@ module MaxMind::GeoIP2::Record
8
8
  #
9
9
  # This record is returned by all location services and databases.
10
10
  class Traits < Abstract
11
- def initialize(record) # :nodoc:
11
+ # @!visibility private
12
+ def initialize(record)
12
13
  super(record)
13
14
  if !record.key?('network') && record.key?('ip_address') &&
14
15
  record.key?('prefix_length')
15
- ip = IPAddr.new(
16
- format('%s/%d', record['ip_address'], record['prefix_length']),
17
- )
16
+ ip = IPAddr.new(record['ip_address']).mask(record['prefix_length'])
18
17
  # We could use ip.prefix instead of record['prefix_length'], but that
19
18
  # method only becomes available in Ruby 2.5+.
20
19
  record['network'] = format('%s/%d', ip.to_s, record['prefix_length'])
@@ -24,7 +23,9 @@ module MaxMind::GeoIP2::Record
24
23
  # The autonomous system number associated with the IP address. See
25
24
  # Wikipedia[https://en.wikipedia.org/wiki/Autonomous_system_(Internet)].
26
25
  # This attribute is only available from the City and Insights web service
27
- # and the GeoIP2 Enterprise database. Integer but may be nil.
26
+ # and the GeoIP2 Enterprise database.
27
+ #
28
+ # @return [Integer, nil]
28
29
  def autonomous_system_number
29
30
  get('autonomous_system_number')
30
31
  end
@@ -33,7 +34,9 @@ module MaxMind::GeoIP2::Record
33
34
  # for the IP address. See
34
35
  # Wikipedia[https://en.wikipedia.org/wiki/Autonomous_system_(Internet)].
35
36
  # This attribute is only available from the City and Insights web service
36
- # and the GeoIP2 Enterprise database. String but may be nil.
37
+ # and the GeoIP2 Enterprise database.
38
+ #
39
+ # @return [String, nil]
37
40
  def autonomous_system_organization
38
41
  get('autonomous_system_organization')
39
42
  end
@@ -41,7 +44,9 @@ module MaxMind::GeoIP2::Record
41
44
  # The connection type may take the following values: "Dialup",
42
45
  # "Cable/DSL", "Corporate", "Cellular". Additional values may be added in
43
46
  # the future. This attribute is only available in the GeoIP2 Enterprise
44
- # database. String but may be nil.
47
+ # database.
48
+ #
49
+ # @return [String, nil]
45
50
  def connection_type
46
51
  get('connection_type')
47
52
  end
@@ -49,7 +54,9 @@ module MaxMind::GeoIP2::Record
49
54
  # The second level domain associated with the IP address. This will be
50
55
  # something like "example.com" or "example.co.uk", not "foo.example.com".
51
56
  # This attribute is only available from the City and Insights web service
52
- # and the GeoIP2 Enterprise database. String but may be nil.
57
+ # and the GeoIP2 Enterprise database.
58
+ #
59
+ # @return [String, nil]
53
60
  def domain
54
61
  get('domain')
55
62
  end
@@ -58,13 +65,17 @@ module MaxMind::GeoIP2::Record
58
65
  # lookup against the web service, this will be the externally routable IP
59
66
  # address for the system the code is running on. If the system is behind a
60
67
  # NAT, this may differ from the IP address locally assigned to it. This
61
- # attribute is returned by all end points. String.
68
+ # attribute is returned by all end points.
69
+ #
70
+ # @return [String]
62
71
  def ip_address
63
72
  get('ip_address')
64
73
  end
65
74
 
66
75
  # This is true if the IP address belongs to any sort of anonymous network.
67
- # This property is only available from GeoIP2 Precision Insights. Boolean.
76
+ # This property is only available from GeoIP2 Precision Insights.
77
+ #
78
+ # @return [Boolean]
68
79
  def anonymous?
69
80
  get('is_anonymous')
70
81
  end
@@ -73,60 +84,78 @@ module MaxMind::GeoIP2::Record
73
84
  # provider. If a VPN provider does not register subnets under names
74
85
  # associated with them, we will likely only flag their IP ranges using the
75
86
  # hosting_provider? property. This property is only available from GeoIP2
76
- # Precision Insights. Boolean.
87
+ # Precision Insights.
88
+ #
89
+ # @return [Boolean]
77
90
  def anonymous_vpn?
78
91
  get('is_anonymous_vpn')
79
92
  end
80
93
 
81
94
  # This is true if the IP address belongs to a hosting or VPN provider (see
82
95
  # description of the anonymous_vpn? property). This property is only
83
- # available from GeoIP2 Precision Insights. Boolean.
96
+ # available from GeoIP2 Precision Insights.
97
+ #
98
+ # @return [Boolean]
84
99
  def hosting_provider?
85
100
  get('is_hosting_provider')
86
101
  end
87
102
 
88
103
  # This attribute is true if MaxMind believes this IP address to be a
89
104
  # legitimate proxy, such as an internal VPN used by a corporation. This
90
- # attribute is only available in the GeoIP2 Enterprise database. Boolean.
105
+ # attribute is only available in the GeoIP2 Enterprise database.
106
+ #
107
+ # @return [Boolean]
91
108
  def legitimate_proxy?
92
109
  get('is_legitimate_proxy')
93
110
  end
94
111
 
95
112
  # This is true if the IP address belongs to a public proxy. This property
96
- # is only available from GeoIP2 Precision Insights. Boolean.
113
+ # is only available from GeoIP2 Precision Insights.
114
+ #
115
+ # @return [Boolean]
97
116
  def public_proxy?
98
117
  get('is_public_proxy')
99
118
  end
100
119
 
101
120
  # This is true if the IP address is a Tor exit node. This property is only
102
- # available from GeoIP2 Precision Insights. Boolean.
121
+ # available from GeoIP2 Precision Insights.
122
+ #
123
+ # @return [Boolean]
103
124
  def tor_exit_node?
104
125
  get('is_tor_exit_node')
105
126
  end
106
127
 
107
128
  # The name of the ISP associated with the IP address. This attribute is
108
129
  # only available from the City and Insights web services and the GeoIP2
109
- # Enterprise database. String but may be nil.
130
+ # Enterprise database.
131
+ #
132
+ # @return [String, nil]
110
133
  def isp
111
134
  get('isp')
112
135
  end
113
136
 
114
137
  # The network in CIDR notation associated with the record. In particular,
115
138
  # this is the largest network where all of the fields besides ip_address
116
- # have the same value. String.
139
+ # have the same value.
140
+ #
141
+ # @return [String]
117
142
  def network
118
143
  get('network')
119
144
  end
120
145
 
121
146
  # The name of the organization associated with the IP address. This
122
147
  # attribute is only available from the City and Insights web services and
123
- # the GeoIP2 Enterprise database. String but may be nil.
148
+ # the GeoIP2 Enterprise database.
149
+ #
150
+ # @return [String, nil]
124
151
  def organization
125
152
  get('organization')
126
153
  end
127
154
 
128
155
  # An indicator of how static or dynamic an IP address is. This property is
129
- # only available from GeoIP2 Precision Insights. Float but may be nil.
156
+ # only available from GeoIP2 Precision Insights.
157
+ #
158
+ # @return [Float, nil]
130
159
  def static_ip_score
131
160
  get('static_ip_score')
132
161
  end
@@ -134,7 +163,9 @@ module MaxMind::GeoIP2::Record
134
163
  # The estimated number of users sharing the IP/network during the past 24
135
164
  # hours. For IPv4, the count is for the individual IP. For IPv6, the count
136
165
  # is for the /64 network. This property is only available from GeoIP2
137
- # Precision Insights. Integer but may be nil.
166
+ # Precision Insights.
167
+ #
168
+ # @return [Integer, nil]
138
169
  def user_count
139
170
  get('user_count')
140
171
  end
@@ -161,7 +192,7 @@ module MaxMind::GeoIP2::Record
161
192
  # This attribute is only available from the Insights web service and the
162
193
  # GeoIP2 Enterprise database.
163
194
  #
164
- # May be nil.
195
+ # @return [String, nil]
165
196
  def user_type
166
197
  get('user_type')
167
198
  end
@@ -5,7 +5,7 @@ Gem::Specification.new do |s|
5
5
  s.files = Dir['**/*']
6
6
  s.name = 'maxmind-geoip2'
7
7
  s.summary = 'A gem for interacting with the GeoIP2 webservices and databases.'
8
- s.version = '0.0.1'
8
+ s.version = '0.1.0'
9
9
 
10
10
  s.description = 'A gem for interacting with the GeoIP2 webservices and databases. MaxMind provides geolocation data as downloadable databases as well as through a webservice.'
11
11
  s.email = 'support@maxmind.com'
@@ -19,4 +19,6 @@ Gem::Specification.new do |s|
19
19
  'source_code_uri' => 'https://github.com/maxmind/GeoIP2-ruby',
20
20
  }
21
21
  s.required_ruby_version = '>= 2.4.0'
22
+
23
+ s.add_runtime_dependency 'maxmind-db', ['~> 1.1']
22
24
  end
@@ -0,0 +1,4 @@
1
+ This work is licensed under the Creative Commons Attribution-ShareAlike 3.0
2
+ Unported License. To view a copy of this license, visit
3
+ http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative
4
+ Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA.
@@ -0,0 +1,570 @@
1
+ ---
2
+ layout: default
3
+ title: MaxMind DB File Format Specification
4
+ version: v2.0
5
+ ---
6
+ # MaxMind DB File Format Specification
7
+
8
+ ## Description
9
+
10
+ The MaxMind DB file format is a database format that maps IPv4 and IPv6
11
+ addresses to data records using an efficient binary search tree.
12
+
13
+ ## Version
14
+
15
+ This spec documents **version 2.0** of the MaxMind DB binary format.
16
+
17
+ The version number consists of separate major and minor version numbers. It
18
+ should not be considered a decimal number. In other words, version 2.10 comes
19
+ after version 2.9.
20
+
21
+ Code which is capable of reading a given major version of the format should
22
+ not be broken by minor version changes to the format.
23
+
24
+ ## Overview
25
+
26
+ The binary database is split into three parts:
27
+
28
+ 1. The binary search tree. Each level of the tree corresponds to a single bit
29
+ in the 128 bit representation of an IPv6 address.
30
+ 2. The data section. These are the values returned to the client for a
31
+ specific IP address, e.g. "US", "New York", or a more complex map type made up
32
+ of multiple fields.
33
+ 3. Database metadata. Information about the database itself.
34
+
35
+ ## Database Metadata
36
+
37
+ This portion of the database is stored at the end of the file. It is
38
+ documented first because understanding some of the metadata is key to
39
+ understanding how the other sections work.
40
+
41
+ This section can be found by looking for a binary sequence matching
42
+ "\xab\xcd\xefMaxMind.com". The *last* occurrence of this string in the file
43
+ marks the end of the data section and the beginning of the metadata. Since we
44
+ allow for arbitrary binary data in the data section, some other piece of data
45
+ could contain these values. This is why you need to find the last occurrence
46
+ of this sequence.
47
+
48
+ The maximum allowable size for the metadata section, including the marker that
49
+ starts the metadata, is 128KiB.
50
+
51
+ The metadata is stored as a map data structure. This structure is described
52
+ later in the spec. Changing a key's data type or removing a key would
53
+ constitute a major version change for this spec.
54
+
55
+ Except where otherwise specified, each key listed is required for the database
56
+ to be considered valid.
57
+
58
+ Adding a key constitutes a minor version change. Removing a key or changing
59
+ its type constitutes a major version change.
60
+
61
+ The list of known keys for the current version of the format is as follows:
62
+
63
+ ### node\_count
64
+
65
+ This is an unsigned 32-bit integer indicating the number of nodes in the
66
+ search tree.
67
+
68
+ ### record\_size
69
+
70
+ This is an unsigned 16-bit integer. It indicates the number of bits in a
71
+ record in the search tree. Note that each node consists of *two* records.
72
+
73
+ ### ip\_version
74
+
75
+ This is an unsigned 16-bit integer which is always 4 or 6. It indicates
76
+ whether the database contains IPv4 or IPv6 address data.
77
+
78
+ ### database\_type
79
+
80
+ This is a string that indicates the structure of each data record associated
81
+ with an IP address. The actual definition of these structures is left up to
82
+ the database creator.
83
+
84
+ Names starting with "GeoIP" are reserved for use by MaxMind (and "GeoIP" is a
85
+ trademark anyway).
86
+
87
+ ### languages
88
+
89
+ An array of strings, each of which is a locale code. A given record may
90
+ contain data items that have been localized to some or all of these
91
+ locales. Records should not contain localized data for locales not included in
92
+ this array.
93
+
94
+ This is an optional key, as this may not be relevant for all types of data.
95
+
96
+ ### binary\_format\_major\_version
97
+
98
+ This is an unsigned 16-bit integer indicating the major version number for the
99
+ database's binary format.
100
+
101
+ ### binary\_format\_minor\_version
102
+
103
+ This is an unsigned 16-bit integer indicating the minor version number for the
104
+ database's binary format.
105
+
106
+ ### build\_epoch
107
+
108
+ This is an unsigned 64-bit integer that contains the database build timestamp
109
+ as a Unix epoch value.
110
+
111
+ ### description
112
+
113
+ This key will always point to a map. The keys of that map will be language
114
+ codes, and the values will be a description in that language as a UTF-8
115
+ string.
116
+
117
+ The codes may include additional information such as script or country
118
+ identifiers, like "zh-TW" or "mn-Cyrl-MN". The additional identifiers will be
119
+ separated by a dash character ("-").
120
+
121
+ This key is optional. However, creators of databases are strongly
122
+ encouraged to include a description in at least one language.
123
+
124
+ ### Calculating the Search Tree Section Size
125
+
126
+ The formula for calculating the search tree section size *in bytes* is as
127
+ follows:
128
+
129
+ ( ( $record_size * 2 ) / 8 ) * $number_of_nodes
130
+
131
+ The end of the search tree marks the beginning of the data section.
132
+
133
+ ## Binary Search Tree Section
134
+
135
+ The database file starts with a binary search tree. The number of nodes in the
136
+ tree is dependent on how many unique netblocks are needed for the particular
137
+ database. For example, the city database needs many more small netblocks than
138
+ the country database.
139
+
140
+ The topmost node is always located at the beginning of the search tree
141
+ section's address space. The top node is node 0.
142
+
143
+ Each node consists of two records, each of which is a pointer to an address in
144
+ the file.
145
+
146
+ Each pointer can point to one of three things. First, it may point to another
147
+ node in the search tree address space. These pointers are followed as part of
148
+ the IP address search algorithm, described below.
149
+
150
+ The pointer can point to a value equal to `$number_of_nodes`. If this is the
151
+ case, it means that the IP address we are searching for is not in the
152
+ database.
153
+
154
+ Finally, it may point to an address in the data section. This is the data
155
+ relevant to the given netblock.
156
+
157
+ ### Node Layout
158
+
159
+ Each node in the search tree consists of two records, each of which is a
160
+ pointer. The record size varies by database, but inside a single database node
161
+ records are always the same size. A record may be anywhere from 24 to 128 bits
162
+ long, depending on the number of nodes in the tree. These pointers are
163
+ stored in big-endian format (most significant byte first).
164
+
165
+ Here are some examples of how the records are laid out in a node for 24, 28,
166
+ and 32 bit records. Larger record sizes follow this same pattern.
167
+
168
+ #### 24 bits (small database), one node is 6 bytes
169
+
170
+ | <------------- node --------------->|
171
+ | 23 .. 0 | 23 .. 0 |
172
+
173
+ #### 28 bits (medium database), one node is 7 bytes
174
+
175
+ | <------------- node --------------->|
176
+ | 23 .. 0 | 27..24 | 27..24 | 23 .. 0 |
177
+
178
+ Note that 4 bits of each pointer are combined into the middle byte. For both
179
+ records, they are prepended and end up in the most significant position.
180
+
181
+ #### 32 bits (large database), one node is 8 bytes
182
+
183
+ | <------------- node --------------->|
184
+ | 31 .. 0 | 31 .. 0 |
185
+
186
+ ### Search Lookup Algorithm
187
+
188
+ The first step is to convert the IP address to its big-endian binary
189
+ representation. For an IPv4 address, this becomes 32 bits. For IPv6 you get
190
+ 128 bits.
191
+
192
+ The leftmost bit corresponds to the first node in the search tree. For each
193
+ bit, a value of 0 means we choose the left record in a node, and a value of 1
194
+ means we choose the right record.
195
+
196
+ The record value is always interpreted as an unsigned integer. The maximum
197
+ size of the integer is dependent on the number of bits in a record (24, 28, or
198
+ 32).
199
+
200
+ If the record value is a number that is less than the *number of nodes* (not
201
+ in bytes, but the actual node count) in the search tree (this is stored in the
202
+ database metadata), then the value is a node number. In this case, we find
203
+ that node in the search tree and repeat the lookup algorithm from there.
204
+
205
+ If the record value is equal to the number of nodes, that means that we do not
206
+ have any data for the IP address, and the search ends here.
207
+
208
+ If the record value is *greater* than the number of nodes in the search tree,
209
+ then it is an actual pointer value pointing into the data section. The value
210
+ of the pointer is relative to the start of the data section, *not* the
211
+ start of the file.
212
+
213
+ In order to determine where in the data section we should start looking, we use
214
+ the following formula:
215
+
216
+ $data_section_offset = ( $record_value - $node_count ) - 16
217
+
218
+ The 16 is the size of the data section separator. We subtract it because we
219
+ want to permit pointing to the first byte of the data section. Recall that
220
+ the record value cannot equal the node count as that means there is no
221
+ data. Instead, we choose to start values that go to the data section at
222
+ `$node_count + 16`. (This has the side effect that record values
223
+ `$node_count + 1` through `$node_count + 15` inclusive are not valid).
224
+
225
+ This is best demonstrated by an example:
226
+
227
+ Let's assume we have a 24-bit tree with 1,000 nodes. Each node contains 48
228
+ bits, or 6 bytes. The size of the tree is 6,000 bytes.
229
+
230
+ When a record in the tree contains a number that is less than 1,000, this
231
+ is a *node number*, and we look up that node. If a record contains a value
232
+ greater than or equal to 1,016, we know that it is a data section value. We
233
+ subtract the node count (1,000) and then subtract 16 for the data section
234
+ separator, giving us the number 0, the first byte of the data section.
235
+
236
+ If a record contained the value 6,000, this formula would give us an offset of
237
+ 4,984 into the data section.
238
+
239
+ In order to determine where in the file this offset really points to, we also
240
+ need to know where the data section starts. This can be calculated by
241
+ determining the size of the search tree in bytes and then adding an additional
242
+ 16 bytes for the data section separator:
243
+
244
+ $offset_in_file = $data_section_offset
245
+ + $search_tree_size_in_bytes
246
+ + 16
247
+
248
+ Since we subtract and then add 16, the final formula to determine the
249
+ offset in the file can be simplified to:
250
+
251
+ $offset_in_file = ( $record_value - $node_count )
252
+ + $search_tree_size_in_bytes
253
+
254
+ ### IPv4 addresses in an IPv6 tree
255
+
256
+ When storing IPv4 addresses in an IPv6 tree, they are stored as-is, so they
257
+ occupy the first 32 bits of the address space (from 0 to 2**32 - 1).
258
+
259
+ Creators of databases should decide on a strategy for handling the various
260
+ mappings between IPv4 and IPv6.
261
+
262
+ The strategy that MaxMind uses for its GeoIP databases is to include a pointer
263
+ from the `::ffff:0:0/96` subnet to the root node of the IPv4 address space in
264
+ the tree. This accounts for the
265
+ [IPv4-mapped IPv6 address](http://en.wikipedia.org/wiki/IPv6#IPv4-mapped_IPv6_addresses).
266
+
267
+ MaxMind also includes a pointer from the `2002::/16` subnet to the root node
268
+ of the IPv4 address space in the tree. This accounts for the
269
+ [6to4 mapping](http://en.wikipedia.org/wiki/6to4) subnet.
270
+
271
+ Database creators are encouraged to document whether they are doing something
272
+ similar for their databases.
273
+
274
+ The Teredo subnet cannot be accounted for in the tree. Instead, code that
275
+ searches the tree can offer to decode the IPv4 portion of a Teredo address and
276
+ look that up.
277
+
278
+ ## Data Section Separator
279
+
280
+ There are 16 bytes of NULLs in between the search tree and the data
281
+ section. This separator exists in order to make it possible for a verification
282
+ tool to distinguish between the two sections.
283
+
284
+ This separator is not considered part of the data section itself. In other
285
+ words, the data section starts at `$size_of_search_tree + 16` bytes in the
286
+ file.
287
+
288
+ ## Output Data Section
289
+
290
+ Each output data field has an associated type, and that type is encoded as a
291
+ number that begins the data field. Some types are variable length. In those
292
+ cases, the type indicator is also followed by a length. The data payload
293
+ always comes at the end of the field.
294
+
295
+ All binary data is stored in big-endian format.
296
+
297
+ Note that the *interpretation* of a given data type's meaning is decided by
298
+ higher-level APIs, not by the binary format itself.
299
+
300
+ ### pointer - 1
301
+
302
+ A pointer to another part of the data section's address space. The pointer
303
+ will point to the beginning of a field. It is illegal for a pointer to point
304
+ to another pointer.
305
+
306
+ Pointer values start from the beginning of the data section, *not* the
307
+ beginning of the file.
308
+
309
+ ### UTF-8 string - 2
310
+
311
+ A variable length byte sequence that contains valid UTF-8. If the length is
312
+ zero then this is an empty string.
313
+
314
+ ### double - 3
315
+
316
+ This is stored as an IEEE-754 double (binary64) in big-endian format. The
317
+ length of a double is always 8 bytes.
318
+
319
+ ### bytes - 4
320
+
321
+ A variable length byte sequence containing any sort of binary data. If the
322
+ length is zero then this is a zero-length byte sequence.
323
+
324
+ This is not currently used but may be used in the future to embed non-text
325
+ data (images, etc.).
326
+
327
+ ### integer formats
328
+
329
+ Integers are stored in variable length binary fields.
330
+
331
+ We support 16-bit, 32-bit, 64-bit, and 128-bit unsigned integers. We also
332
+ support 32-bit signed integers.
333
+
334
+ A 128-bit integer can use up to 16 bytes, but may use fewer. Similarly, a
335
+ 32-bit integer may use from 0-4 bytes. The number of bytes used is determined
336
+ by the length specifier in the control byte. See below for details.
337
+
338
+ A length of zero always indicates the number 0.
339
+
340
+ When storing a signed integer, the left-most bit is the sign. A 1 is negative
341
+ and a 0 is positive.
342
+
343
+ The type numbers for our integer types are:
344
+
345
+ * unsigned 16-bit int - 5
346
+ * unsigned 32-bit int - 6
347
+ * signed 32-bit int - 8
348
+ * unsigned 64-bit int - 9
349
+ * unsigned 128-bit int - 10
350
+
351
+ The unsigned 32-bit and 128-bit types may be used to store IPv4 and IPv6
352
+ addresses, respectively.
353
+
354
+ The signed 32-bit integers are stored using the 2's complement representation.
355
+
356
+ ### map - 7
357
+
358
+ A map data type contains a set of key/value pairs. Unlike other data types,
359
+ the length information for maps indicates how many key/value pairs it
360
+ contains, not its length in bytes. This size can be zero.
361
+
362
+ See below for the algorithm used to determine the number of pairs in the
363
+ map. This algorithm is also used to determine the length of a field's
364
+ payload.
365
+
366
+ ### array - 11
367
+
368
+ An array type contains a set of ordered values. The length information for
369
+ arrays indicates how many values it contains, not its length in bytes. This
370
+ size can be zero.
371
+
372
+ This type uses the same algorithm as maps for determining the length of a
373
+ field's payload.
374
+
375
+ ### data cache container - 12
376
+
377
+ This is a special data type that marks a container used to cache repeated
378
+ data. For example, instead of repeating the string "United States" over and
379
+ over in the database, we store it in the cache container and use pointers
380
+ *into* this container instead.
381
+
382
+ Nothing in the database will ever contain a pointer to this field
383
+ itself. Instead, various fields will point into the container.
384
+
385
+ The primary reason for making this a separate data type versus simply inlining
386
+ the cached data is so that a database dumper tool can skip this cache when
387
+ dumping the data section. The cache contents will end up being dumped as
388
+ pointers into it are followed.
389
+
390
+ ### end marker - 13
391
+
392
+ The end marker marks the end of the data section. It is not strictly
393
+ necessary, but including this marker allows a data section deserializer to
394
+ process a stream of input, rather than having to find the end of the section
395
+ before beginning the deserialization.
396
+
397
+ This data type is not followed by a payload, and its size is always zero.
398
+
399
+ ### boolean - 14
400
+
401
+ A true or false value. The length information for a boolean type will always
402
+ be 0 or 1, indicating the value. There is no payload for this field.
403
+
404
+ ### float - 15
405
+
406
+ This is stored as an IEEE-754 float (binary32) in big-endian format. The
407
+ length of a float is always 4 bytes.
408
+
409
+ This type is provided primarily for completeness. Because of the way floating
410
+ point numbers are stored, this type can easily lose precision when serialized
411
+ and then deserialized. If this is an issue for you, consider using a double
412
+ instead.
413
+
414
+ ### Data Field Format
415
+
416
+ Each field starts with a control byte. This control byte provides information
417
+ about the field's data type and payload size.
418
+
419
+ The first three bits of the control byte tell you what type the field is. If
420
+ these bits are all 0, then this is an "extended" type, which means that the
421
+ *next* byte contains the actual type. Otherwise, the first three bits will
422
+ contain a number from 1 to 7, the actual type for the field.
423
+
424
+ We've tried to assign the most commonly used types as numbers 1-7 as an
425
+ optimization.
426
+
427
+ With an extended type, the type number in the second byte is the number
428
+ minus 7. In other words, an array (type 11) will be stored with a 0 for the
429
+ type in the first byte and a 4 in the second.
430
+
431
+ Here is an example of how the control byte may combine with the next byte to
432
+ tell us the type:
433
+
434
+ 001XXXXX pointer
435
+ 010XXXXX UTF-8 string
436
+ 110XXXXX unsigned 32-bit int (ASCII)
437
+ 000XXXXX 00000011 unsigned 128-bit int (binary)
438
+ 000XXXXX 00000100 array
439
+ 000XXXXX 00000110 end marker
440
+
441
+ #### Payload Size
442
+
443
+ The next five bits in the control byte tell you how long the data field's
444
+ payload is, except for maps and pointers. Maps and pointers use this size
445
+ information a bit differently. See below.
446
+
447
+ If the five bits are smaller than 29, then those bits are the payload size in
448
+ bytes. For example:
449
+
450
+ 01000010 UTF-8 string - 2 bytes long
451
+ 01011100 UTF-8 string - 28 bytes long
452
+ 11000001 unsigned 32-bit int - 1 byte long
453
+ 00000011 00000011 unsigned 128-bit int - 3 bytes long
454
+
455
+ If the five bits are equal to 29, 30, or 31, then use the following algorithm
456
+ to calculate the payload size.
457
+
458
+ If the value is 29, then the size is 29 + *the next byte after the type
459
+ specifying bytes as an unsigned integer*.
460
+
461
+ If the value is 30, then the size is 285 + *the next two bytes after the type
462
+ specifying bytes as a single unsigned integer*.
463
+
464
+ If the value is 31, then the size is 65,821 + *the next three bytes after the
465
+ type specifying bytes as a single unsigned integer*.
466
+
467
+ Some examples:
468
+
469
+ 01011101 00110011 UTF-8 string - 80 bytes long
470
+
471
+ In this case, the last five bits of the control byte equal 29. We treat the
472
+ next byte as an unsigned integer. The next byte is 51, so the total size is
473
+ (29 + 51) = 80.
474
+
475
+ 01011110 00110011 00110011 UTF-8 string - 13,392 bytes long
476
+
477
+ The last five bits of the control byte equal 30. We treat the next two bytes
478
+ as a single unsigned integer. The next two bytes equal 13,107, so the total
479
+ size is (285 + 13,107) = 13,392.
480
+
481
+ 01011111 00110011 00110011 00110011 UTF-8 string - 3,421,264 bytes long
482
+
483
+ The last five bits of the control byte equal 31. We treat the next three bytes
484
+ as a single unsigned integer. The next three bytes equal 3,355,443, so the
485
+ total size is (65,821 + 3,355,443) = 3,421,264.
486
+
487
+ This means that the maximum payload size for a single field is 16,843,036
488
+ bytes.
489
+
490
+ The binary number types always have a known size, but for consistency's sake,
491
+ the control byte will always specify the correct size for these types.
492
+
493
+ #### Maps
494
+
495
+ Maps use the size in the control byte (and any following bytes) to indicate
496
+ the number of key/value pairs in the map, not the size of the payload in
497
+ bytes.
498
+
499
+ This means that the maximum number of pairs for a single map is 16,843,036.
500
+
501
+ Maps are laid out with each key followed by its value, followed by the next
502
+ pair, etc.
503
+
504
+ The keys are **always** UTF-8 strings. The values may be any data type,
505
+ including maps or pointers.
506
+
507
+ Once we know the number of pairs, we can look at each pair in turn to
508
+ determine the size of the key and the key name, as well as the value's type
509
+ and payload.
510
+
511
+ #### Pointers
512
+
513
+ Pointers use the last five bits in the control byte to calculate the pointer
514
+ value.
515
+
516
+ To calculate the pointer value, we start by subdividing the five bits into two
517
+ groups. The first two bits indicate the size, and the next three bits are part
518
+ of the value, so we end up with a control byte breaking down like this:
519
+ 001SSVVV.
520
+
521
+ The size can be 0, 1, 2, or 3.
522
+
523
+ If the size is 0, the pointer is built by appending the next byte to the last
524
+ three bits to produce an 11-bit value.
525
+
526
+ If the size is 1, the pointer is built by appending the next two bytes to the
527
+ last three bits to produce a 19-bit value + 2048.
528
+
529
+ If the size is 2, the pointer is built by appending the next three bytes to the
530
+ last three bits to produce a 27-bit value + 526336.
531
+
532
+ Finally, if the size is 3, the pointer's value is contained in the next four
533
+ bytes as a 32-bit value. In this case, the last three bits of the control byte
534
+ are ignored.
535
+
536
+ This means that we are limited to 4GB of address space for pointers, so the
537
+ data section size for the database is limited to 4GB.
538
+
539
+ ## Reference Implementations
540
+
541
+ ### Writer
542
+
543
+ * [Perl](https://github.com/maxmind/MaxMind-DB-Writer-perl)
544
+
545
+ ### Reader
546
+
547
+ * [C](https://github.com/maxmind/libmaxminddb)
548
+ * [C#](https://github.com/maxmind/MaxMind-DB-Reader-dotnet)
549
+ * [Java](https://github.com/maxmind/MaxMind-DB-Reader-java)
550
+ * [Perl](https://github.com/maxmind/MaxMind-DB-Reader-perl)
551
+ * [PHP](https://github.com/maxmind/MaxMind-DB-Reader-php)
552
+ * [Python](https://github.com/maxmind/MaxMind-DB-Reader-python)
553
+ * [Ruby](https://github.com/maxmind/MaxMind-DB-Reader-ruby)
554
+
555
+ ## Authors
556
+
557
+ This specification was created by the following authors:
558
+
559
+ * Greg Oschwald \<goschwald@maxmind.com\>
560
+ * Dave Rolsky \<drolsky@maxmind.com\>
561
+ * Boris Zentner \<bzentner@maxmind.com\>
562
+
563
+ ## License
564
+
565
+ This work is licensed under the Creative Commons Attribution-ShareAlike 3.0
566
+ Unported License. To view a copy of this license, visit
567
+ [http://creativecommons.org/licenses/by-sa/3.0/](http://creativecommons.org/licenses/by-sa/3.0/)
568
+ or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain
569
+ View, California, 94041, USA.
570
+