platphorm-maxmind-db 1.1.0

data/lib/maxmind/db.rb ADDED
@@ -0,0 +1,306 @@
+ # frozen_string_literal: true
+
+ require 'ipaddr'
+ require 'maxmind/db/decoder'
+ require 'maxmind/db/errors'
+ require 'maxmind/db/file_reader.rb'
+ require 'maxmind/db/memory_reader.rb'
+ require 'maxmind/db/metadata.rb'
+
+ module MaxMind
+   # DB provides a way to read {MaxMind DB
+   # files}[https://maxmind.github.io/MaxMind-DB/].
+   #
+   # {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/] is a binary file format
+   # that stores data indexed by IP address subnets (IPv4 or IPv6).
+   #
+   # This class is a pure Ruby implementation of a reader for the format.
+   #
+   # == Example
+   #
+   #   require 'maxmind/db'
+   #
+   #   reader = MaxMind::DB.new('GeoIP2-City.mmdb', mode: MaxMind::DB::MODE_MEMORY)
+   #
+   #   record = reader.get('1.1.1.1')
+   #   if record.nil?
+   #     puts '1.1.1.1 was not found in the database'
+   #   else
+   #     puts record['country']['iso_code']
+   #     puts record['country']['names']['en']
+   #   end
+   #
+   #   reader.close
+   class DB
+     # Choose the default method to open the database. Currently the default is
+     # MODE_FILE.
+     MODE_AUTO = :MODE_AUTO
+     # Open the database as a regular file and read on demand.
+     MODE_FILE = :MODE_FILE
+     # Read the database into memory. This is faster than MODE_FILE but causes
+     # increased memory use.
+     MODE_MEMORY = :MODE_MEMORY
+     # Treat the database parameter as containing a database already read into
+     # memory. It must be a binary string. This primarily exists for testing.
+     #
+     # @!visibility private
+     MODE_PARAM_IS_BUFFER = :MODE_PARAM_IS_BUFFER
+
+     DATA_SECTION_SEPARATOR_SIZE = 16
+     private_constant :DATA_SECTION_SEPARATOR_SIZE
+     METADATA_START_MARKER = "\xAB\xCD\xEFMaxMind.com".b.freeze
+     private_constant :METADATA_START_MARKER
+     METADATA_START_MARKER_LENGTH = 14
+     private_constant :METADATA_START_MARKER_LENGTH
+     METADATA_MAX_SIZE = 131_072
+     private_constant :METADATA_MAX_SIZE
+
+     # Return the metadata associated with the {MaxMind
+     # DB}[https://maxmind.github.io/MaxMind-DB/]
+     #
+     # @return [MaxMind::DB::Metadata]
+     attr_reader :metadata
+
+     # Create a DB. A DB provides a way to read {MaxMind DB
+     # files}[https://maxmind.github.io/MaxMind-DB/]. If you're performing
+     # multiple lookups, it's most efficient to create one DB and reuse it.
+     #
+     # Once created, the DB is safe to use for lookups from multiple threads. It
+     # is safe to use after forking only if you use MODE_MEMORY or if your
+     # version of Ruby supports IO#pread.
+     #
+     # @param database [String] a path to a {MaxMind
+     #   DB}[https://maxmind.github.io/MaxMind-DB/].
+     #
+     # @param options [Hash<Symbol, Symbol>] options controlling the behavior of
+     #   the DB.
+     #
+     # @option options [Symbol] :mode Defines how to open the database. It may
+     #   be one of MODE_AUTO, MODE_FILE, or MODE_MEMORY. If you don't provide
+     #   one, DB uses MODE_AUTO. Refer to the definition of those constants for
+     #   an explanation of their meaning.
+     #
+     # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
+     #
+     # @raise [ArgumentError] if the mode is invalid.
+     def initialize(database, options = {})
+       options[:mode] = MODE_AUTO unless options.key?(:mode)
+
+       case options[:mode]
+       when MODE_AUTO, MODE_FILE
+         @io = FileReader.new(database)
+       when MODE_MEMORY
+         @io = MemoryReader.new(database)
+       when MODE_PARAM_IS_BUFFER
+         @io = MemoryReader.new(database, is_buffer: true)
+       else
+         raise ArgumentError, 'Invalid mode'
+       end
+
+       begin
+         @size = @io.size
+
+         metadata_start = find_metadata_start
+         metadata_decoder = Decoder.new(@io, metadata_start)
+         metadata_map, = metadata_decoder.decode(metadata_start)
+         @metadata = Metadata.new(metadata_map)
+         @decoder = Decoder.new(@io, @metadata.search_tree_size +
+                                     DATA_SECTION_SEPARATOR_SIZE)
+
+         # Store copies as instance variables to reduce method calls.
+         @ip_version = @metadata.ip_version
+         @node_count = @metadata.node_count
+         @node_byte_size = @metadata.node_byte_size
+         @record_size = @metadata.record_size
+         @search_tree_size = @metadata.search_tree_size
+
+         @ipv4_start = nil
+         # Find @ipv4_start up front. If we don't, we either have a race to
+         # get/set it or have to synchronize access.
+         start_node(0)
+       rescue StandardError => e
+         @io.close
+         raise e
+       end
+     end
+
+     # Return the record for the IP address in the {MaxMind
+     # DB}[https://maxmind.github.io/MaxMind-DB/]. The record can be one of
+     # several types and depends on the contents of the database.
+     #
+     # If no record is found for the IP address, +get+ returns +nil+.
+     #
+     # @param ip_address [String] a string in the standard notation. It may be
+     #   IPv4 or IPv6.
+     #
+     # @raise [ArgumentError] if you attempt to look up an IPv6 address in an
+     #   IPv4-only database.
+     #
+     # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
+     #
+     # @return [Object, nil]
+     def get(ip_address)
+       record, = get_with_prefix_length(ip_address)
+
+       record
+     end
+
+     # Return an array containing the record for the IP address in the
+     # {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/] and its associated
+     # network prefix length. The record can be one of several types and
+     # depends on the contents of the database.
+     #
+     # If no record is found for the IP address, the record will be +nil+ and
+     # the prefix length will be the value for the missing network.
+     #
+     # @param ip_address [String] a string in the standard notation. It may be
+     #   IPv4 or IPv6.
+     #
+     # @raise [ArgumentError] if you attempt to look up an IPv6 address in an
+     #   IPv4-only database.
+     #
+     # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
+     #
+     # @return [Array<(Object, Integer)>]
+     def get_with_prefix_length(ip_address)
+       ip = IPAddr.new(ip_address)
+       # We could check the IP has the correct prefix (32 or 128) but I do not
+       # for performance reasons.
+
+       ip_version = ip.ipv6? ? 6 : 4
+       if ip_version == 6 && @ip_version == 4
+         raise ArgumentError,
+               "Error looking up #{ip}. You attempted to look up an IPv6 address in an IPv4-only database."
+       end
+
+       pointer, depth = find_address_in_tree(ip, ip_version)
+       return nil, depth if pointer == 0
+
+       [resolve_data_pointer(pointer), depth]
+     end
+
+     private
+
+     IP_VERSION_TO_BIT_COUNT = {
+       4 => 32,
+       6 => 128,
+     }.freeze
+     private_constant :IP_VERSION_TO_BIT_COUNT
+
+     def find_address_in_tree(ip_address, ip_version)
+       packed = ip_address.hton
+
+       bit_count = IP_VERSION_TO_BIT_COUNT[ip_version]
+       node = start_node(bit_count)
+
+       node_count = @node_count
+
+       depth = 0
+       loop do
+         break if depth >= bit_count || node >= node_count
+
+         c = packed[depth >> 3].ord
+         bit = 1 & (c >> 7 - (depth % 8))
+         node = read_node(node, bit)
+         depth += 1
+       end
+
+       return 0, depth if node == node_count
+
+       return node, depth if node > node_count
+
+       raise InvalidDatabaseError, 'Invalid node in search tree'
+     end
+
+     def start_node(length)
+       return 0 if @ip_version != 6 || length == 128
+
+       return @ipv4_start if @ipv4_start
+
+       node = 0
+       96.times do
+         break if node >= @metadata.node_count
+
+         node = read_node(node, 0)
+       end
+
+       @ipv4_start = node
+     end
+
+     # Read a record from the indicated node. Index indicates whether it's the
+     # left (0) or right (1) record.
+     #
+     # rubocop:disable Metrics/CyclomaticComplexity
+     def read_node(node_number, index)
+       base_offset = node_number * @node_byte_size
+
+       if @record_size == 24
+         offset = index == 0 ? base_offset : base_offset + 3
+         buf = @io.read(offset, 3)
+         node_bytes = "\x00".b << buf
+         return node_bytes.unpack('N').first
+       end
+
+       if @record_size == 28
+         if index == 0
+           buf = @io.read(base_offset, 4)
+           n = buf.unpack('N').first
+           last24 = n >> 8
+           first4 = (n & 0xf0) << 20
+           return first4 | last24
+         end
+         buf = @io.read(base_offset + 3, 4)
+         return buf.unpack('N').first & 0x0fffffff
+       end
+
+       if @record_size == 32
+         offset = index == 0 ? base_offset : base_offset + 4
+         node_bytes = @io.read(offset, 4)
+         return node_bytes.unpack('N').first
+       end
+
+       raise InvalidDatabaseError, "Unsupported record size: #{@record_size}"
+     end
+     # rubocop:enable Metrics/CyclomaticComplexity
+
+     def resolve_data_pointer(pointer)
+       offset_in_file = pointer - @node_count + @search_tree_size
+
+       if offset_in_file >= @size
+         raise InvalidDatabaseError,
+               'The MaxMind DB file\'s search tree is corrupt'
+       end
+
+       data, = @decoder.decode(offset_in_file)
+       data
+     end
+
+     def find_metadata_start
+       metadata_max_size = @size < METADATA_MAX_SIZE ? @size : METADATA_MAX_SIZE
+
+       stop_index = @size - metadata_max_size
+       index = @size - METADATA_START_MARKER_LENGTH
+       while index >= stop_index
+         return index + METADATA_START_MARKER_LENGTH if at_metadata?(index)
+
+         index -= 1
+       end
+
+       raise InvalidDatabaseError,
+             'Metadata section not found. Is this a valid MaxMind DB file?'
+     end
+
+     def at_metadata?(index)
+       @io.read(index, METADATA_START_MARKER_LENGTH) == METADATA_START_MARKER
+     end
+
+     public
+
+     # Close the DB and return resources to the system.
+     #
+     # @return [void]
+     def close
+       @io.close
+     end
+   end
+ end
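The class comment above only demonstrates get. Here is a minimal sketch of the prefix-length variant as well; the database path and the lookup address are placeholders, and MODE_FILE is used only to show the :mode option.

    require 'maxmind/db'

    # MODE_FILE reads records from disk on demand; MODE_MEMORY loads the whole
    # database into memory up front.
    reader = MaxMind::DB.new('GeoIP2-City.mmdb', mode: MaxMind::DB::MODE_FILE)

    # get_with_prefix_length returns the record (or nil) together with the
    # prefix length of the network that matched the address.
    record, prefix_length = reader.get_with_prefix_length('1.1.1.1')
    if record.nil?
      puts "1.1.1.1 was not found (network prefix length #{prefix_length})"
    else
      puts "1.1.1.1/#{prefix_length}: #{record['country']['iso_code']}"
    end

    reader.close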
data/lib/maxmind/db/decoder.rb ADDED
@@ -0,0 +1,235 @@
+ # frozen_string_literal: true
+
+ require 'maxmind/db/errors'
+
+ module MaxMind
+   class DB
+     # +Decoder+ decodes a {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/]
+     # data section.
+     #
+     # Typically you will interact with this class through a Reader rather than
+     # directly.
+     #
+     # @!visibility private
+     class Decoder
+       # Create a +Decoder+.
+       #
+       # +io+ is the DB. It must provide a +read+ method. It must be opened in
+       # binary mode.
+       #
+       # +pointer_base+ is the base number to use when decoding a pointer. It is
+       # where the data section begins rather than the beginning of the file.
+       # The specification states the formula in the `Data Section Separator'
+       # section.
+       #
+       # +pointer_test+ is used for testing pointer code.
+       def initialize(io, pointer_base = 0, pointer_test = false)
+         @io = io
+         @pointer_base = pointer_base
+         @pointer_test = pointer_test
+       end
+
+       private
+
+       def decode_array(size, offset)
+         array = []
+         size.times do
+           value, offset = decode(offset)
+           array << value
+         end
+         [array, offset]
+       end
+
+       def decode_boolean(size, offset)
+         [size != 0, offset]
+       end
+
+       def decode_bytes(size, offset)
+         [@io.read(offset, size), offset + size]
+       end
+
+       def decode_double(size, offset)
+         verify_size(8, size)
+         buf = @io.read(offset, 8)
+         [buf.unpack('G').first, offset + 8]
+       end
+
+       def decode_float(size, offset)
+         verify_size(4, size)
+         buf = @io.read(offset, 4)
+         [buf.unpack('g').first, offset + 4]
+       end
+
+       def verify_size(expected, actual)
+         return if expected == actual
+
+         raise InvalidDatabaseError,
+               'The MaxMind DB file\'s data section contains bad data (unknown data type or corrupt data)'
+       end
+
+       def decode_int32(size, offset)
+         decode_int('l>', 4, size, offset)
+       end
+
+       def decode_uint16(size, offset)
+         decode_int('n', 2, size, offset)
+       end
+
+       def decode_uint32(size, offset)
+         decode_int('N', 4, size, offset)
+       end
+
+       def decode_uint64(size, offset)
+         decode_int('Q>', 8, size, offset)
+       end
+
+       def decode_int(type_code, type_size, size, offset)
+         return 0, offset if size == 0
+
+         buf = @io.read(offset, size)
+         buf = buf.rjust(type_size, "\x00") if size != type_size
+         [buf.unpack(type_code).first, offset + size]
+       end
+
+       def decode_uint128(size, offset)
+         return 0, offset if size == 0
+
+         buf = @io.read(offset, size)
+
+         if size <= 8
+           buf = buf.rjust(8, "\x00")
+           return buf.unpack('Q>').first, offset + size
+         end
+
+         a_bytes = buf[0...-8].rjust(8, "\x00")
+         b_bytes = buf[-8...buf.length]
+         a = a_bytes.unpack('Q>').first
+         b = b_bytes.unpack('Q>').first
+         a <<= 64
+         [a | b, offset + size]
+       end
+
+       def decode_map(size, offset)
+         container = {}
+         size.times do
+           key, offset = decode(offset)
+           value, offset = decode(offset)
+           container[key] = value
+         end
+         [container, offset]
+       end
+
+       def decode_pointer(size, offset)
+         pointer_size = size >> 3
+
+         case pointer_size
+         when 0
+           new_offset = offset + 1
+           buf = (size & 0x7).chr << @io.read(offset, 1)
+           pointer = buf.unpack('n').first + @pointer_base
+         when 1
+           new_offset = offset + 2
+           buf = "\x00".b << (size & 0x7).chr << @io.read(offset, 2)
+           pointer = buf.unpack('N').first + 2048 + @pointer_base
+         when 2
+           new_offset = offset + 3
+           buf = (size & 0x7).chr << @io.read(offset, 3)
+           pointer = buf.unpack('N').first + 526_336 + @pointer_base
+         else
+           new_offset = offset + 4
+           buf = @io.read(offset, 4)
+           pointer = buf.unpack('N').first + @pointer_base
+         end
+
+         return pointer, new_offset if @pointer_test
+
+         value, = decode(pointer)
+         [value, new_offset]
+       end
+
+       def decode_utf8_string(size, offset)
+         new_offset = offset + size
+         buf = @io.read(offset, size)
+         buf.force_encoding(Encoding::UTF_8)
+         # We could check it's valid UTF-8 with `valid_encoding?', but for
+         # performance I do not.
+         [buf, new_offset]
+       end
+
+       TYPE_DECODER = {
+         1 => :decode_pointer,
+         2 => :decode_utf8_string,
+         3 => :decode_double,
+         4 => :decode_bytes,
+         5 => :decode_uint16,
+         6 => :decode_uint32,
+         7 => :decode_map,
+         8 => :decode_int32,
+         9 => :decode_uint64,
+         10 => :decode_uint128,
+         11 => :decode_array,
+         14 => :decode_boolean,
+         15 => :decode_float,
+       }.freeze
+       private_constant :TYPE_DECODER
+
+       public
+
+       # Decode a section of the data section starting at +offset+.
+       #
+       # +offset+ is the location of the data structure to decode.
+       #
+       # Returns an array where the first element is the decoded value and the
+       # second is the offset after decoding it.
+       #
+       # Throws an exception if there is an error.
+       def decode(offset)
+         new_offset = offset + 1
+         buf = @io.read(offset, 1)
+         ctrl_byte = buf.ord
+         type_num = ctrl_byte >> 5
+         type_num, new_offset = read_extended(new_offset) if type_num == 0
+
+         size, new_offset = size_from_ctrl_byte(ctrl_byte, new_offset, type_num)
+         # We could check an element exists at `type_num', but for performance I
+         # don't.
+         send(TYPE_DECODER[type_num], size, new_offset)
+       end
+
+       private
+
+       def read_extended(offset)
+         buf = @io.read(offset, 1)
+         next_byte = buf.ord
+         type_num = next_byte + 7
+         if type_num < 8
+           raise InvalidDatabaseError,
+                 "Something went horribly wrong in the decoder. An extended type resolved to a type number < 8 (#{type_num})"
+         end
+         [type_num, offset + 1]
+       end
+
+       def size_from_ctrl_byte(ctrl_byte, offset, type_num)
+         size = ctrl_byte & 0x1f
+
+         return size, offset if type_num == 1 || size < 29
+
+         if size == 29
+           size_bytes = @io.read(offset, 1)
+           size = 29 + size_bytes.ord
+           return size, offset + 1
+         end
+
+         if size == 30
+           size_bytes = @io.read(offset, 2)
+           size = 285 + size_bytes.unpack('n').first
+           return size, offset + 2
+         end
+
+         size_bytes = "\x00".b << @io.read(offset, 3)
+         size = 65_821 + size_bytes.unpack('N').first
+         [size, offset + 3]
+       end
+     end
+   end
+ end
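The decoder's entry point is the control byte handling in decode, read_extended and size_from_ctrl_byte: the top three bits of the byte select the type, the bottom five give the size, and values that don't fit spill into the following bytes. The offsets added in decode_pointer (2048 and 526_336) and in size_from_ctrl_byte (29, 285 and 65_821) exist so the value ranges of the different widths don't overlap. A small worked example of that arithmetic, using a made-up control byte rather than bytes from a real database:

    # 0x44 is a hypothetical control byte: 0b010_00100.
    ctrl_byte = 0x44
    type_num = ctrl_byte >> 5    # => 2, :decode_utf8_string in TYPE_DECODER
    size = ctrl_byte & 0x1f      # => 4, so the next 4 bytes are the string

    puts "type #{type_num}, size #{size}"

    # When the top three bits are 0, read_extended adds 7 to the following
    # byte, making types 8 and above reachable. When the low five bits are 29,
    # 30 or 31, the size continues into the next 1, 2 or 3 bytes.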