platphorm-maxmind-db 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/maxmind/db.rb ADDED
@@ -0,0 +1,306 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ipaddr'
4
+ require 'maxmind/db/decoder'
5
+ require 'maxmind/db/errors'
6
+ require 'maxmind/db/file_reader.rb'
7
+ require 'maxmind/db/memory_reader.rb'
8
+ require 'maxmind/db/metadata.rb'
9
+
10
module MaxMind
  # DB provides a way to read {MaxMind DB
  # files}[https://maxmind.github.io/MaxMind-DB/].
  #
  # {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/] is a binary file format
  # that stores data indexed by IP address subnets (IPv4 or IPv6).
  #
  # This class is a pure Ruby implementation of a reader for the format.
  #
  # == Example
  #
  #   require 'maxmind/db'
  #
  #   reader = MaxMind::DB.new('GeoIP2-City.mmdb', mode: MaxMind::DB::MODE_MEMORY)
  #
  #   record = reader.get('1.1.1.1')
  #   if record.nil?
  #     puts '1.1.1.1 was not found in the database'
  #   else
  #     puts record['country']['iso_code']
  #     puts record['country']['names']['en']
  #   end
  #
  #   reader.close
  class DB
    # Choose the default method to open the database. Currently the default is
    # MODE_FILE.
    MODE_AUTO = :MODE_AUTO
    # Open the database as a regular file and read on demand.
    MODE_FILE = :MODE_FILE
    # Read the database into memory. This is faster than MODE_FILE but causes
    # increased memory use.
    MODE_MEMORY = :MODE_MEMORY
    # Treat the database parameter as containing a database already read into
    # memory. It must be a binary string. This primarily exists for testing.
    #
    # @!visibility private
    MODE_PARAM_IS_BUFFER = :MODE_PARAM_IS_BUFFER

    # Number of bytes added to the search tree size to locate the start of the
    # data section.
    DATA_SECTION_SEPARATOR_SIZE = 16
    private_constant :DATA_SECTION_SEPARATOR_SIZE
    # Byte sequence that immediately precedes the metadata section.
    METADATA_START_MARKER = "\xAB\xCD\xEFMaxMind.com".b.freeze
    private_constant :METADATA_START_MARKER
    METADATA_START_MARKER_LENGTH = 14
    private_constant :METADATA_START_MARKER_LENGTH
    # Only this many trailing bytes of the database are searched for the
    # metadata marker.
    METADATA_MAX_SIZE = 131_072
    private_constant :METADATA_MAX_SIZE
    # Number of address bits walked in the search tree per IP version.
    IP_VERSION_TO_BIT_COUNT = {
      4 => 32,
      6 => 128,
    }.freeze
    private_constant :IP_VERSION_TO_BIT_COUNT

    # Return the metadata associated with the {MaxMind
    # DB}[https://maxmind.github.io/MaxMind-DB/]
    #
    # @return [MaxMind::DB::Metadata]
    attr_reader :metadata

    # Create a DB. A DB provides a way to read {MaxMind DB
    # files}[https://maxmind.github.io/MaxMind-DB/]. If you're performing
    # multiple lookups, it's most efficient to create one DB and reuse it.
    #
    # Once created, the DB is safe to use for lookups from multiple threads. It
    # is safe to use after forking only if you use MODE_MEMORY or if your
    # version of Ruby supports IO#pread.
    #
    # @param database [String] a path to a {MaxMind
    #   DB}[https://maxmind.github.io/MaxMind-DB/].
    #
    # @param options [Hash<Symbol, Symbol>] options controlling the behavior of
    #   the DB. The hash is only read, never modified, so a frozen or shared
    #   hash may be passed.
    #
    # @option options [Symbol] :mode Defines how to open the database. It may
    #   be one of MODE_AUTO, MODE_FILE, or MODE_MEMORY. If you don't provide
    #   one, DB uses MODE_AUTO. Refer to the definition of those constants for
    #   an explanation of their meaning.
    #
    # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
    #
    # @raise [ArgumentError] if the mode is invalid.
    def initialize(database, options = {})
      # Look the mode up without writing the default back into the caller's
      # hash.
      @io = open_reader(database, options.fetch(:mode, MODE_AUTO))

      begin
        @size = @io.size

        metadata_start = find_metadata_start
        metadata_map, = Decoder.new(@io, metadata_start).decode(metadata_start)
        @metadata = Metadata.new(metadata_map)
        @decoder = Decoder.new(@io, @metadata.search_tree_size +
                                    DATA_SECTION_SEPARATOR_SIZE)

        # Store copies as instance variables to reduce method calls.
        @ip_version = @metadata.ip_version
        @node_count = @metadata.node_count
        @node_byte_size = @metadata.node_byte_size
        @record_size = @metadata.record_size
        @search_tree_size = @metadata.search_tree_size

        @ipv4_start = nil
        # Find @ipv4_start up front. If we don't, we either have a race to
        # get/set it or have to synchronize access.
        start_node(0)
      rescue StandardError
        # Release the reader before propagating the failure.
        @io.close
        raise
      end
    end

    # Return the record for the IP address in the {MaxMind
    # DB}[https://maxmind.github.io/MaxMind-DB/]. The record can be one of
    # several types and depends on the contents of the database.
    #
    # If no record is found for the IP address, +get+ returns +nil+.
    #
    # @param ip_address [String] a string in the standard notation. It may be
    #   IPv4 or IPv6.
    #
    # @raise [ArgumentError] if you attempt to look up an IPv6 address in an
    #   IPv4-only database.
    #
    # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
    #
    # @return [Object, nil]
    def get(ip_address)
      get_with_prefix_length(ip_address).first
    end

    # Return an array containing the record for the IP address in the
    # {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/] and its associated
    # network prefix length. The record can be one of several types and
    # depends on the contents of the database.
    #
    # If no record is found for the IP address, the record will be +nil+ and
    # the prefix length will be the value for the missing network.
    #
    # @param ip_address [String] a string in the standard notation. It may be
    #   IPv4 or IPv6.
    #
    # @raise [ArgumentError] if you attempt to look up an IPv6 address in an
    #   IPv4-only database.
    #
    # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
    #
    # @return [Array<(Object, Integer)>]
    def get_with_prefix_length(ip_address)
      ip = IPAddr.new(ip_address)
      # We could check the IP has the correct prefix (32 or 128) but I do not
      # for performance reasons.

      ip_version = ip.ipv6? ? 6 : 4
      if ip_version == 6 && @ip_version == 4
        raise ArgumentError,
              "Error looking up #{ip}. You attempted to look up an IPv6 address in an IPv4-only database."
      end

      pointer, depth = find_address_in_tree(ip, ip_version)
      return nil, depth if pointer == 0

      [resolve_data_pointer(pointer), depth]
    end

    # Close the DB and return resources to the system.
    #
    # @return [void]
    def close
      @io.close
    end

    private

    # Build the reader object that matches +mode+.
    #
    # Raises ArgumentError when +mode+ is not one of the MODE_* constants.
    def open_reader(database, mode)
      case mode
      when MODE_AUTO, MODE_FILE
        FileReader.new(database)
      when MODE_MEMORY
        MemoryReader.new(database)
      when MODE_PARAM_IS_BUFFER
        MemoryReader.new(database, is_buffer: true)
      else
        raise ArgumentError, 'Invalid mode'
      end
    end

    # Walk the search tree one address bit at a time, most significant bit
    # first.
    #
    # Returns +[0, depth]+ when the walk ends on the record equal to the node
    # count (no data for this address) and +[record, depth]+ when it ends on a
    # record greater than the node count. +depth+ is the number of bits
    # consumed.
    #
    # Raises InvalidDatabaseError if every bit is consumed while still on a
    # tree node.
    def find_address_in_tree(ip_address, ip_version)
      packed = ip_address.hton
      bit_count = IP_VERSION_TO_BIT_COUNT[ip_version]
      node = start_node(bit_count)
      node_count = @node_count

      depth = 0
      while depth < bit_count && node < node_count
        byte = packed.getbyte(depth >> 3)
        bit = (byte >> (7 - (depth & 7))) & 1
        node = read_node(node, bit)
        depth += 1
      end

      return 0, depth if node == node_count
      return node, depth if node > node_count

      raise InvalidDatabaseError, 'Invalid node in search tree'
    end

    # Return the node at which a lookup of +length+ bits begins.
    #
    # Node 0 is used for 128-bit lookups and for databases whose IP version is
    # not 6. Otherwise the start is the node reached by following up to 96
    # left (0) records from node 0; that node is computed once and cached in
    # @ipv4_start.
    def start_node(length)
      return 0 if @ip_version != 6 || length == 128
      return @ipv4_start if @ipv4_start

      node = 0
      96.times do
        break if node >= @node_count

        node = read_node(node, 0)
      end

      @ipv4_start = node
    end

    # Read a record from the indicated node. Index indicates whether it's the
    # left (0) or right (1) record.
    #
    # Raises InvalidDatabaseError when the record size is not 24, 28 or 32.
    #
    # rubocop:disable Metrics/CyclomaticComplexity
    def read_node(node_number, index)
      base_offset = node_number * @node_byte_size

      case @record_size
      when 24
        offset = index == 0 ? base_offset : base_offset + 3
        # Prepend a zero byte so the 3-byte record unpacks as a 32-bit integer.
        ("\x00".b << @io.read(offset, 3)).unpack('N').first
      when 28
        if index == 0
          # The left record is the first three bytes plus, as its top four
          # bits, the high nibble of the fourth byte.
          n = @io.read(base_offset, 4).unpack('N').first
          ((n & 0xf0) << 20) | (n >> 8)
        else
          # The right record is the low 28 bits of the four bytes starting at
          # the fourth byte.
          @io.read(base_offset + 3, 4).unpack('N').first & 0x0fffffff
        end
      when 32
        offset = index == 0 ? base_offset : base_offset + 4
        @io.read(offset, 4).unpack('N').first
      else
        raise InvalidDatabaseError, "Unsupported record size: #{@record_size}"
      end
    end
    # rubocop:enable Metrics/CyclomaticComplexity

    # Convert a search tree record that refers to data into a file offset and
    # decode the value stored there.
    #
    # Raises InvalidDatabaseError if the offset is at or beyond the end of the
    # database.
    def resolve_data_pointer(pointer)
      offset_in_file = pointer - @node_count + @search_tree_size

      if offset_in_file >= @size
        raise InvalidDatabaseError,
              'The MaxMind DB file\'s search tree is corrupt'
      end

      data, = @decoder.decode(offset_in_file)
      data
    end

    # Locate the metadata section and return the offset of its first byte
    # (the byte right after the last occurrence of the start marker).
    #
    # Only the final METADATA_MAX_SIZE bytes are searched. They are fetched
    # with a single read and scanned from the end, rather than issuing one
    # read per candidate offset.
    #
    # Raises InvalidDatabaseError if the marker is not present.
    def find_metadata_start
      window = [@size, METADATA_MAX_SIZE].min

      if window >= METADATA_START_MARKER_LENGTH
        window_start = @size - window
        tail = @io.read(window_start, window)
        # Compare as raw bytes regardless of how the reader tagged the string.
        position = tail && tail.b.rindex(METADATA_START_MARKER)
        return window_start + position + METADATA_START_MARKER_LENGTH if position
      end

      raise InvalidDatabaseError,
            'Metadata section not found. Is this a valid MaxMind DB file?'
    end
  end
end
@@ -0,0 +1,235 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'maxmind/db/errors'
4
+
5
module MaxMind
  class DB
    # +Decoder+ decodes a {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/]
    # data section.
    #
    # Typically you will interact with this class through a Reader rather than
    # directly.
    #
    # @!visibility private
    class Decoder
      # Maps a data type number to the private method that decodes it.
      TYPE_DECODER = {
        1 => :decode_pointer,
        2 => :decode_utf8_string,
        3 => :decode_double,
        4 => :decode_bytes,
        5 => :decode_uint16,
        6 => :decode_uint32,
        7 => :decode_map,
        8 => :decode_int32,
        9 => :decode_uint64,
        10 => :decode_uint128,
        11 => :decode_array,
        14 => :decode_boolean,
        15 => :decode_float,
      }.freeze
      private_constant :TYPE_DECODER

      # Create a +Decoder+.
      #
      # +io+ is the DB. It must provide a +read+ method. It must be opened in
      # binary mode.
      #
      # +pointer_base+ is the base number to use when decoding a pointer. It is
      # where the data section begins rather than the beginning of the file.
      # The specification states the formula in the `Data Section Separator'
      # section.
      #
      # +pointer_test+ is used for testing pointer code. When true, pointers
      # are returned as offsets instead of being followed.
      def initialize(io, pointer_base = 0, pointer_test = false)
        @io = io
        @pointer_base = pointer_base
        @pointer_test = pointer_test
      end

      # Decode a section of the data section starting at +offset+.
      #
      # +offset+ is the location of the data structure to decode.
      #
      # Returns an array where the first element is the decoded value and the
      # second is the offset after decoding it.
      #
      # Raises InvalidDatabaseError if the type number found at +offset+ has
      # no decoder or a field has an unexpected size.
      def decode(offset)
        ctrl_byte = @io.read(offset, 1).ord
        new_offset = offset + 1

        # The top three bits of the control byte hold the type; zero means the
        # real type is stored in the following byte.
        type_num = ctrl_byte >> 5
        type_num, new_offset = read_extended(new_offset) if type_num == 0

        decoder = TYPE_DECODER[type_num]
        # Without this check an unknown type would reach `send(nil, ...)' and
        # surface as a TypeError instead of a database error.
        raise_bad_data if decoder.nil?

        size, new_offset = size_from_ctrl_byte(ctrl_byte, new_offset, type_num)
        send(decoder, size, new_offset)
      end

      private

      # Read the type number of an extended type: the byte at +offset+ plus 7.
      #
      # Returns the type number and the offset after the type byte. Raises
      # InvalidDatabaseError when the result is below 8, since those types
      # fit in the control byte itself.
      def read_extended(offset)
        type_num = @io.read(offset, 1).ord + 7
        if type_num < 8
          raise InvalidDatabaseError,
                "Something went horribly wrong in the decoder. An extended type resolved to a type number < 8 (#{type_num})"
        end

        [type_num, offset + 1]
      end

      # Work out the payload size from the low five bits of the control byte,
      # reading up to three further bytes for the larger encodings.
      #
      # Returns the size and the offset of the first byte after the size
      # information. For pointers (type 1) the five bits are returned as is.
      def size_from_ctrl_byte(ctrl_byte, offset, type_num)
        size = ctrl_byte & 0x1f
        return size, offset if type_num == 1 || size < 29

        case size
        when 29
          [29 + @io.read(offset, 1).ord, offset + 1]
        when 30
          [285 + @io.read(offset, 2).unpack('n').first, offset + 2]
        else
          # Prepend a zero byte so the 3-byte length unpacks as 32 bits.
          size_bytes = "\x00".b << @io.read(offset, 3)
          [65_821 + size_bytes.unpack('N').first, offset + 3]
        end
      end

      # Raise the error used for data that cannot be decoded.
      def raise_bad_data
        raise InvalidDatabaseError,
              'The MaxMind DB file\'s data section contains bad data (unknown data type or corrupt data)'
      end

      # Raise InvalidDatabaseError unless +actual+ equals +expected+.
      def verify_size(expected, actual)
        raise_bad_data unless expected == actual
      end

      # Decode +size+ consecutive values into an Array.
      def decode_array(size, offset)
        array = []
        size.times do
          value, offset = decode(offset)
          array << value
        end
        [array, offset]
      end

      # Booleans carry their value in the size field and have no payload.
      def decode_boolean(size, offset)
        [size != 0, offset]
      end

      # Return +size+ raw bytes exactly as read.
      def decode_bytes(size, offset)
        [@io.read(offset, size), offset + size]
      end

      # Decode an 8-byte big-endian double. Raises InvalidDatabaseError if
      # +size+ is not 8.
      def decode_double(size, offset)
        verify_size(8, size)
        [@io.read(offset, 8).unpack('G').first, offset + 8]
      end

      # Decode a 4-byte big-endian float. Raises InvalidDatabaseError if
      # +size+ is not 4.
      def decode_float(size, offset)
        verify_size(4, size)
        [@io.read(offset, 4).unpack('g').first, offset + 4]
      end

      def decode_int32(size, offset)
        decode_int('l>', 4, size, offset)
      end

      def decode_uint16(size, offset)
        decode_int('n', 2, size, offset)
      end

      def decode_uint32(size, offset)
        decode_int('N', 4, size, offset)
      end

      def decode_uint64(size, offset)
        decode_int('Q>', 8, size, offset)
      end

      # Decode a big-endian integer stored in +size+ bytes. Values stored in
      # fewer than +type_size+ bytes are left-padded with zero bytes before
      # unpacking with +type_code+; a zero size decodes to 0.
      def decode_int(type_code, type_size, size, offset)
        return 0, offset if size == 0

        buf = @io.read(offset, size)
        buf = buf.rjust(type_size, "\x00") if size != type_size
        [buf.unpack(type_code).first, offset + size]
      end

      # Decode an unsigned integer of up to 128 bits by unpacking it as one or
      # two 64-bit halves.
      def decode_uint128(size, offset)
        return 0, offset if size == 0

        buf = @io.read(offset, size)
        new_offset = offset + size
        return buf.rjust(8, "\x00").unpack('Q>').first, new_offset if size <= 8

        # The last eight bytes are the low half; whatever precedes them is the
        # high half.
        high = buf[0...-8].rjust(8, "\x00").unpack('Q>').first
        low = buf[-8..-1].unpack('Q>').first
        [(high << 64) | low, new_offset]
      end

      # Decode +size+ key/value pairs into a Hash.
      def decode_map(size, offset)
        container = {}
        size.times do
          key, offset = decode(offset)
          value, offset = decode(offset)
          container[key] = value
        end
        [container, offset]
      end

      # Decode a pointer and, unless the decoder was created with
      # +pointer_test+, the value it points at.
      #
      # Bits 3-4 of +size+ select how many bytes follow (1 to 4). For the
      # three shorter forms the low three bits of +size+ supply the most
      # significant bits of the pointer value. @pointer_base is added to every
      # pointer.
      #
      # Returns the pointed-to value (or the pointer itself in test mode) and
      # the offset just past the pointer bytes.
      def decode_pointer(size, offset)
        pointer_size = size >> 3

        case pointer_size
        when 0
          new_offset = offset + 1
          buf = (size & 0x7).chr << @io.read(offset, 1)
          pointer = buf.unpack('n').first + @pointer_base
        when 1
          new_offset = offset + 2
          buf = "\x00".b << (size & 0x7).chr << @io.read(offset, 2)
          pointer = buf.unpack('N').first + 2048 + @pointer_base
        when 2
          new_offset = offset + 3
          buf = (size & 0x7).chr << @io.read(offset, 3)
          pointer = buf.unpack('N').first + 526_336 + @pointer_base
        else
          new_offset = offset + 4
          pointer = @io.read(offset, 4).unpack('N').first + @pointer_base
        end

        return pointer, new_offset if @pointer_test

        value, = decode(pointer)
        [value, new_offset]
      end

      # Read +size+ bytes and tag them as UTF-8.
      def decode_utf8_string(size, offset)
        buf = @io.read(offset, size)
        # We could check it's valid UTF-8 with `valid_encoding?', but for
        # performance I do not.
        buf.force_encoding(Encoding::UTF_8)
        [buf, offset + size]
      end
    end
  end
end