platphorm-maxmind-db 1.1.0

data/lib/maxmind/db.rb ADDED
@@ -0,0 +1,306 @@
+ # frozen_string_literal: true
+
+ require 'ipaddr'
+ require 'maxmind/db/decoder'
+ require 'maxmind/db/errors'
+ require 'maxmind/db/file_reader.rb'
+ require 'maxmind/db/memory_reader.rb'
+ require 'maxmind/db/metadata.rb'
+
+ module MaxMind
+   # DB provides a way to read {MaxMind DB
+   # files}[https://maxmind.github.io/MaxMind-DB/].
+   #
+   # {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/] is a binary file format
+   # that stores data indexed by IP address subnets (IPv4 or IPv6).
+   #
+   # This class is a pure Ruby implementation of a reader for the format.
+   #
+   # == Example
+   #
+   #   require 'maxmind/db'
+   #
+   #   reader = MaxMind::DB.new('GeoIP2-City.mmdb', mode: MaxMind::DB::MODE_MEMORY)
+   #
+   #   record = reader.get('1.1.1.1')
+   #   if record.nil?
+   #     puts '1.1.1.1 was not found in the database'
+   #   else
+   #     puts record['country']['iso_code']
+   #     puts record['country']['names']['en']
+   #   end
+   #
+   #   reader.close
+   class DB
+     # Choose the default method to open the database. Currently the default is
+     # MODE_FILE.
+     MODE_AUTO = :MODE_AUTO
+     # Open the database as a regular file and read on demand.
+     MODE_FILE = :MODE_FILE
+     # Read the database into memory. This is faster than MODE_FILE but causes
+     # increased memory use.
+     MODE_MEMORY = :MODE_MEMORY
+     # Treat the database parameter as containing a database already read into
+     # memory. It must be a binary string. This primarily exists for testing.
+     #
+     # @!visibility private
+     MODE_PARAM_IS_BUFFER = :MODE_PARAM_IS_BUFFER
+
+     DATA_SECTION_SEPARATOR_SIZE = 16
+     private_constant :DATA_SECTION_SEPARATOR_SIZE
+     METADATA_START_MARKER = "\xAB\xCD\xEFMaxMind.com".b.freeze
+     private_constant :METADATA_START_MARKER
+     METADATA_START_MARKER_LENGTH = 14
+     private_constant :METADATA_START_MARKER_LENGTH
+     METADATA_MAX_SIZE = 131_072
+     private_constant :METADATA_MAX_SIZE
+
+     # Return the metadata associated with the {MaxMind
+     # DB}[https://maxmind.github.io/MaxMind-DB/]
+     #
+     # @return [MaxMind::DB::Metadata]
+     attr_reader :metadata
+
+     # Create a DB. A DB provides a way to read {MaxMind DB
+     # files}[https://maxmind.github.io/MaxMind-DB/]. If you're performing
+     # multiple lookups, it's most efficient to create one DB and reuse it.
+     #
+     # Once created, the DB is safe to use for lookups from multiple threads. It
+     # is safe to use after forking only if you use MODE_MEMORY or if your
+     # version of Ruby supports IO#pread.
+     #
+     # @param database [String] a path to a {MaxMind
+     #   DB}[https://maxmind.github.io/MaxMind-DB/].
+     #
+     # @param options [Hash<Symbol, Symbol>] options controlling the behavior of
+     #   the DB.
+     #
+     # @option options [Symbol] :mode Defines how to open the database. It may
+     #   be one of MODE_AUTO, MODE_FILE, or MODE_MEMORY. If you don't provide
+     #   one, DB uses MODE_AUTO. Refer to the definition of those constants for
+     #   an explanation of their meaning.
+     #
+     # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
+     #
+     # @raise [ArgumentError] if the mode is invalid.
+     def initialize(database, options = {})
+       options[:mode] = MODE_AUTO unless options.key?(:mode)
+
+       case options[:mode]
+       when MODE_AUTO, MODE_FILE
+         @io = FileReader.new(database)
+       when MODE_MEMORY
+         @io = MemoryReader.new(database)
+       when MODE_PARAM_IS_BUFFER
+         @io = MemoryReader.new(database, is_buffer: true)
+       else
+         raise ArgumentError, 'Invalid mode'
+       end
+
+       begin
+         @size = @io.size
+
+         metadata_start = find_metadata_start
+         metadata_decoder = Decoder.new(@io, metadata_start)
+         metadata_map, = metadata_decoder.decode(metadata_start)
+         @metadata = Metadata.new(metadata_map)
+         @decoder = Decoder.new(@io, @metadata.search_tree_size +
+                                     DATA_SECTION_SEPARATOR_SIZE)
+
+         # Store copies as instance variables to reduce method calls.
+         @ip_version = @metadata.ip_version
+         @node_count = @metadata.node_count
+         @node_byte_size = @metadata.node_byte_size
+         @record_size = @metadata.record_size
+         @search_tree_size = @metadata.search_tree_size
+
+         @ipv4_start = nil
+         # Find @ipv4_start up front. If we don't, we either have a race to
+         # get/set it or have to synchronize access.
+         start_node(0)
+       rescue StandardError => e
+         @io.close
+         raise e
+       end
+     end
+
+     # Return the record for the IP address in the {MaxMind
+     # DB}[https://maxmind.github.io/MaxMind-DB/]. The record can be one of
+     # several types and depends on the contents of the database.
+     #
+     # If no record is found for the IP address, +get+ returns +nil+.
+     #
+     # @param ip_address [String] a string in the standard notation. It may be
+     #   IPv4 or IPv6.
+     #
+     # @raise [ArgumentError] if you attempt to look up an IPv6 address in an
+     #   IPv4-only database.
+     #
+     # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
+     #
+     # @return [Object, nil]
+     def get(ip_address)
+       record, = get_with_prefix_length(ip_address)
+
+       record
+     end
+
+     # Return an array containing the record for the IP address in the
+     # {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/] and its associated
+     # network prefix length. The record can be one of several types and
+     # depends on the contents of the database.
+     #
+     # If no record is found for the IP address, the record will be +nil+ and
+     # the prefix length will be the value for the missing network.
+     #
+     # @param ip_address [String] a string in the standard notation. It may be
+     #   IPv4 or IPv6.
+     #
+     # @raise [ArgumentError] if you attempt to look up an IPv6 address in an
+     #   IPv4-only database.
+     #
+     # @raise [InvalidDatabaseError] if the database is corrupt or invalid.
+     #
+     # @return [Array<(Object, Integer)>]
+     def get_with_prefix_length(ip_address)
+       ip = IPAddr.new(ip_address)
+       # We could check the IP has the correct prefix (32 or 128) but I do not
+       # for performance reasons.
+
+       ip_version = ip.ipv6? ? 6 : 4
+       if ip_version == 6 && @ip_version == 4
+         raise ArgumentError,
+               "Error looking up #{ip}. You attempted to look up an IPv6 address in an IPv4-only database."
+       end
+
+       pointer, depth = find_address_in_tree(ip, ip_version)
+       return nil, depth if pointer == 0
+
+       [resolve_data_pointer(pointer), depth]
+     end
+
+     private
+
+     IP_VERSION_TO_BIT_COUNT = {
+       4 => 32,
+       6 => 128,
+     }.freeze
+     private_constant :IP_VERSION_TO_BIT_COUNT
+
+     def find_address_in_tree(ip_address, ip_version)
+       packed = ip_address.hton
+
+       bit_count = IP_VERSION_TO_BIT_COUNT[ip_version]
+       node = start_node(bit_count)
+
+       node_count = @node_count
+
+       depth = 0
+       loop do
+         break if depth >= bit_count || node >= node_count
+
+         c = packed[depth >> 3].ord
+         bit = 1 & (c >> 7 - (depth % 8))
+         node = read_node(node, bit)
+         depth += 1
+       end
+
+       return 0, depth if node == node_count
+
+       return node, depth if node > node_count
+
+       raise InvalidDatabaseError, 'Invalid node in search tree'
+     end
+
+     def start_node(length)
+       return 0 if @ip_version != 6 || length == 128
+
+       return @ipv4_start if @ipv4_start
+
+       node = 0
+       96.times do
+         break if node >= @metadata.node_count
+
+         node = read_node(node, 0)
+       end
+
+       @ipv4_start = node
+     end
+
+     # Read a record from the indicated node. Index indicates whether it's the
+     # left (0) or right (1) record.
+     #
+     # rubocop:disable Metrics/CyclomaticComplexity
+     def read_node(node_number, index)
+       base_offset = node_number * @node_byte_size
+
+       if @record_size == 24
+         offset = index == 0 ? base_offset : base_offset + 3
+         buf = @io.read(offset, 3)
+         node_bytes = "\x00".b << buf
+         return node_bytes.unpack('N').first
+       end
+
+       if @record_size == 28
+         if index == 0
+           buf = @io.read(base_offset, 4)
+           n = buf.unpack('N').first
+           last24 = n >> 8
+           first4 = (n & 0xf0) << 20
+           return first4 | last24
+         end
+         buf = @io.read(base_offset + 3, 4)
+         return buf.unpack('N').first & 0x0fffffff
+       end
+
+       if @record_size == 32
+         offset = index == 0 ? base_offset : base_offset + 4
+         node_bytes = @io.read(offset, 4)
+         return node_bytes.unpack('N').first
+       end
+
+       raise InvalidDatabaseError, "Unsupported record size: #{@record_size}"
+     end
+     # rubocop:enable Metrics/CyclomaticComplexity
+
+     def resolve_data_pointer(pointer)
+       offset_in_file = pointer - @node_count + @search_tree_size
+
+       if offset_in_file >= @size
+         raise InvalidDatabaseError,
+               'The MaxMind DB file\'s search tree is corrupt'
+       end
+
+       data, = @decoder.decode(offset_in_file)
+       data
+     end
+
+     def find_metadata_start
+       metadata_max_size = @size < METADATA_MAX_SIZE ? @size : METADATA_MAX_SIZE
+
+       stop_index = @size - metadata_max_size
+       index = @size - METADATA_START_MARKER_LENGTH
+       while index >= stop_index
+         return index + METADATA_START_MARKER_LENGTH if at_metadata?(index)
+
+         index -= 1
+       end
+
+       raise InvalidDatabaseError,
+             'Metadata section not found. Is this a valid MaxMind DB file?'
+     end
+
+     def at_metadata?(index)
+       @io.read(index, METADATA_START_MARKER_LENGTH) == METADATA_START_MARKER
+     end
+
+     public
+
+     # Close the DB and return resources to the system.
+     #
+     # @return [void]
+     def close
+       @io.close
+     end
+   end
+ end
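The class comment above only demonstrates get. Here is a minimal sketch of the prefix-length variant as well; the database path and the lookup address are placeholders, and MODE_FILE is used only to show the :mode option.

    require 'maxmind/db'

    # MODE_FILE reads records from disk on demand; MODE_MEMORY loads the whole
    # database into memory up front.
    reader = MaxMind::DB.new('GeoIP2-City.mmdb', mode: MaxMind::DB::MODE_FILE)

    # get_with_prefix_length returns the record (or nil) together with the
    # prefix length of the network that matched the address.
    record, prefix_length = reader.get_with_prefix_length('1.1.1.1')
    if record.nil?
      puts "1.1.1.1 was not found (network prefix length #{prefix_length})"
    else
      puts "1.1.1.1/#{prefix_length}: #{record['country']['iso_code']}"
    end

    reader.close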
data/lib/maxmind/db/decoder.rb ADDED
@@ -0,0 +1,235 @@
+ # frozen_string_literal: true
+
+ require 'maxmind/db/errors'
+
+ module MaxMind
+   class DB
+     # +Decoder+ decodes a {MaxMind DB}[https://maxmind.github.io/MaxMind-DB/]
+     # data section.
+     #
+     # Typically you will interact with this class through a Reader rather than
+     # directly.
+     #
+     # @!visibility private
+     class Decoder
+       # Create a +Decoder+.
+       #
+       # +io+ is the DB. It must provide a +read+ method. It must be opened in
+       # binary mode.
+       #
+       # +pointer_base+ is the base number to use when decoding a pointer. It is
+       # where the data section begins rather than the beginning of the file.
+       # The specification states the formula in the `Data Section Separator'
+       # section.
+       #
+       # +pointer_test+ is used for testing pointer code.
+       def initialize(io, pointer_base = 0, pointer_test = false)
+         @io = io
+         @pointer_base = pointer_base
+         @pointer_test = pointer_test
+       end
+
+       private
+
+       def decode_array(size, offset)
+         array = []
+         size.times do
+           value, offset = decode(offset)
+           array << value
+         end
+         [array, offset]
+       end
+
+       def decode_boolean(size, offset)
+         [size != 0, offset]
+       end
+
+       def decode_bytes(size, offset)
+         [@io.read(offset, size), offset + size]
+       end
+
+       def decode_double(size, offset)
+         verify_size(8, size)
+         buf = @io.read(offset, 8)
+         [buf.unpack('G').first, offset + 8]
+       end
+
+       def decode_float(size, offset)
+         verify_size(4, size)
+         buf = @io.read(offset, 4)
+         [buf.unpack('g').first, offset + 4]
+       end
+
+       def verify_size(expected, actual)
+         return if expected == actual
+
+         raise InvalidDatabaseError,
+               'The MaxMind DB file\'s data section contains bad data (unknown data type or corrupt data)'
+       end
+
+       def decode_int32(size, offset)
+         decode_int('l>', 4, size, offset)
+       end
+
+       def decode_uint16(size, offset)
+         decode_int('n', 2, size, offset)
+       end
+
+       def decode_uint32(size, offset)
+         decode_int('N', 4, size, offset)
+       end
+
+       def decode_uint64(size, offset)
+         decode_int('Q>', 8, size, offset)
+       end
+
+       def decode_int(type_code, type_size, size, offset)
+         return 0, offset if size == 0
+
+         buf = @io.read(offset, size)
+         buf = buf.rjust(type_size, "\x00") if size != type_size
+         [buf.unpack(type_code).first, offset + size]
+       end
+
+       def decode_uint128(size, offset)
+         return 0, offset if size == 0
+
+         buf = @io.read(offset, size)
+
+         if size <= 8
+           buf = buf.rjust(8, "\x00")
+           return buf.unpack('Q>').first, offset + size
+         end
+
+         a_bytes = buf[0...-8].rjust(8, "\x00")
+         b_bytes = buf[-8...buf.length]
+         a = a_bytes.unpack('Q>').first
+         b = b_bytes.unpack('Q>').first
+         a <<= 64
+         [a | b, offset + size]
+       end
+
+       def decode_map(size, offset)
+         container = {}
+         size.times do
+           key, offset = decode(offset)
+           value, offset = decode(offset)
+           container[key] = value
+         end
+         [container, offset]
+       end
+
+       def decode_pointer(size, offset)
+         pointer_size = size >> 3
+
+         case pointer_size
+         when 0
+           new_offset = offset + 1
+           buf = (size & 0x7).chr << @io.read(offset, 1)
+           pointer = buf.unpack('n').first + @pointer_base
+         when 1
+           new_offset = offset + 2
+           buf = "\x00".b << (size & 0x7).chr << @io.read(offset, 2)
+           pointer = buf.unpack('N').first + 2048 + @pointer_base
+         when 2
+           new_offset = offset + 3
+           buf = (size & 0x7).chr << @io.read(offset, 3)
+           pointer = buf.unpack('N').first + 526_336 + @pointer_base
+         else
+           new_offset = offset + 4
+           buf = @io.read(offset, 4)
+           pointer = buf.unpack('N').first + @pointer_base
+         end
+
+         return pointer, new_offset if @pointer_test
+
+         value, = decode(pointer)
+         [value, new_offset]
+       end
+
+       def decode_utf8_string(size, offset)
+         new_offset = offset + size
+         buf = @io.read(offset, size)
+         buf.force_encoding(Encoding::UTF_8)
+         # We could check it's valid UTF-8 with `valid_encoding?', but for
+         # performance I do not.
+         [buf, new_offset]
+       end
+
+       TYPE_DECODER = {
+         1 => :decode_pointer,
+         2 => :decode_utf8_string,
+         3 => :decode_double,
+         4 => :decode_bytes,
+         5 => :decode_uint16,
+         6 => :decode_uint32,
+         7 => :decode_map,
+         8 => :decode_int32,
+         9 => :decode_uint64,
+         10 => :decode_uint128,
+         11 => :decode_array,
+         14 => :decode_boolean,
+         15 => :decode_float,
+       }.freeze
+       private_constant :TYPE_DECODER
+
+       public
+
+       # Decode a section of the data section starting at +offset+.
+       #
+       # +offset+ is the location of the data structure to decode.
+       #
+       # Returns an array where the first element is the decoded value and the
+       # second is the offset after decoding it.
+       #
+       # Throws an exception if there is an error.
+       def decode(offset)
+         new_offset = offset + 1
+         buf = @io.read(offset, 1)
+         ctrl_byte = buf.ord
+         type_num = ctrl_byte >> 5
+         type_num, new_offset = read_extended(new_offset) if type_num == 0
+
+         size, new_offset = size_from_ctrl_byte(ctrl_byte, new_offset, type_num)
+         # We could check an element exists at `type_num', but for performance I
+         # don't.
+         send(TYPE_DECODER[type_num], size, new_offset)
+       end
+
+       private
+
+       def read_extended(offset)
+         buf = @io.read(offset, 1)
+         next_byte = buf.ord
+         type_num = next_byte + 7
+         if type_num < 8
+           raise InvalidDatabaseError,
+                 "Something went horribly wrong in the decoder. An extended type resolved to a type number < 8 (#{type_num})"
+         end
+         [type_num, offset + 1]
+       end
+
+       def size_from_ctrl_byte(ctrl_byte, offset, type_num)
+         size = ctrl_byte & 0x1f
+
+         return size, offset if type_num == 1 || size < 29
+
+         if size == 29
+           size_bytes = @io.read(offset, 1)
+           size = 29 + size_bytes.ord
+           return size, offset + 1
+         end
+
+         if size == 30
+           size_bytes = @io.read(offset, 2)
+           size = 285 + size_bytes.unpack('n').first
+           return size, offset + 2
+         end
+
+         size_bytes = "\x00".b << @io.read(offset, 3)
+         size = 65_821 + size_bytes.unpack('N').first
+         [size, offset + 3]
+       end
+     end
+   end
+ end
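The decoder's entry point is the control byte handling in decode, read_extended and size_from_ctrl_byte: the top three bits of the byte select the type, the bottom five give the size, and values that don't fit spill into the following bytes. The offsets added in decode_pointer (2048 and 526_336) and in size_from_ctrl_byte (29, 285 and 65_821) exist so the value ranges of the different widths don't overlap. A small worked example of that arithmetic, using a made-up control byte rather than bytes from a real database:

    # 0x44 is a hypothetical control byte: 0b010_00100.
    ctrl_byte = 0x44
    type_num = ctrl_byte >> 5    # => 2, :decode_utf8_string in TYPE_DECODER
    size = ctrl_byte & 0x1f      # => 4, so the next 4 bytes are the string

    puts "type #{type_num}, size #{size}"

    # When the top three bits are 0, read_extended adds 7 to the following
    # byte, making types 8 and above reachable. When the low five bits are 29,
    # 30 or 31, the size continues into the next 1, 2 or 3 bytes.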