RubyGems - wgit - Versions diffs - 0.0.1 → 0.0.2 - Mend

wgit 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +5 -5
data/lib/wgit.rb +1 -1
data/lib/wgit/assertable.rb +72 -61
data/lib/wgit/core_ext.rb +11 -5
data/lib/wgit/crawler.rb +97 -57
data/lib/wgit/database/database.rb +247 -170
data/lib/wgit/database/model.rb +40 -24
data/lib/wgit/database/mongo_connection_details.rb +44 -23
data/lib/wgit/document.rb +534 -233
data/lib/wgit/indexer.rb +235 -0
data/lib/wgit/url.rb +199 -121
data/lib/wgit/utils.rb +143 -96
data/lib/wgit/version.rb +5 -1
metadata +10 -9
data/lib/wgit/web_crawler.rb +0 -134

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 82f33e00a273c6cdeb3ba9c171110d849fff2428
-  data.tar.gz: 14c63f826d1d21811b14e9f3a2bca750b3f4afa3
+SHA256:
+  metadata.gz: c2ee83f5b722a6aff6fad7727751e149b58de02de3abecd49e51066a1ecf035d
+  data.tar.gz: 112fb3b45192e781d8a4b4eeaca6d3ad7abaaa1a29b8833654b17a9b38644f81
 SHA512:
-  metadata.gz: 7c42b925f72d9e7cceba79d9aee764f97b6537c0005038501a1f75c36b1bcd3b6036cfb9b62fcf01fd435e0348c1e8c00c445a291051c068fa58184de2c9590a
-  data.tar.gz: a2a756c3be7b9b214921bfdac5846a2250e452265285cb9c3b812d2eaefc2ab969b608cd1841f34507a6ef184f20ba7c98658daf0135fb85eead88de0356320f
+  metadata.gz: a29f32db3538bed0e2b09ae599623ee9751ed0f929f83b1d7a987982132431cc72b89fa5bc3e2522c50c50c4a7d0fd33aaf0fbc000986b1db8f0494a37cebd7c
+  data.tar.gz: faf0f814dad58fef4a4ec61a5cdd3f4b3ba7170b417e48a18f4a45922efa0dcab8ffaf4bbe0d198d53e5778056599e6827d1315ce68e08984ca2af47e26631bc

data/lib/wgit.rb CHANGED

@@ -1,6 +1,6 @@
 require_relative 'wgit/version'
 require_relative 'wgit/crawler'
-require_relative 'wgit/web_crawler'
+require_relative 'wgit/indexer'
 require_relative 'wgit/url'
 require_relative 'wgit/document'
 require_relative 'wgit/utils'

data/lib/wgit/assertable.rb CHANGED

@@ -1,69 +1,80 @@
 module Wgit
-  # @author Michael Telford
-  # Module containing assert methods including type checking which can be used
-  # for asserting the integrity of method definitions etc.
+  # Module containing assert methods including type checking which can be used
+  # for asserting the integrity of method definitions etc.
   module Assertable
-      DEFAULT_TYPE_FAIL_MSG = "Expected: %s, Actual: %s"
-      WRONG_METHOD_MSG = "arr must be Enumerable, use a different method"
-      DEFAULT_DUCK_FAIL_MSG = "%s doesn't respond_to? %s"
-      # obj.instance_of? must return true for one of the types listed in
-      # type_or_types or an exception is thrown using msg if provided.
-      # type_or_types can be a single Class or an Enumerable of Class objects,
-      # Strings and Symbols will not work.
-      def assert_types(obj, type_or_types, msg = nil)
-          msg ||= DEFAULT_TYPE_FAIL_MSG % [type_or_types, obj.class]
-          if type_or_types.respond_to?(:any?)
-              match = type_or_types.any? { |type| obj.instance_of?(type) }
-          else
-              match = obj.instance_of?(type_or_types)
-          end
-          raise msg unless match
-          obj
-      end
-      # Each object within arr must match one of the types listed in
-      # type_or_types or an exception is thrown using msg if provided.
-      # type_or_types can be a single Class or an Enumerable of Class objects,
-      # Strings and Symbols will not work.
-      def assert_arr_types(arr, type_or_types, msg = nil)
-          raise WRONG_METHOD_MSG unless arr.respond_to?(:each)
-          arr.each do |obj|
-              assert_types(obj, type_or_types, msg)
-          end
+    DEFAULT_TYPE_FAIL_MSG = "Expected: %s, Actual: %s".freeze
+    WRONG_METHOD_MSG = "arr must be Enumerable, use a different method".freeze
+    DEFAULT_DUCK_FAIL_MSG = "%s doesn't respond_to? %s".freeze
+    # Tests if the obj is of a given type.
+    #
+    # @param obj [Object] The Object to test.
+    # @param type_or_types [Type, Array<Type>] The type/types that obj must
+    #     belong to or an exception is thrown.
+    # @param msg [String] The raised RuntimeError message, if provided.
+    # @return [Object] The given obj on successful assertion.
+    def assert_types(obj, type_or_types, msg = nil)
+      msg ||= DEFAULT_TYPE_FAIL_MSG % [type_or_types, obj.class]
+      if type_or_types.respond_to?(:any?)
+        match = type_or_types.any? { |type| obj.instance_of?(type) }
+      else
+        match = obj.instance_of?(type_or_types)
       end
-      # The obj_or_objs must respond_to? all of the given methods or an
-      # Exception is raised using msg or a default message.
-      # Returns obj_or_objs on sucessful assertion.
-      def assert_respond_to(obj_or_objs, methods, msg = nil)
-          if obj_or_objs.respond_to?(:each)
-              obj_or_objs.each do |obj|
-                  _assert_respond_to(obj, methods, msg)
-              end
-          else
-              _assert_respond_to(obj_or_objs, methods, msg)
-          end
-          obj_or_objs
+      raise msg unless match
+      obj
+    end
+    # Each object within arr must match one of the types listed in
+    # type_or_types or an exception is raised using msg, if provided.
+    #
+    # @param arr [Enumerable#each] Enumerable of objects to type check.
+    # @param type_or_types [Type, Array<Type>] The allowed type(s).
+    # @param msg [String] The raised RuntimeError message, if provided.
+    # @return [Object] The given arr on successful assertion.
+    def assert_arr_types(arr, type_or_types, msg = nil)
+      raise WRONG_METHOD_MSG unless arr.respond_to?(:each)
+      arr.each do |obj|
+        assert_types(obj, type_or_types, msg)
       end
-      private
-      def _assert_respond_to(obj, methods, msg = nil)
-          msg ||= DEFAULT_DUCK_FAIL_MSG % ["#{obj.class} (#{obj})", methods]
-          match = methods.all? { |method| obj.respond_to?(method) }
-          raise msg unless match
-          obj
+    end
+    # The obj_or_objs must respond_to? all of the given methods or an
+    # Exception is raised using msg, if provided.
+    #
+    # @param obj_or_objs [Object, Enumerable#each] The objects to duck check.
+    # @param methods [Array<Symbol>] The methods to :respond_to?.
+    # @param msg [String] The raised RuntimeError message, if provided.
+    # @return [Object] The given obj_or_objs on successful assertion.
+    def assert_respond_to(obj_or_objs, methods, msg = nil)
+      methods = [methods] unless methods.respond_to?(:all?)
+      if obj_or_objs.respond_to?(:each)
+        obj_or_objs.each do |obj|
+          _assert_respond_to(obj, methods, msg)
+        end
+      else
+        _assert_respond_to(obj_or_objs, methods, msg)
       end
-      alias :assert_type :assert_types
-      alias :type :assert_types
-      alias :types :assert_types
-      alias :assert_arr_type :assert_arr_types
-      alias :arr_type :assert_arr_types
-      alias :arr_types :assert_arr_types
-      alias :respond_to :assert_respond_to
+      obj_or_objs
+    end
+  private
+    # obj must respond_to? all methods or an exception is raised.
+    def _assert_respond_to(obj, methods, msg = nil)
+      raise "methods must respond_to? :all?" unless methods.respond_to?(:all?)
+      msg ||= DEFAULT_DUCK_FAIL_MSG % ["#{obj.class} (#{obj})", methods]
+      match = methods.all? { |method| obj.respond_to?(method) }
+      raise msg unless match
+      obj
+    end
+    alias :assert_type :assert_types
+    alias :type :assert_types
+    alias :types :assert_types
+    alias :assert_arr_type :assert_arr_types
+    alias :arr_type :assert_arr_types
+    alias :arr_types :assert_arr_types
+    alias :respond_to :assert_respond_to
   end
 end

data/lib/wgit/core_ext.rb CHANGED

@@ -1,11 +1,12 @@
 require_relative 'url'
-# @author Michael Telford
 # Script which extends Ruby's core functionality when parsed.
-# Needs to be required separately using `require 'wgit/core_ext'`.
+# Needs to be required separately using `require 'wgit/core_ext'`.
 class String
-  # Converts a String into a Wgit::Url object.
+  # Converts a String into a Wgit::Url object.
+  #
+  # @return [Wgit::Url] The converted URL.
   def to_url
     Wgit::Url.new(self)
   end
@@ -13,7 +14,9 @@ end
 module Enumerable
   # Converts each String instance into a Wgit::Url object and returns the new
-  # array.
+  # Array.
+  #
+  # @return [Array<Wgit::Url>] The converted URLs.
   def to_urls
     map do |element|
       process_url_element(element)
@@ -21,7 +24,9 @@ module Enumerable
   end
   # Converts each String instance into a Wgit::Url object and returns the
-  # updated array.
+  # updated array. Modifies the receiver.
+  #
+  # @return [Array<Wgit::Url>] Self containing the converted URLs.
   def to_urls!
     map! do |element|
       process_url_element(element)
@@ -31,6 +36,7 @@ end
 private
+# Converts the element to a Wgit::Url if the element is a String.
 def process_url_element(element)
   if element.is_a? String
     element.to_url

data/lib/wgit/crawler.rb CHANGED

@@ -3,67 +3,106 @@ require_relative 'document'
 require_relative 'utils'
 require_relative 'assertable'
 require 'net/http' # requires 'uri'
 module Wgit
-  # @author Michael Telford
-  # Crawler class provides a means of crawling web URL's.
-  # Note that any redirects will not be followed for during crawling
-  # functionality.
+  # The Crawler class provides a means of crawling web based URL's, turning
+  # their HTML into Wgit::Document's.
+  # Note that redirects are currently not followed during a crawl.
   class Crawler
     include Assertable
-  	attr_reader :urls, :docs
+    # The urls to crawl.
+    attr_reader :urls
+    # The docs of the crawled @urls.
+    attr_reader :docs
-  	def initialize(*urls)
-  		self.urls = urls unless urls.nil?
+    # Initializes the Crawler by setting the @urls and @docs.
+    #
+    # @param urls [*Wgit::Url] The URLs to crawl.
+    def initialize(*urls)
+      self.[](*urls)
       @docs = []
-  	end
+    end
+    # Sets this Crawler's @urls.
+    #
+    # @param urls [Array<Wgit::Url>] The URLs to crawl.
     def urls=(urls)
-        @urls = []
-        Wgit::Utils.each(urls) { |url| add_url(url) }
+      @urls = []
+      Wgit::Utils.each(urls) { |url| add_url(url) }
     end
+    # Sets this Crawler's @urls.
+    #
+    # @param urls [*Wgit::Url] The URLs to crawl.
     def [](*urls)
-        self.urls = urls unless urls.nil?
+      # If urls is nil then add_url (when called later) will set @urls = []
+      # so we do nothing here.
+      if not urls.nil?
+        # Due to *urls you can end up with [[url1,url2,url3]] etc. where the
+        # outer array is bogus so we use the inner one only.
+        if  urls.is_a?(Enumerable) &&
+            urls.length == 1 &&
+            urls.first.is_a?(Enumerable)
+          urls = urls.first
+        end
+        # Here we call urls= method using self because the param name is also
+        # urls which conflicts.
+        self.urls = urls
+      end
     end
+    # Adds the url to this Crawler's @urls.
+    #
+    # @param url [Wgit::Url] A URL to crawl.
     def <<(url)
-        add_url(url)
+      add_url(url)
     end
     # Crawls individual urls, not entire sites.
-    # Returns the last crawled doc.
-    # Yields each doc to the provided block or adds each doc to @docs
-    # which can be accessed by Crawler#docs after the method returns.
-  	def crawl_urls(urls = @urls, &block)
+    #
+    # @param urls [Array<Wgit::Url>] The URLs to crawl.
+    # @yield [doc] If provided, the block is given each crawled
+    #   Document. Otherwise each doc is added to @docs which can be accessed
+    #   by Crawler#docs after this method returns.
+    # @return [Wgit::Document] The last Document crawled.
+    def crawl_urls(urls = @urls, &block)
       raise "No urls to crawl" unless urls
       @docs = []
       doc = nil
       Wgit::Utils.each(urls) { |url| doc = handle_crawl_block(url, &block) }
       doc ? doc : @docs.last
-  	end
-  	# Crawl the url and return the response document or nil.
-    # Also yield(doc) if a block is provided. The doc is passed to the block
-    # regardless of the crawl success so the doc.url can be used if needed.
-  	def crawl_url(url = @urls.first, &block)
-      assert_type(url, Url)
-  		markup = fetch(url)
+    end
+    # Crawl the url and return the response document or nil.
+    #
+    # @param url [Wgit::Url] The URL to crawl.
+    # @yield [doc] The crawled HTML Document, regardless of whether the
+    #   crawl was successful or not. Therefore, the Document#url can be used.
+    # @return [Wgit::Document, nil] The crawled HTML Document or nil if the
+    #   crawl was unsuccessful.
+    def crawl_url(url = @urls.first)
+      assert_type(url, Wgit::Url)
+      markup = fetch(url)
       url.crawled = true
       doc = Wgit::Document.new(url, markup)
-      block.call(doc) if block_given?
+      yield(doc) if block_given?
       doc.empty? ? nil : doc
-  	end
+    end
     # Crawls an entire site by recursively going through its internal_links.
-    # Also yield(doc) for each crawled doc if a block is provided.
-    # A block is the only way to interact with the crawled docs.
-    # Returns a unique array of external urls collected from the site
-    # or nil if the base_url could not be crawled successfully.
+    #
+    # @param base_url [Wgit::Url] The base URL of the website to be crawled.
+    # @yield [doc] Given each crawled Document/page of the site.
+    #   A block is the only way to interact with each crawled Document.
+    # @return [Array<Wgit::Url>, nil] Unique Array of external urls collected
+    #   from all of the site's pages or nil if the base_url could not be
+    #   crawled successfully.
     def crawl_site(base_url = @urls.first, &block)
-      assert_type(base_url, Url)
+      assert_type(base_url, Wgit::Url)
       doc = crawl_url(base_url, &block)
       return nil if doc.nil?
@@ -75,7 +114,7 @@ module Wgit
       return doc.external_links.uniq if internal_urls.empty?
       loop do
-        internal_urls.uniq! unless internal_urls.uniq.nil?
+        internal_urls.uniq!
         links = internal_urls - crawled_urls
         break if links.empty?
@@ -94,36 +133,37 @@ module Wgit
   private
-    # Add the document to the @docs array for later processing
-    # or let the block process it here and now.
+    # Add the document to the @docs array for later processing or let the block
+    # process it here and now.
     def handle_crawl_block(url, &block)
-        if not block_given?
-		        @docs << crawl_url(url)
-            nil
-        else
-            crawl_url(url, &block)
-        end
+      if block_given?
+        crawl_url(url, &block)
+      else
+        @docs << crawl_url(url)
+        nil
+      end
     end
     # The fetch method performs a HTTP GET to obtain the HTML document.
-    # Invalid urls or any HTTP response that doesn't return a HTML body
-    # will be ignored and nil will be returned.  This means that redirects
-    # etc. will not be followed.
+    # Invalid urls or any HTTP response that doesn't return a HTML body will be
+    # ignored and nil will be returned.  This means that redirects etc. will
+    # not be followed.
     def fetch(url)
-        raise unless url.respond_to?(:to_uri)
-        res = Net::HTTP.get_response(url.to_uri)
-        res.body.empty? ? nil : res.body
+      raise unless url.respond_to?(:to_uri)
+      res = Net::HTTP.get_response(url.to_uri)
+      res.body.empty? ? nil : res.body
     rescue
-        nil
+      nil
     end
+    # Add the url to @urls ensuring it is cast to a Wgit::Url if necessary.
     def add_url(url)
-        @urls = [] if @urls.nil?
-        if url.instance_of?(Url)
-            @urls << url
-        else
-            @urls << Wgit::Url.new(url)
-        end
+      @urls = [] if @urls.nil?
+      if url.is_a?(Wgit::Url)
+        @urls << url
+      else
+        @urls << Wgit::Url.new(url)
+      end
     end
     alias :crawl :crawl_urls

data/lib/wgit/database/database.rb CHANGED

@@ -2,22 +2,19 @@ require_relative '../document'
 require_relative '../url'
 require_relative '../utils'
 require_relative '../assertable'
-require_relative 'mongo_connection_details'
 require_relative 'model'
 require 'mongo'
 module Wgit
-  # @author Michael Telford
   # Class modeling a DB connection and CRUD operations for the Url and
   # Document collections.
-  # The most common methods are: insert, update, urls, search, stats, size.
   class Database
     include Assertable
-    # Is relative to the root project folder, not this file.
-    LOG_FILE_PATH = "misc/mongo_log.txt"
+    # Initializes a database connection client.
+    #
+    # @raise [RuntimeError] If Wgit::CONNECTION_DETAILS aren't set.
     def initialize
       conn_details = Wgit::CONNECTION_DETAILS
       if conn_details.empty?
@@ -25,146 +22,188 @@ module Wgit
 :port, :db, :uname, :pword for a database connection to be established."
       end
-      logger = Logger.new(LOG_FILE_PATH)
+      # Only log to STDOUT in fatal scenarios.
+      Mongo::Logger.logger.level = Logger::FATAL
       address = "#{conn_details[:host]}:#{conn_details[:port]}"
       @@client = Mongo::Client.new([address],
-                                   :database => conn_details[:db],
-                                   :user => conn_details[:uname],
-                                   :password => conn_details[:pword],
-                                   :logger => logger,
-                                   :truncate_logs => false)
+                                   database:      conn_details[:db],
+                                   user:          conn_details[:uname],
+                                   password:      conn_details[:pword])
     end
     ### Create Data ###
+    # Insert one or more Url or Document objects into the DB.
+    #
+    # @param data [Wgit::Url, Wgit::Document, Enumerable] A single Url or
+    #   Document, or a collection made up entirely of Urls or of Documents.
+    # @raise [RuntimeError] If the data is not valid.
     def insert(data)
-        if data.is_a?(Url)
-            insert_urls(data)
-        elsif data.is_a?(Document)
-            insert_docs(data)
-        elsif data.respond_to?(:first)
-            if data.first.is_a?(Url)
-                insert_urls(data)
-            else
-                insert_docs(data)
-            end
-        else
-            raise "data is not in the correct format (all Url's or Document's)"
-        end
-    end
-    def insert_urls(url_or_urls)
-        unless url_or_urls.respond_to?(:map)
-            assert_type(url_or_urls, Url)
-            url_or_urls = Wgit::Model.url(url_or_urls)
-        else
-            assert_arr_types(url_or_urls, Url)
-            url_or_urls = url_or_urls.map do |url|
-                Wgit::Model.url(url)
-            end
-        end
-        create(:urls, url_or_urls)
-    end
-    def insert_docs(doc_or_docs)
-        unless doc_or_docs.respond_to?(:map)
-            assert_type(doc_or_docs, [Document, Hash])
-            unless doc_or_docs.is_a?(Hash)
-                doc_or_docs = Wgit::Model.document(doc_or_docs)
-            end
+      if data.is_a?(Url)
+        insert_urls(data)
+      elsif data.is_a?(Document)
+        insert_docs(data)
+      elsif data.respond_to?(:first)
+        if data.first.is_a?(Url)
+          insert_urls(data)
         else
-            assert_arr_types(doc_or_docs, [Document, Hash])
-            doc_or_docs = doc_or_docs.map do |doc|
-                Wgit::Model.document(doc) unless doc.is_a?(Hash)
-            end
+          insert_docs(data)
         end
-        create(:documents, doc_or_docs)
+      else
+        raise "data is not in the correct format (all Url's or Document's)"
+      end
     end
     ### Retrieve Data ###
-    # A crawled parameter value of nil (the default) returns all urls.
-    # A limit of 0 means all urls are returned.
-    # All urls are sorted by date_added ascending, in other words the first
-    # url in the results is the first added.
-    def urls(crawled = nil, limit = 0, skip = 0, &block)
-      crawled.nil? ? query = {} : query = { :crawled => crawled }
+    # Returns Url records from the DB. All Urls are sorted by date_added
+    # ascending, in other words the first url returned is the first one that
+    # was inserted into the DB.
+    #
+    # @param crawled [Boolean] Filter by Url#crawled value. nil returns all.
+    # @param limit [Integer] The max number of Url's to return. 0 returns all.
+    # @param skip [Integer] Skip n amount of Url's.
+    # @yield [url] Given each Url returned from the DB.
+    # @return [Array<Wgit::Url>] The Urls obtained from the DB.
+    def urls(crawled = nil, limit = 0, skip = 0)
+      crawled.nil? ? query = {} : query = { crawled: crawled }
-      sort = { :date_added => 1 }
+      sort = { date_added: 1 }
       results = retrieve(:urls, query, sort, {}, limit, skip)
       return [] if results.count < 1
       # results.respond_to? :map! is false so we use map and overwrite the var.
       results = results.map { |url_doc| Wgit::Url.new(url_doc) }
-      return results unless block_given?
-      results.each { |url| block.call(url) }
+      results.each { |url| yield(url) } if block_given?
+      results
     end
+    # Returns Url records that have been crawled.
+    #
+    # @param limit [Integer] The max number of Url's to return. 0 returns all.
+    # @param skip [Integer] Skip n amount of Url's.
+    # @yield [url] Given each Url returned from the DB.
+    # @return [Array<Wgit::Url>] The crawled Urls obtained from the DB.
     def crawled_urls(limit = 0, skip = 0, &block)
       urls(true, limit, skip, &block)
     end
+    # Returns Url records that haven't been crawled. Each Url is yielded to a
+    # block, if given.
+    #
+    # @param limit [Integer] The max number of Url's to return. 0 returns all.
+    # @param skip [Integer] Skip n amount of Url's.
+    # @yield [url] Given each Url returned from the DB.
+    # @return [Array<Wgit::Url>] The uncrawled Urls obtained from the DB.
     def uncrawled_urls(limit = 0, skip = 0, &block)
       urls(false, limit, skip, &block)
     end
+    # Searches against the indexed docs in the DB for the given query.
+    #
     # Currently all searches are case insensitive.
     #
-    # Searches against the indexed docs in the DB for the given text.
-    # The searched fields are decided by the text index setup against the
+    # The searched fields are decided by the text index setup against the
     # documents collection. Currently we search against the following fields:
     # "author", "keywords", "title" and "text".
     #
-    # The MongoDB search ranks/sorts the results in order (highest first) based
-    # upon each documents textScore which records the number of text hits. We
-    # then store this textScore in each Document object for use elsewhere if
-    # needed.
+    # The MongoDB search ranks/sorts the results in order (highest first) based
+    # upon each documents textScore which records the number of query hits. We
+    # then store this textScore in each Document result object for use
+    # elsewhere if needed.
     #
-    # @param text [String] the value to search the data against.
-    # @param whole_sentence [Boolean] whether multiple words should be
-    # searched for separately.
-    # @param limit [Fixnum] the max length/count of the results array.
-    # @param skip [Fixnum] the number of results to skip, starting with the
-    # most relevant based upon the textScore of the search.
-    # @param block [Block] a block which if provided is passed to each result.
-    #
-    # @return [Array] of Document objects representing the search results.
-    def search(text, whole_sentence = false, limit = 10, skip = 0, &block)
-      text.strip!
-      text.replace("\"" + text + "\"") if whole_sentence
+    # @param query [String] The text query to search with.
+    # @param whole_sentence [Boolean] Whether the query should be matched as
+    #   a whole phrase (true) or as separate words (false, the default).
+    # @param limit [Integer] The max number of results to return.
+    # @param skip [Integer] The number of DB records to skip.
+    # @yield [doc] Given each search result (Wgit::Document).
+    # @return [Array<Wgit::Document>] The search results obtained from the DB.
+    def search(query, whole_sentence = false, limit = 10, skip = 0)
+      query.strip!
+      query.replace("\"" + query + "\"") if whole_sentence
-      # The textScore sorts based on the most search hits.
-      # We use the textScore hash as a sort and a projection below.
-      # :$caseSensitive => case_sensitive, # 3.2+ only.
-      sort_proj = { :score => { :$meta => "textScore" } }
-      query = { :$text => { :$search => text } }
+      # The sort_proj sorts based on the most search hits.
+      # We use the sort_proj hash as both a sort and a projection below.
+      # :$caseSensitive => case_sensitive, 3.2+ only.
+      sort_proj = { score: { :$meta => "textScore" } }
+      query = { :$text => { :$search => query } }
       results = retrieve(:documents, query, sort_proj, sort_proj, limit, skip)
-      return [] if results.count < 1
+      return [] if results.count < 1 # respond_to? :empty? == false
       # results.respond_to? :map! is false so we use map and overwrite the var.
       results = results.map { |mongo_doc| Wgit::Document.new(mongo_doc) }
-      return results unless block_given?
-      results.each { |doc| block.call(doc) }
+      results.each { |doc| yield(doc) } if block_given?
+      results
     end
-    # Performs a search and pretty prints the results.
-    def search_p(text, whole_sentence = false, limit = 10,
-                 skip = 0, sentence_length = 80, &block)
-      results = search(text, whole_sentence, limit, skip, &block)
-      Wgit::Utils.printf_search_results(results, text, false, sentence_length)
-    end
-    # Returns a Mongo object which can be used like a Hash to retrieve values.
+    # Returns statistics about the database.
+    #
+    # @return [BSON::Document#[]#fetch] Similar to a Hash instance.
     def stats
-        @@client.command(:dbStats => 0).documents[0]
+      @@client.command(dbStats: 0).documents[0]
     end
+    # Returns the current size of the database.
+    #
+    # @return [Integer] The current size of the DB.
     def size
-        stats[:dataSize]
+      stats[:dataSize]
     end
+    # Returns the total number of URL records in the DB.
+    #
+    # @return [Integer] The current number of URL records.
+    def num_urls
+      @@client[:urls].count
+    end
+    # Returns the total number of Document records in the DB.
+    #
+    # @return [Integer] The current number of Document records.
+    def num_docs
+      @@client[:documents].count
+    end
+    # Returns the total number of records (urls + docs) in the DB.
+    #
+    # @return [Integer] The current number of URL and Document records.
+    def num_records
+      num_urls + num_docs
+    end
+    # Returns whether or not a record with the given url (which is unique)
+    # exists in the database's 'urls' collection.
+    #
+    # @param url [Wgit::Url] The Url to search the DB for.
+    # @return [Boolean] True if url exists, otherwise false.
+    def url?(url)
+      h = { "url" => url }
+      not @@client[:urls].find(h).none?
+    end
+    # Returns whether or not a record with the given doc.url (which is unique)
+    # exists in the database's 'documents' collection.
+    #
+    # @param doc [Wgit::Document] The Document to search the DB for.
+    # @return [Boolean] True if doc exists, otherwise false.
+    def doc?(doc)
+      url = doc.respond_to?(:url) ? doc.url : doc
+      h = { "url" => url }
+      not @@client[:documents].find(h).none?
+    end
     ### Update Data ###
+    # Update a Url or Document object in the DB.
+    #
+    # @param data [Wgit::Url, Wgit::Document] The Url or Document whose DB
+    #   record should be updated.
+    # @raise [RuntimeError] If the data is not valid.
     def update(data)
       if data.is_a?(Url)
         update_url(data)
@@ -174,96 +213,134 @@ module Wgit
         raise "data is not in the correct format (all Url's or Document's)"
       end
     end
-    def update_url(url)
-      assert_type(url, Url)
-      selection = { :url => url }
-      url_hash = Wgit::Model.url(url).merge(Wgit::Model.common_update_data)
-      update = { "$set" => url_hash }
-      _update(true, :urls, selection, update)
-    end
-  def update_doc(doc)
-    assert_type(doc, Document)
-    selection = { :url => doc.url }
-    doc_hash = Wgit::Model.document(doc).merge(Wgit::Model.common_update_data)
-    update = { "$set" => doc_hash }
-    _update(true, :documents, selection, update)
-  end
-private
+  private
+    # Return if the write to the DB succeeded or not.
     def write_succeeded?(result, count = 1, multi = false)
-        case result.class.to_s
-        # Single create result.
-        when "Mongo::Operation::Write::Insert::Result"
-            result.documents.first[:err].nil?
-        # Multiple create result.
-        when "Mongo::BulkWrite::Result"
-            result.inserted_count == count
-        # Single and multiple update result.
-        when "Mongo::Operation::Write::Update::Result", # MongoDB 3.0
-             "Mongo::Operation::Write::Update::LegacyResult" # MongoDB 2.4
-            if multi
-                result.n == count
-            else
-                result.documents.first[:err].nil?
-            end
+      case result.class.to_s
+      # Single create result.
+      when "Mongo::Operation::Insert::Result"
+        result.documents.first[:err].nil?
+      # Multiple create result.
+      when "Mongo::BulkWrite::Result"
+        result.inserted_count == count
+      # Single and multiple update result.
+      when "Mongo::Operation::Update::Result"
+        if multi
+          result.n == count
         else
-            raise "Result class not currently supported: #{result.class.to_s}"
+          result.documents.first[:err].nil?
         end
+      # Class no longer used, have you upgraded the 'mongo' gem?
+      else
+        raise "Result class not currently supported: #{result.class.to_s}"
+      end
+    end
+    # Insert one or more Url objects into the DB.
+    def insert_urls(url_or_urls)
+      unless url_or_urls.respond_to?(:map)
+        assert_type(url_or_urls, Url)
+        url_or_urls = Wgit::Model.url(url_or_urls)
+      else
+        assert_arr_types(url_or_urls, Url)
+        url_or_urls = url_or_urls.map do |url|
+          Wgit::Model.url(url)
+        end
+      end
+      create(:urls, url_or_urls)
     end
-    def create(collection, data)
-        assert_type(data, [Hash, Array])
-        # Single doc.
-        if data.is_a?(Hash)
-            data.merge!(Wgit::Model.common_insert_data)
-            result = @@client[collection.to_sym].insert_one(data)
-            unless write_succeeded?(result)
-              raise "DB write (insert) failed"
-            end
-            result.n
-        # Multiple docs.
-        elsif data.is_a?(Array)
-            assert_arr_types(data, Hash)
-            data.map! do |data_hash|
-                data_hash.merge(Wgit::Model.common_insert_data)
-            end
-            result = @@client[collection.to_sym].insert_many(data)
-            unless write_succeeded?(result, data.length)
-                raise "DB write(s) failed"
-            end
-            result.inserted_count
-        else
-            raise "data must be a Hash or an Array of Hash's"
+    # Insert one or more Document objects into the DB.
+    def insert_docs(doc_or_docs)
+      unless doc_or_docs.respond_to?(:map)
+        assert_type(doc_or_docs, [Document, Hash])
+        unless doc_or_docs.is_a?(Hash)
+          doc_or_docs = Wgit::Model.document(doc_or_docs)
         end
+      else
+        assert_arr_types(doc_or_docs, [Document, Hash])
+        doc_or_docs = doc_or_docs.map do |doc|
+          Wgit::Model.document(doc) unless doc.is_a?(Hash)
+        end
+      end
+      create(:documents, doc_or_docs)
     end
-    def retrieve(collection, query, sort = {}, projection = {},
+    # Create/insert one or more Url or Document records into the DB.
+    def create(collection, data)
+      assert_type(data, [Hash, Array])
+      # Single doc.
+      if data.is_a?(Hash)
+        data.merge!(Wgit::Model.common_insert_data)
+        result = @@client[collection.to_sym].insert_one(data)
+        unless write_succeeded?(result)
+          raise "DB write (insert) failed"
+        end
+        result.n
+      # Multiple docs.
+      elsif data.is_a?(Array)
+        assert_arr_types(data, Hash)
+        data.map! do |data_hash|
+          data_hash.merge(Wgit::Model.common_insert_data)
+        end
+        result = @@client[collection.to_sym].insert_many(data)
+        unless write_succeeded?(result, data.length)
+          raise "DB write(s) failed"
+        end
+        result.inserted_count
+      else
+        raise "data must be a Hash or an Array of Hash's"
+      end
+    end
+    # Retrieve Url or Document records from the DB.
+    def retrieve(collection, query,
+                 sort = {}, projection = {},
                  limit = 0, skip = 0)
-        assert_type(query, Hash)
-        @@client[collection.to_sym].find(query).projection(projection)
-                                  .skip(skip).limit(limit).sort(sort)
+      assert_type(query, Hash)
+      @@client[collection.to_sym].find(query).projection(projection)
+                                   .skip(skip).limit(limit).sort(sort)
+    end
+    # Update a Url object in the DB.
+    def update_url(url)
+      assert_type(url, Url)
+      selection = { url: url }
+      url_hash = Wgit::Model.url(url).merge(Wgit::Model.common_update_data)
+      update = { "$set" => url_hash }
+      _update(true, :urls, selection, update)
+    end
+    # Update a Document object in the DB.
+    def update_doc(doc)
+      assert_type(doc, Document)
+      selection = { url: doc.url }
+      doc_hash = Wgit::Model.document(doc).merge(Wgit::Model.common_update_data)
+      update = { "$set" => doc_hash }
+      _update(true, :documents, selection, update)
     end
+    # Update one or more Url or Document records in the DB.
     # NOTE: The Model.common_update_data should be merged in the calling
     # method as the update param can be bespoke due to its nature.
     def _update(single, collection, selection, update)
-        assert_arr_types([selection, update], Hash)
-        if single
-          result = @@client[collection.to_sym].update_one(selection, update)
-        else
-          result = @@client[collection.to_sym].update_many(selection, update)
-        end
-        raise "DB write (update) failed" unless write_succeeded?(result)
-        result.n
+      assert_arr_types([selection, update], Hash)
+      if single
+        result = @@client[collection.to_sym].update_one(selection, update)
+      else
+        result = @@client[collection.to_sym].update_many(selection, update)
+      end
+      raise "DB write (update) failed" unless write_succeeded?(result)
+      result.n
     end
     alias :count :size
     alias :length :size
+    alias :num_documents :num_docs
+    alias :document? :doc?
     alias :insert_url :insert_urls
     alias :insert_doc :insert_docs
-    alias :search_and_format :search_p
   end
 end