gscraper 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING.txt +339 -0
- data/History.txt +21 -0
- data/Manifest.txt +23 -10
- data/README.txt +17 -21
- data/Rakefile +3 -6
- data/lib/gscraper.rb +22 -0
- data/lib/gscraper/extensions.rb +22 -0
- data/lib/gscraper/extensions/uri.rb +22 -0
- data/lib/gscraper/extensions/uri/http.rb +25 -71
- data/lib/gscraper/extensions/uri/query_params.rb +96 -0
- data/lib/gscraper/gscraper.rb +30 -0
- data/lib/gscraper/has_pages.rb +114 -0
- data/lib/gscraper/licenses.rb +22 -0
- data/lib/gscraper/page.rb +64 -0
- data/lib/gscraper/search.rb +24 -0
- data/lib/gscraper/search/ajax_query.rb +176 -0
- data/lib/gscraper/search/page.rb +27 -72
- data/lib/gscraper/search/query.rb +46 -457
- data/lib/gscraper/search/result.rb +32 -29
- data/lib/gscraper/search/search.rb +44 -3
- data/lib/gscraper/search/web_query.rb +472 -0
- data/lib/gscraper/sponsored_ad.rb +26 -2
- data/lib/gscraper/sponsored_links.rb +77 -8
- data/lib/gscraper/version.rb +23 -1
- data/spec/extensions/uri/http_spec.rb +9 -0
- data/spec/extensions/uri/query_params_spec.rb +38 -0
- data/spec/gscraper_spec.rb +29 -0
- data/spec/has_pages_examples.rb +19 -0
- data/spec/has_sponsored_links_examples.rb +57 -0
- data/spec/helpers/query.rb +1 -0
- data/spec/helpers/uri.rb +8 -0
- data/spec/page_has_results_examples.rb +13 -0
- data/spec/search/ajax_query_spec.rb +124 -0
- data/spec/search/page_has_results_examples.rb +51 -0
- data/spec/search/query_spec.rb +103 -0
- data/spec/search/web_query_spec.rb +74 -0
- data/spec/spec_helper.rb +6 -0
- data/tasks/spec.rb +7 -0
- metadata +34 -20
- data/LICENSE.txt +0 -23
- data/lib/gscraper/web_agent.rb +0 -38
- data/test/search/page_results.rb +0 -103
- data/test/search/query_from_url.rb +0 -50
- data/test/search/query_pages.rb +0 -32
- data/test/search/query_result.rb +0 -30
- data/test/test_gscraper.rb +0 -4
data/lib/gscraper/licenses.rb
CHANGED
@@ -1,3 +1,25 @@
+#
+#--
+# GScraper - A web-scraping interface to various Google Services.
+#
+# Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#++
+#
+
 module GScraper
   module Licenses
     # Any desired license
data/lib/gscraper/page.rb
ADDED
@@ -0,0 +1,64 @@
+#
+#--
+# GScraper - A web-scraping interface to various Google Services.
+#
+# Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#++
+#
+
+module GScraper
+  class Page < Array
+
+    #
+    # Creates a new Page object with the given _elements_. If a _block_
+    # is given, it will be passed the newly created Page object.
+    #
+    def initialize(elements=[],&block)
+      super(elements)
+
+      block.call(self) if block
+    end
+
+    #
+    # Returns a mapped Array of the elements within the Page using the
+    # given _block_. If the _block_ is not given, the page will be
+    # returned.
+    #
+    #   page.map # => Page
+    #
+    #   page.map { |element| element.field } # => [...]
+    #
+    def map(&block)
+      return self unless block
+
+      mapped = []
+
+      each { |element| mapped << block.call(element) }
+      return mapped
+    end
+
+    #
+    # Selects the elements within the Page which match the given _block_.
+    #
+    #   page.select { |element| element.field =~ /ruby/i }
+    #
+    def select(&block)
+      self.class.new(super(&block))
+    end
+
+  end
+end
data/lib/gscraper/search.rb
CHANGED
@@ -1 +1,25 @@
+#
+#--
+# GScraper - A web-scraping interface to various Google Services.
+#
+# Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#++
+#
+
+require 'gscraper/search/web_query'
+require 'gscraper/search/ajax_query'
 require 'gscraper/search/search'
data/lib/gscraper/search/ajax_query.rb
ADDED
@@ -0,0 +1,176 @@
+#
+#--
+# GScraper - A web-scraping interface to various Google Services.
+#
+# Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#++
+#
+
+require 'gscraper/search/result'
+require 'gscraper/search/page'
+require 'gscraper/search/query'
+require 'gscraper/extensions/uri'
+require 'gscraper/has_pages'
+require 'gscraper/gscraper'
+
+require 'json'
+
+module GScraper
+  module Search
+    class AJAXQuery < Query
+
+      include HasPages
+
+      # Maximum results per-page
+      RESULTS_PER_PAGE = 8
+
+      # AJAX API host
+      API_HOST = 'www.google.com'
+
+      # AJAX API URL
+      API_URL = "http://#{API_HOST}/uds/GwebSearch?callback=google.search.WebSearch.RawCompletion&context=0&lstkp=0&rsz=large"
+
+      # Default language
+      DEFAULT_LANGUAGE = 'en'
+
+      # Default signature
+      DEFAULT_SIG = '582c1116317355adf613a6a843f19ece'
+
+      # Default key
+      DEFAULT_KEY = 'notsupplied'
+
+      # Default version
+      DEFAULT_VERSION = '1.0'
+
+      # The search language
+      attr_accessor :language
+
+      # The search signature
+      attr_accessor :sig
+
+      # The search key
+      attr_accessor :key
+
+      # The API version
+      attr_accessor :version
+
+      #
+      # Creates a new AJAXQuery with the given _options_. If a _block_ is
+      # given it will be passed the newly created AJAXQuery object.
+      #
+      # _options_ may contain the following keys:
+      # <tt>:language</tt>:: The search language. Defaults to <tt>:en</tt>.
+      # <tt>:sig</tt>:: The search signature. Defaults to
+      #                 +582c1116317355adf613a6a843f19ece+.
+      # <tt>:key</tt>:: The search key. Defaults to <tt>:notsupplied</tt>.
+      # <tt>:version</tt>:: The desired API version. Defaults to
+      #                     <tt>1.0</tt>.
+      #
+      def initialize(options={},&block)
+        @agent = GScraper.web_agent(options)
+
+        @language = (options[:language] || DEFAULT_LANGUAGE)
+
+        @sig = (options[:sig] || DEFAULT_SIG)
+        @key = (options[:key] || DEFAULT_KEY)
+        @version = (options[:version] || DEFAULT_VERSION)
+
+        super(options,&block)
+      end
+
+      #
+      # Creates a new AJAXQuery object from the specified URL. If a block is
+      # given, it will be passed the newly created AJAXQuery object.
+      #
+      def self.from_url(url,options={},&block)
+        url = URI(url.to_s)
+
+        options[:language] = url.query_params['hl']
+        options[:query] = url.query_params['q']
+
+        options[:sig] = url.query_params['sig']
+        options[:key] = url.query_params['key']
+        options[:version] = url.query_params['v']
+
+        return self.new(options,&block)
+      end
+
+      #
+      # Returns +RESULTS_PER_PAGE+.
+      #
+      def results_per_page
+        RESULTS_PER_PAGE
+      end
+
+      #
+      # Returns the URL that represents the query.
+      #
+      def search_url
+        search_url = URI(API_URL)
+
+        search_url.query_params['hl'] = @language
+        search_url.query_params['gss'] = '.com'
+        search_url.query_params['q'] = expression
+        search_url.query_params['sig'] = @sig
+        search_url.query_params['key'] = @key
+        search_url.query_params['v'] = @version
+
+        return search_url
+      end
+
+      #
+      # Returns the URL that represents the query of a specific
+      # _page_index_.
+      #
+      def page_url(page_index)
+        url = search_url
+
+        if page_index > 1
+          url.query_params['start'] = result_offset_of(page_index)
+        end
+
+        return url
+      end
+
+      #
+      # Returns a Page object containing Result objects at the specified
+      # _page_index_.
+      #
+      def page(page_index)
+        Page.new do |new_page|
+          body = @agent.get(page_url(page_index)).body
+          hash = JSON.parse(body.scan(/\{.*\}/).first)
+
+          rank_offset = result_offset_of(page_index)
+
+          if (hash.kind_of?(Hash) && hash['results'])
+            hash['results'].each_with_index do |result,index|
+              rank = rank_offset + (index + 1)
+              title = Hpricot(result['title']).inner_text
+              url = result['unescapedUrl']
+              summary = Hpricot(result['content']).inner_text
+              cached_url = result['cacheUrl']
+
+              new_page << Result.new(rank,title,url,summary,cached_url)
+            end
+          end
+        end
+      end
+
+    end
+  end
+end
data/lib/gscraper/search/page.rb
CHANGED
@@ -1,42 +1,31 @@
+#
+#--
+# GScraper - A web-scraping interface to various Google Services.
+#
+# Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#++
+#
+
 require 'gscraper/search/result'
+require 'gscraper/page'
 
 module GScraper
   module Search
-    class Page < Array
-
-      #
-      # Creates a new Page object with the given _results_.
-      #
-      def initialize(results=[])
-        super(results)
-      end
-
-      #
-      # Returns a mapped Array of the results within the Page using the
-      # given _block_. If the _block_ is not given, the page will be
-      # returned.
-      #
-      #   page.map # => Page
-      #
-      #   page.map { |result| result.url } # => [...]
-      #
-      def map(&block)
-        return self unless block
-
-        mapped = []
-
-        each { |result| mapped << block.call(result) }
-        return mapped
-      end
-
-      #
-      # Selects the results within the Page which match the given _block_.
-      #
-      #   page.select { |result| result.title =~ /ruby/i }
-      #
-      def select(&block)
-        Page.new(super(&block))
-      end
+    class Page < GScraper::Page
 
       #
       # Selects the results using the specified _block_.
@@ -160,7 +149,7 @@ module GScraper
       #   page.cached_urls # => [...]
       #
       def cached_urls
-        map { |result| result.cached_url }
+        map { |result| result.cached_url }.compact
       end
 
       #
@@ -170,7 +159,7 @@ module GScraper
       #   page.cached_pages # => [...]
       #
       def cached_pages
-        map { |result| result.cached_page }
+        map { |result| result.cached_page }.compact
       end
 
       #
@@ -180,17 +169,7 @@ module GScraper
       #   page.similar_urls # => [...]
       #
       def similar_urls
-        map { |result| result.similar_url }
-      end
-
-      #
-      # Returns an Array containing the similar Queries of the results
-      # within the Page.
-      #
-      #   page.similar_queries # => [...]
-      #
-      def similar_queries
-        map { |result| result.similar_query }
+        map { |result| result.similar_url }.compact
       end
 
       #
@@ -263,20 +242,6 @@ module GScraper
         similar_urls.each(&block)
       end
 
-      #
-      # Iterates over each result's similar Query within the Page, passing
-      # each to the given _block_.
-      #
-      #   each_similar_query do |q|
-      #     q.first_page do |page|
-      #       puts page.urls.join("\n")
-      #     end
-      #   end
-      #
-      def each_similar_query(&block)
-        similar_queries.each(&block)
-      end
-
       #
       # Returns the ranks of the results that match the specified _block_.
       #
@@ -345,16 +310,6 @@ module GScraper
         results_with(&block).similar_urls
       end
 
-      #
-      # Returns the similar Queries of the results that match the
-      # specified _block_.
-      #
-      #   page.similar_queries_of { |result| result.title =~ /hackety/ }
-      #
-      def similar_queries_of(&block)
-        results_with(&block).similar_queries
-      end
-
     end
   end
 end
data/lib/gscraper/search/query.rb
CHANGED
@@ -1,10 +1,33 @@
+#
+#--
+# GScraper - A web-scraping interface to various Google Services.
+#
+# Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#++
+#
+
 require 'gscraper/search/result'
 require 'gscraper/search/page'
 require 'gscraper/sponsored_ad'
 require 'gscraper/sponsored_links'
 require 'gscraper/extensions/uri'
+require 'gscraper/has_pages'
 require 'gscraper/licenses'
-require 'gscraper/web_agent'
+require 'gscraper/gscraper'
 
 require 'hpricot'
 
@@ -12,20 +35,6 @@ module GScraper
   module Search
     class Query
 
-      include WebAgent
-
-      # Search host
-      SEARCH_HOST = 'www.google.com'
-
-      # Search URL
-      SEARCH_URL = "http://#{SEARCH_HOST}/search"
-
-      # Default results per-page
-      RESULTS_PER_PAGE = 10
-
-      # Results per-page
-      attr_accessor :results_per_page
-
       # Search query
       attr_accessor :query
 
@@ -71,67 +80,14 @@ module GScraper
       # Search for results with-out the words
      attr_accessor :without_words
 
-      # Search for results written in the language
-      attr_accessor :language
-
-      # Search for results from the region
-      attr_accessor :region
-
-      # Search for results in the format
-      attr_accessor :in_format
-
-      # Search for results not in the format
-      attr_accessor :not_in_format
-
-      # Search for results within the past day
-      attr_accessor :within_past_day
-
-      # Search for results within the past week
-      attr_accessor :within_past_week
-
-      # Search for results within the past months
-      attr_accessor :within_past_months
-
-      # Search for results within the past year
-      attr_accessor :within_past_year
-
       # Search for results containing numbers between the range
       attr_accessor :numeric_range
 
-      # Search for results where the query ocurrs within the area
-      attr_accessor :occurrs_within
-
-      # Search for results inside the domain
-      attr_accessor :inside_domain
-
-      # Search for results outside the domain
-      attr_accessor :outside_domain
-
-      # Search for results which have the rights
-      attr_accessor :rights
-
-      # Filter the search results
-      attr_accessor :filtered
-
-      # Search for results similar to the page
-      attr_accessor :similar_to
-
-      # Search for results linking to the page
-      attr_accessor :links_to
-
       #
       # Creates a new Query object from the given search options. If a
-      # block is given, it will be passed the newly created
-      #
-      #   Query.new(:query => 'ruby', :with_words => 'sow rspec')
-      #
-      #   Query.new(:exact_phrase => 'fluent interfaces') do |q|
-      #     q.within_past_week = true
-      #   end
+      # block is given, it will be passed the newly created Query object.
      #
      def initialize(options={},&block)
-        @results_per_page = (options[:results_per_page] || RESULTS_PER_PAGE)
-
        @query = options[:query]
 
        @link = options[:link]
@@ -151,175 +107,34 @@ module GScraper
         @with_words = options[:with_words]
         @without_words = options[:without_words]
 
-        @language = options[:language]
-        @region = options[:region]
-        @in_format = options[:in_format]
-        @not_in_format = options[:not_in_format]
-
-        if options[:within_past_day]
-          @within_past_day = options[:within_past_day]
-          @within_past_week = false
-          @within_past_months = false
-          @within_past_year = false
-        elsif options[:within_past_week]
-          @within_past_day = false
-          @within_past_week = options[:within_past_week]
-          @within_past_months = false
-          @within_past_year = false
-        elsif options[:within_past_months]
-          @within_past_day = false
-          @within_past_week = false
-          @within_past_months = options[:within_past_months]
-          @within_past_year = false
-        elsif options[:within_past_year]
-          @within_past_day = false
-          @within_past_week = false
-          @within_past_months = false
-          @within_past_year = options[:within_past_year]
-        else
-          @within_past_day = false
-          @within_past_week = false
-          @within_past_months = false
-          @within_past_year = false
-        end
-
         @numeric_range = options[:numeric_range]
-        @occurrs_within = options[:occurrs_within]
-        @inside_domain = options[:inside_domain]
-        @outside_domain = options[:outside_domain]
-        @rights = options[:rights]
-        @filtered = options[:filtered]
-
-        @similar_to = options[:similar_to]
-        @links_to = options[:links_to]
 
         block.call(self) if block
       end
 
       #
-      #
-      # given, it will be passed the newly created Query object.
-      #
-      #   Query.from_url('http://www.google.com/search?q=ruby+zen)
+      # Returns the query expression.
       #
-
-
-      #     q.occurrs_within = :title
-      #   end
-      #
-      def self.from_url(url,options={},&block)
-        url = URI.parse(url)
-
-        options[:results_per_page] = url.query_params['num']
-
-        options[:query] = url.query_params['as_q']
-        options[:exact_phrase] = url.query_params['as_epq']
-        options[:with_words] = url.query_params['as_oq']
-        options[:without_words] = url.query_params['as_eq']
-
-        options[:language] = url.query_params['lr']
-        options[:region] = url.query_params['cr']
-
-        case url.query_params['as_ft']
-        when 'i'
-          options[:in_format] = url.query_params['as_filetype']
-        when 'e'
-          options[:not_in_format] = url.query_params['as_filetype']
-        end
-
-        case url.query_params['as_qdr']
-        when 'd'
-          options[:within_past_day] = true
-        when 'w'
-          options[:within_past_week] = true
-        when 'm'
-          options[:within_past_months] = 1
-        when 'm2'
-          options[:within_past_months] = 2
-        when 'm3'
-          options[:within_past_months] = 3
-        when 'm6'
-          options[:within_past_months] = 6
-        when 'y'
-          options[:within_past_year] = true
-        end
-
-        if (url.query_params['as_nlo'] || url.query_params['as_nhi'])
-          options[:numeric_range] = Range.new(url.query_params['as_nlo'].to_i,url.query_params['as_nhi'].to_i)
-        end
-
-        case url.query_params['as_occt']
-        when 'title'
-          options[:occurrs_within] = :title
-        when 'body'
-          options[:occurrs_within] = :body
-        when 'url'
-          options[:occurrs_within] = :url
-        when 'links'
-          options[:occurrs_within] = :links
-        end
-
-        case url.query_params['as_dt']
-        when 'i'
-          options[:inside_domain] = url.query_params['as_sitesearch']
-        when 'e'
-          options[:outside_domain] = url.query_params['as_sitesearch']
-        end
-
-        case url.query_params['as_rights']
-        when '(cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial|cc_nonderived)'
-          options[:rights] = Licenses::CC_BY_NC_ND
-        when '(cc_publicdomain|cc_attribute|cc_sharealike|cc_nonderived).-(cc_noncommercial)'
-          options[:rights] = Licenses::CC_BY_SA
-        when '(cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial).-(cc_nonderived)'
-          options[:rights] = Licenses::CC_BY_NC
-        when '(cc_publicdomain|cc_attribute|cc_sharealike).-(cc_noncommercial|cc_nonderived)'
-          options[:rights] = Licenses::CC_BY
-        end
-
-        if url.query_params[:safe]=='active'
-          options[:filtered] = true
-        end
-
-        if url.query_params['as_rq']
-          options[:similar_to] = url.query_params['as_rq']
-        elsif url.query_params['as_lq']
-          options[:links_to] = url.query_params['as_lq']
-        end
-
-        return self.new(options,&block)
-      end
-
-      #
-      # Returns the URL that represents the query.
-      #
-      def search_url
-        url = URI(SEARCH_URL)
-        query_expr = []
-
-        set_param = lambda { |param,value|
-          url.query_params[param.to_s] = value if value
-        }
+      def expression
+        expr = []
 
         append_modifier = lambda { |name|
           modifier = instance_variable_get("@#{name}")
 
-          query_expr << "#{name}:#{modifier}" if modifier
+          expr << "#{name}:#{modifier}" if modifier
         }
 
-
+        append_options = lambda { |name|
          ops = instance_variable_get("@#{name}")
 
          if ops.kind_of?(Array)
-            query_expr << "#{name}:#{ops.join(' ')}"
+            expr << "#{name}:#{ops.join(' ')}"
          elsif ops
-            query_expr << "#{name}:#{ops}"
+            expr << "#{name}:#{ops}"
          end
        }
 
-
-
-        query_expr << @query if @query
+        expr << @query if @query
 
         append_modifier.call(:link)
         append_modifier.call(:related)
@@ -327,256 +142,30 @@ module GScraper
         append_modifier.call(:site)
         append_modifier.call(:filetype)
 
-
+        append_options.call(:allintitle)
         append_modifier.call(:intitle)
-
+        append_options.call(:allinurl)
         append_modifier.call(:inurl)
-
+        append_options.call(:allintext)
         append_modifier.call(:intext)
 
-
-
-        end
-
-        set_param.call('as_epq',@exact_phrase)
-        set_param.call('as_oq',@with_words)
-        set_param.call('as_eq',@without_words)
-
-        set_param.call('lr',@language)
-        set_param.call('cr',@region)
-
-        if @in_format
-          url.query_params['as_ft'] = 'i'
-          url.query_params['as_filtetype'] = @in_format
-        elsif @not_in_format
-          url.query_params['as_ft'] = 'e'
-          url.query_params['as_filtetype'] = @not_in_format
-        end
-
-        if @within_past_day
-          url.query_params['as_qdr'] = 'd'
-        elsif @within_past_week
-          url.query_params['as_qdr'] = 'w'
-        elsif @within_past_months
-          case @within_past_months
-          when 1
-            url.query_params['as_qdr'] = 'm'
-          when 2
-            url.query_params['as_qdr'] = 'm2'
-          when 3
-            url.query_params['as_qdr'] = 'm3'
-          when 6
-            url.query_params['as_qdr'] = 'm6'
-          end
-        elsif @within_past_year
-          url.query_params['as_qdr'] = 'y'
-        end
-
-        if @numeric_range
-          url.query_params['as_nlo'] = @numeric_range.begin
-          url.query_params['as_nhi'] = @numeric_range.end
-        end
-
-        case @occurrs_within
-        when :title, 'title'
-          url.query_params['as_occt'] = 'title'
-        when :body, 'body'
-          url.query_params['as_occt'] = 'body'
-        when :url, 'url'
-          url.query_params['as_occt'] = 'url'
-        when :links, 'links'
-          url.query_params['as_occt'] = 'links'
+        if @exact_phrase
+          expr << "\"#{@exact_phrase}\""
         end
 
-        if @inside_domain
-          url.query_params['as_dt'] = 'i'
-          url.query_params['as_sitesearch'] = @inside_domain
-        elsif @outside_domain
-          url.query_params['as_dt'] = 'e'
-          url.query_params['as_sitesearch'] = @outside_domain
+        if @with_words.kind_of?(Array)
+          expr << @with_words.join(' OR ')
        end
-
-
-
-          url.query_params['as_rights'] = '(cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial|cc_nonderived)'
-        when Licenses::CC_BY_SA
-          url.query_params['as_rights'] = '(cc_publicdomain|cc_attribute|cc_sharealike|cc_nonderived).-(cc_noncommercial)'
-        when Licenses::CC_BY_ND
-          url.query_params['as_rights'] = '(cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial).-(cc_nonderived)'
-        when Licenses::CC_BY
-          url.query_params['as_rights'] = '(cc_publicdomain|cc_attribute|cc_sharealike).-(cc_noncommercial|cc_nonderived)'
+
+        if @without_words.kind_of?(Array)
+          expr << @without_words.map { |word| "-#{word}" }.join(' ')
        end
 
-
-
-        if @similar_to
-          url.query_params['as_rq'] = @similar_to
-        elsif @links_to
-          url.query_params['as_lq'] = @links_to
+        if @numeric_range.kind_of?(Range)
+          expr << "#{@numeric_range.begin}..#{@numeric_range.end}"
        end
 
-        return url
-      end
-
-      #
-      # Returns the URL that represents the query at the specific
-      # _page_index_.
-      #
-      def page_url(page_index)
-        url = search_url
-
-        url.query_params['start'] = page_result_offset(page_index)
-        url.query_params['sa'] = 'N'
-
-        return url
-      end
-
-      #
-      # Returns a Page object containing Result objects at the specified
-      # _page_index_. If a _block_ is given, it will be passed the newly
-      # created Page.
-      #
-      def page(page_index,&block)
-        doc = get_page(page_url(page_index))
-
-        new_page = Page.new
-        results = doc.search('//div.g')[0...@results_per_page.to_i]
-
-        results.each_with_index do |result,index|
-          rank = page_result_offset(page_index) + (index + 1)
-          link = result.at('//a.l')
-          title = link.inner_text
-          url = link.get_attribute('href')
-          summary_text = ''
-          cached_url = nil
-          similar_url = nil
-
-          if (content = (result.at('//td.j//font|//td.j/div.sml')))
-            content.children.each do |elem|
-              break if (!(elem.text?) && elem.name=='br')
-
-              summary_text << elem.inner_text
-            end
-
-            if (cached_link = result.at('nobr/a:first'))
-              cached_url = cached_link.get_attribute('href')
-            end
-
-            if (similar_link = result.at('nobr/a:last'))
-              similar_url = "http://#{SEARCH_HOST}" + similar_link.get_attribute('href')
-            end
-          end
-
-          new_page << Result.new(rank,title,url,summary_text,cached_url,similar_url)
-        end
-
-        block.call(new_page) if block
-        return new_page
-      end
-
-      #
-      # Returns the Results on the first page. If a _block_ is given it
-      # will be passed the newly created Page.
-      #
-      def first_page(&block)
-        page(1,&block)
-      end
-
-      #
-      # Returns the Result at the specified _index_.
-      #
-      def result_at(index)
-        page(result_page_index(index))[page_result_index(index)]
-      end
-
-      #
-      # Returns the first Result on the first_page.
-      #
-      def top_result
-        result_at(1)
-      end
-
-      #
-      # Iterates over the results at the specified _page_index_, passing
-      # each to the given _block_.
-      #
-      #   query.each_on_page(2) do |result|
-      #     puts result.title
-      #   end
-      #
-      def each_on_page(page_index,&block)
-        page(page_index).each(&block)
-      end
-
-      #
-      # Iterates over the results on the first page, passing each to the
-      # given _block_.
-      #
-      #   query.each_on_first_page do |result|
-      #     puts result.url
-      #   end
-      #
-      def each_on_first_page(&block)
-        each_on_page(1,&block)
-      end
-
-      #
-      # Returns a SponsoredLinks object containing SponsoredAd objects of
-      # the query. If a _block_ is given, it will be passed the newly
-      # created Page.
-      #
-      def sponsored_links(&block)
-        doc = get_page(search_url)
-        new_links = SponsoredLinks.new
-
-        # top and side ads
-        doc.search('//a[@id="pa1"]|//a[@id*="an"]').each do |link|
-          title = link.inner_text
-          url = "http://#{SEARCH_HOST}" + link.get_attribute('href')
-
-          new_links << SponsoredAd.new(title,url)
-        end
-
-        block.call(new_links) if block
-        return new_links
-      end
-
-      #
-      # Returns the first sponsored link on the first page of results.
-      #
-      def top_sponsored_link
-        top_sponsored_links.first
-      end
-
-      #
-      # Iterates over the sponsored links on the first page of
-      # results passing each to the specified _block_.
-      #
-      def each_sponsored_link(&block)
-        sponsored_links.each(&block)
-      end
-
-      protected
-
-      #
-      # Returns the rank offset for the specified _page_index_.
-      #
-      def page_result_offset(page_index)
-        (page_index.to_i - 1) * @results_per_page.to_i
-      end
-
-      #
-      # Returns the in-Page index of the _result_index_.
-      #
-      def page_result_index(result_index)
-        (result_index.to_i - 1) % @results_per_page.to_i
-      end
-
-      #
-      # Returns the page index for the specified _result_index_
-      #
-      def result_page_index(result_index)
-        ((result_index.to_i - 1) / @results_per_page.to_i) + 1
+        return expr.join(' ')
      end
 
    end
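
With the scraping logic moved out to WebQuery and AJAXQuery, Query is now reduced to holding the search options and building the shared query expression. A small sketch of the new #expression method in use (the expected output is derived from the code shown above, not from gem documentation):

  require 'gscraper/search/query'

  query = GScraper::Search::Query.new(
    :query         => 'interfaces',
    :exact_phrase  => 'fluent interfaces',
    :without_words => ['java', 'c++']
  )

  query.expression   # => 'interfaces "fluent interfaces" -java -c++'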