RubyGems - spider - Versions diffs - 0.4.4 → 0.5.4 - Mend

spider 0.4.4 → 0.5.4

Files changed (77) hide show

checksums.yaml +7 -0
data/AUTHORS +17 -0
data/CHANGES +16 -0
data/LICENSE +21 -0
data/{README → README.md} +73 -44
data/lib/spider.rb +12 -29
data/lib/spider/included_in_file.rb +32 -0
data/lib/spider/included_in_memcached.rb +1 -24
data/lib/spider/included_in_redis.rb +31 -0
data/lib/spider/next_urls_in_sqs.rb +6 -29
data/lib/spider/robot_rules.rb +61 -57
data/lib/spider/spider_instance.rb +16 -35
data/spec/spider/included_in_redis_spec.rb +43 -0
data/spider.gemspec +5 -3
metadata +38 -125
data/doc/classes/BeStaticServerPages.html +0 -197
data/doc/classes/BeStaticServerPages.src/M000030.html +0 -19
data/doc/classes/BeStaticServerPages.src/M000031.html +0 -19
data/doc/classes/BeStaticServerPages.src/M000032.html +0 -18
data/doc/classes/BeStaticServerPages.src/M000033.html +0 -18
data/doc/classes/IncludedInMemcached.html +0 -199
data/doc/classes/IncludedInMemcached.src/M000015.html +0 -18
data/doc/classes/IncludedInMemcached.src/M000016.html +0 -18
data/doc/classes/IncludedInMemcached.src/M000017.html +0 -18
data/doc/classes/LoopingServlet.html +0 -137
data/doc/classes/LoopingServlet.src/M000037.html +0 -23
data/doc/classes/NextUrlsInSQS.html +0 -204
data/doc/classes/NextUrlsInSQS.src/M000018.html +0 -19
data/doc/classes/NextUrlsInSQS.src/M000019.html +0 -22
data/doc/classes/NextUrlsInSQS.src/M000020.html +0 -19
data/doc/classes/QueryServlet.html +0 -137
data/doc/classes/QueryServlet.src/M000038.html +0 -19
data/doc/classes/RobotRules.html +0 -175
data/doc/classes/RobotRules.src/M000034.html +0 -19
data/doc/classes/RobotRules.src/M000035.html +0 -67
data/doc/classes/RobotRules.src/M000036.html +0 -24
data/doc/classes/Spider.html +0 -170
data/doc/classes/Spider.src/M000029.html +0 -21
data/doc/classes/SpiderInstance.html +0 -345
data/doc/classes/SpiderInstance.src/M000021.html +0 -18
data/doc/classes/SpiderInstance.src/M000022.html +0 -22
data/doc/classes/SpiderInstance.src/M000023.html +0 -22
data/doc/classes/SpiderInstance.src/M000024.html +0 -24
data/doc/classes/SpiderInstance.src/M000025.html +0 -18
data/doc/classes/SpiderInstance.src/M000026.html +0 -18
data/doc/classes/SpiderInstance.src/M000027.html +0 -18
data/doc/classes/SpiderInstance.src/M000028.html +0 -18
data/doc/created.rid +0 -1
data/doc/files/README.html +0 -223
data/doc/files/lib/spider/included_in_memcached_rb.html +0 -142
data/doc/files/lib/spider/next_urls_in_sqs_rb.html +0 -144
data/doc/files/lib/spider/robot_rules_rb.html +0 -114
data/doc/files/lib/spider/spider_instance_rb.html +0 -117
data/doc/files/lib/spider_rb.html +0 -254
data/doc/files/spec/spec_helper_rb.html +0 -196
data/doc/files/spec/spec_helper_rb.src/M000001.html +0 -20
data/doc/files/spec/spec_helper_rb.src/M000002.html +0 -26
data/doc/files/spec/spec_helper_rb.src/M000003.html +0 -24
data/doc/files/spec/spec_helper_rb.src/M000004.html +0 -18
data/doc/files/spec/spec_helper_rb.src/M000005.html +0 -23
data/doc/files/spec/spider/included_in_memcached_spec_rb.html +0 -142
data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +0 -19
data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +0 -18
data/doc/files/spec/spider/spider_instance_spec_rb.html +0 -210
data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +0 -21
data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +0 -19
data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +0 -19
data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +0 -27
data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +0 -26
data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +0 -27
data/doc/files/spec/spider_spec_rb.html +0 -127
data/doc/files/spec/spider_spec_rb.src/M000014.html +0 -23
data/doc/fr_class_index.html +0 -34
data/doc/fr_file_index.html +0 -35
data/doc/fr_method_index.html +0 -64
data/doc/index.html +0 -24
data/doc/rdoc-style.css +0 -208

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: b87ed979c115546fa802f888fea0baf322f458be6e50e40f8ea8fd9ee392c8ac
+  data.tar.gz: eabe506949614a5622afa2def1da954d352d737dccadd0622d060be13c061115
+SHA512:
+  metadata.gz: ab52efe227f19067dd52efb0333d5901d3725f4e88fa0b86942c12bd702efa6b3bf4ed72b03d5ce58b32a22c574b0ee31764fa5e37df07a57132c250bf6b0658
+  data.tar.gz: ede92b88eb09867c41c1f7fcca58c99a55e63f02bb54fa094ece40e8875240734aa9abdcaf043db003120a44b239198dc6d063779543927febb493116732b7f3

data/AUTHORS ADDED

@@ -0,0 +1,17 @@
+The Ruby Spider Gem would not be what it is today without the help of
+the following kind souls:
+Alexandre Rousseau
+Brian Campbell
+Henri Cook
+James Edward Gray II
+Jeremy Evans
+Joao Eriberto Mota Filho
+John Buckley
+John Nagro
+Matt Horan
+Marc (@brigriffin)
+Mike Burns (original author)
+Olle Jonsson
+Sander van der Vliet
+Stuart Yamartino

data/CHANGES CHANGED

@@ -1,3 +1,19 @@
+2018-04-23 v0.5.3
+* release simply to add missing CHANGES notes
+2018-04-23 v0.5.2
+* fixed #2 thanks to @jeremyevans
+* added Redis as cache wrapper thanks to @brigriffin
+2016-09-04 v0.5.1
+* added the ability to stop a crawl
+2016-05-13 v0.5.0
+* fixed #1 thanks to @eribertomota
+* got it running on more recent versions of ruby
+* cleaned up the docs a bit
+* cleaned up the licensing and attribution
 2009-05-21
 * fixed an issue with robots.txt on ssl hosts
 * fixed an issue with pulling robots.txt from disallowed hosts

data/LICENSE ADDED

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2007-2016 Spider Team Authors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/{README → README.md} RENAMED

@@ -1,66 +1,100 @@
-Spider, a Web spidering library for Ruby. It handles the robots.txt,
-scraping, collecting, and looping so that you can just handle the data.
+# Spider
+_a Web spidering library for Ruby. It handles the robots.txt,
+scraping, collecting, and looping so that you can just handle the data._
-== Examples
+## Examples
-=== Crawl the Web, loading each page in turn, until you run out of memory
+### Crawl the Web, loading each page in turn, until you run out of memory
+```ruby
  require 'spider'
- Spider.start_at('http://mike-burns.com/') {}
+ Spider.start_at('http://cashcats.biz/') {}
+```
-=== To handle erroneous responses
+### To handle erroneous responses
+```ruby
  require 'spider'
- Spider.start_at('http://mike-burns.com/') do |s|
+ Spider.start_at('http://cashcats.biz/') do |s|
    s.on :failure do |a_url, resp, prior_url|
      puts "URL failed: #{a_url}"
      puts " linked from #{prior_url}"
    end
  end
+```
-=== Or handle successful responses
+### Or handle successful responses
+```ruby
  require 'spider'
- Spider.start_at('http://mike-burns.com/') do |s|
+ Spider.start_at('http://cashcats.biz/') do |s|
    s.on :success do |a_url, resp, prior_url|
      puts "#{a_url}: #{resp.code}"
      puts resp.body
      puts
    end
  end
+```
-=== Limit to just one domain
+### Limit to just one domain
+```ruby
  require 'spider'
- Spider.start_at('http://mike-burns.com/') do |s|
+ Spider.start_at('http://cashcats.biz/') do |s|
    s.add_url_check do |a_url|
-     a_url =~ %r{^http://mike-burns.com.*}
+     a_url =~ %r{^http://cashcats.biz.*}
    end
  end
+```
-=== Pass headers to some requests
+### Pass headers to some requests
+```ruby
  require 'spider'
- Spider.start_at('http://mike-burns.com/') do |s|
+ Spider.start_at('http://cashcats.biz/') do |s|
    s.setup do |a_url|
      if a_url =~ %r{^http://.*wikipedia.*}
        headers['User-Agent'] = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
      end
    end
  end
+```
-=== Use memcached to track cycles
+### Use memcached to track cycles
+```ruby
  require 'spider'
  require 'spider/included_in_memcached'
  SERVERS = ['10.0.10.2:11211','10.0.10.3:11211','10.0.10.4:11211']
- Spider.start_at('http://mike-burns.com/') do |s|
+ Spider.start_at('http://cashcats.biz/') do |s|
    s.check_already_seen_with IncludedInMemcached.new(SERVERS)
  end
+```
-=== Track cycles with a custom object
+### Use Redis to track cycles
+```ruby
+ require 'spider'
+ require 'spider/included_in_redis'
+ Spider.start_at('http://cashcats.biz/') do |s|
+   s.check_already_seen_with IncludedInRedis.new(host: '127.0.0.1', port: 6379)
+ end
+```
+### Use a plain text file to track cycles
+```ruby
+ require 'spider'
+ require 'spider/included_in_file'
+ Spider.start_at('http://cashcats.biz/') do |s|
+   s.check_already_seen_with IncludedInFile.new('/tmp/cashcats_crawl.txt')
+ end
+```
+### Track cycles with a custom object
+```ruby
  require 'spider'
  class ExpireLinks < Hash
    def <<(v)
@@ -71,54 +105,62 @@ scraping, collecting, and looping so that you can just handle the data.
    end
  end
- Spider.start_at('http://mike-burns.com/') do |s|
+ Spider.start_at('http://cashcats.biz/') do |s|
    s.check_already_seen_with ExpireLinks.new
  end
+```
-=== Store nodes to visit with Amazon SQS
+### Store nodes to visit with Amazon SQS
+```ruby
  require 'spider'
  require 'spider/next_urls_in_sqs'
- Spider.start_at('http://mike-burns.com') do |s|
+ Spider.start_at('http://cashcats.biz') do |s|
    s.store_next_urls_with NextUrlsInSQS.new(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
  end
+```
-==== Store nodes to visit with a custom object
+### Store nodes to visit with a custom object
+```ruby
  require 'spider'
  class MyArray < Array
    def pop
-	super
+     super
    end
    def push(a_msg)
      super(a_msg)
    end
  end
- Spider.start_at('http://mike-burns.com') do |s|
+ Spider.start_at('http://cashcats.biz') do |s|
    s.store_next_urls_with MyArray.new
  end
+```
-=== Create a URL graph
+### Create a URL graph
+```ruby
  require 'spider'
  nodes = {}
- Spider.start_at('http://mike-burns.com/') do |s|
-   s.add_url_check {|a_url| a_url =~ %r{^http://mike-burns.com.*} }
+ Spider.start_at('http://cashcats.biz/') do |s|
+   s.add_url_check {|a_url| a_url =~ %r{^http://cashcats.biz.*} }
    s.on(:every) do |a_url, resp, prior_url|
      nodes[prior_url] ||= []
      nodes[prior_url] << a_url
    end
  end
+```
-=== Use a proxy
+### Use a proxy
+```ruby
  require 'net/http_configuration'
  require 'spider'
  http_conf = Net::HTTP::Configuration.new(:proxy_host => '7proxies.org',
-                                          :proxy_port => 8881)
+                                          :proxy_port => 8881)
  http_conf.apply do
    Spider.start_at('http://img.4chan.org/b/') do |s|
      s.on(:success) do |a_url, resp, prior_url|
@@ -128,19 +170,6 @@ scraping, collecting, and looping so that you can just handle the data.
      end
    end
  end
+```
-== Author
-John Nagro john.nagro@gmail.com
-Mike Burns http://mike-burns.com mike@mike-burns.com (original author)
-Many thanks to:
-Matt Horan
-Henri Cook
-Sander van der Vliet
-John Buckley
-Brian Campbell
-With `robot_rules' from James Edward Gray II via
-http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589
+_Copyright (c) 2007-2016 Spider Team Authors_

data/lib/spider.rb CHANGED

@@ -1,41 +1,24 @@
-# Copyright 2007-2008 Mike Burns & John Nagro
-# :include: README
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#      * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#      * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#      * Neither the name Mike Burns nor the
-#      names of his contributors may be used to endorse or promote products
-#      derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 require File.dirname(__FILE__)+'/spider/spider_instance'
 # A spidering library for Ruby. Handles robots.txt, scraping, finding more
 # links, and doing it all over again.
 class Spider
+  VERSION_INFO = [0, 5, 4] unless defined?(self::VERSION_INFO)
+  VERSION = VERSION_INFO.map(&:to_s).join('.') unless defined?(self::VERSION)
+  def self.version
+    VERSION
+  end
   # Runs the spider starting at the given URL. Also takes a block that is given
   # the SpiderInstance. Use the block to define the rules and handlers for
   # the discovered Web pages. See SpiderInstance for the possible rules and
   # handlers.
   #
-  #  Spider.start_at('http://mike-burns.com/') do |s|
+  #  Spider.start_at('http://cashcats.biz/') do |s|
   #    s.add_url_check do |a_url|
-  #      a_url =~ %r{^http://mike-burns.com.*}
+  #      a_url =~ %r{^http://cashcats.biz.*}
   #    end
   #
   #    s.on 404 do |a_url, resp, prior_url|
@@ -52,8 +35,8 @@ class Spider
   #  end
   def self.start_at(a_url, &block)
-    rules    = RobotRules.new('Ruby Spider 1.0')
-    a_spider = SpiderInstance.new({nil => a_url}, [], rules, [])
+    rules    = RobotRules.new("Ruby Spider #{Spider::VERSION}")
+    a_spider = SpiderInstance.new({nil => [a_url]}, [], rules, [])
     block.call(a_spider)
     a_spider.start!
   end

data/lib/spider/included_in_file.rb ADDED

@@ -0,0 +1,32 @@
+# Use a plain text file to track cycles.
+# A specialized class using a plain text file to track stored items. It supports
+# three operations: new, <<, and include? . Together these can be used to
+# add items to the text file, then determine whether the item has been added.
+#
+# To use it with Spider use the check_already_seen_with method:
+#
+#  Spider.start_at('http://example.com/') do |s|
+#    s.check_already_seen_with IncludedInFile.new('/tmp/crawled.log')
+#  end
+class IncludedInFile
+  # Construct a new IncludedInFile instance.
+  # @param filepath [String] as path of file to store crawled URL
+  def initialize(filepath)
+    @filepath = filepath
+    # create file if not exists
+    File.write(@filepath, '') unless File.file?(@filepath)
+    @urls = File.readlines(@filepath).map(&:chomp)
+  end
+  # Add an item to the file & array of URL.
+  def <<(v)
+    @urls << v.to_s
+    File.write(@filepath, "#{v}\r\n", File.size(@filepath), mode: 'a')
+  end
+  # True if the item is in the file.
+  def include?(v)
+    @urls.include? v.to_s
+  end
+end

data/lib/spider/included_in_memcached.rb CHANGED

@@ -1,32 +1,9 @@
 # Use memcached to track cycles.
-#
-# Copyright 2007 Mike Burns
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#      * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#      * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#      * Neither the name Mike Burns nor the
-#      names of his contributors may be used to endorse or promote products
-#      derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 require 'memcache'
 # A specialized class using memcached to track items stored. It supports
-# three operations: new, <<, and include? . Together these can be used to
+# three operations: new, <<, and include? . Together these can be used to
 # add items to the memcache, then determine whether the item has been added.
 #
 # To use it with Spider use the check_already_seen_with method:

data/lib/spider/included_in_redis.rb ADDED

@@ -0,0 +1,31 @@
+# Use Redis to track cycles.
+require 'redis'
+require 'json'
+# A specialized class using Redis to track items stored. It supports
+# three operations: new, <<, and include? . Together these can be used to
+# add items to Redis, then determine whether the item has been added.
+#
+# To use it with Spider use the check_already_seen_with method:
+#
+#  Spider.start_at('http://example.com/') do |s|
+#    s.check_already_seen_with IncludedInRedis.new(host: '127.0.0.1', port: 6379)
+#  end
+class IncludedInRedis
+  # Construct a new IncludedInRedis instance. All arguments here are
+  # passed to Redis (part of the redis gem).
+  def initialize(*a)
+    @c = Redis.new(*a)
+  end
+  # Add an item to Redis
+  def <<(v)
+    @c.set(v.to_s, v.to_json)
+  end
+  # True if the item is in Redis
+  def include?(v)
+    @c.get(v.to_s) == v.to_json
+  end
+end

data/lib/spider/next_urls_in_sqs.rb CHANGED

@@ -1,34 +1,11 @@
 # Use AmazonSQS to track nodes to visit.
-#
-# Copyright 2008 John Nagro
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#      * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#      * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#      * Neither the name Mike Burns nor the
-#      names of his contributors may be used to endorse or promote products
-#      derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 require 'rubygems'
 require 'right_aws'
 require 'yaml'
 # A specialized class using AmazonSQS to track nodes to walk. It supports
-# two operations: push and pop . Together these can be used to
+# two operations: push and pop . Together these can be used to
 # add items to the queue, then pull items off the queue.
 #
 # This is useful if you want multiple Spider processes crawling the same
@@ -47,8 +24,8 @@ class NextUrlsInSQS
     @sqs = RightAws::SqsGen2.new(aws_access_key, aws_secret_access_key)
     @queue = @sqs.queue(queue_name)
   end
-  # Pull an item off the queue, loop until data is found. Data is
+  # Pull an item off the queue, loop until data is found. Data is
   # encoded with YAML.
   def pop
     while true
@@ -57,10 +34,10 @@ class NextUrlsInSQS
       sleep 5
     end
   end
   # Put data on the queue. Data is encoded with YAML.
   def push(a_msg)
     encoded_message = YAML::dump(a_msg)
     @queue.push(a_msg)
-  end
-end
+  end
+end