RubyGems - clownfish - Versions diffs - 0.3.0 - Mend

clownfish 0.3.0

Files changed (24) hide show

checksums.yaml +15 -0
data/.gitignore +17 -0
data/.rspec +2 -0
data/Gemfile +3 -0
data/LICENSE.txt +22 -0
data/README.md +80 -0
data/Rakefile +6 -0
data/clownfish.gemspec +22 -0
data/ideas.txt +8 -0
data/lib/clownfish.rb +10 -0
data/lib/clownfish/adapter.rb +62 -0
data/lib/clownfish/anemone_ext.rb +16 -0
data/lib/clownfish/fish/count.rb +11 -0
data/lib/clownfish/fish/links_by_page.rb +60 -0
data/lib/clownfish/fish/response_times.rb +20 -0
data/lib/clownfish/helpers/status_group.rb +66 -0
data/lib/clownfish/helpers/url_statuses.rb +39 -0
data/lib/clownfish/version.rb +3 -0
data/spec/adapter_spec.rb +144 -0
data/spec/links_by_page_spec.rb +80 -0
data/spec/spec_helper.rb +70 -0
data/spec/status_group_spec.rb +58 -0
data/spec/url_statuses_spec.rb +91 -0
metadata +99 -0

checksums.yaml ADDED

@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    MGYzYmQ4YjNlYmEzNDAxODEwM2M5ODMyZjE2NzQ2N2MwOTczODIwZQ==
+  data.tar.gz: !binary |-
+    NDBiYjYzMzcwZTQ2MzExYWRkY2ZhMzA2NzFkMWNiODU1ODQwY2MxOQ==
+!binary "U0hBNTEy":
+  metadata.gz: !binary |-
+    NTk3N2U5OTEwNmI0MGY2ZDVhYWY4OGZiNDRmMDk5OGNhMjg3NTFiMjMxOTlh
+    YjYxZmZlNjQxMGViZWJhZGQzZjk3NjBlZTM4NGRjNDk2YzQxZmJlZTAzMTU2
+    YTBmNTYxNGY1MDJjNzc3OGI3NmJmOGYwN2ZhZTc2MDQwOTQwM2M=
+  data.tar.gz: !binary |-
+    ZmFkMTJhODdmMmY5ODllZGY2MGMyOTUxNTRhNWE0MzQ2OGZhZTcxZmQyMTA0
+    YjY5ZjlmNWZiNGJmYTZhYWJjMWRkOWI2MDMwZjg5MzVkODA3NWFjMmQ3ZTQ4
+    NjEzOTUxYjIzMWQwZmFhMTQxYjA5ZmNhNmYxMjY5YzI4OGUyZjg=

data/.gitignore ADDED

@@ -0,0 +1,17 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp

data/.rspec ADDED

	@@ -0,0 +1,2 @@
1	+ --color
2	+ --format progress

data/Gemfile ADDED

@@ -0,0 +1,3 @@
+source 'https://rubygems.org'
+gemspec

data/LICENSE.txt ADDED

@@ -0,0 +1,22 @@
+Copyright (c) 2013 Paul Salaets
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED

@@ -0,0 +1,80 @@
+# Clownfish
+Helper for [Anemone](http://anemone.rubyforge.org/). Makes common crawls easier to repeat.
+## Installation
+Add this line to your application's Gemfile:
+    gem 'clownfish'
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install clownfish
+## Usage
+```ruby
+require 'clownfish'
+clownfish = MyClownfish.new
+Anemone.crawl_with_clownfish(start_url, clownfish)
+# query clownfish for data from crawl
+```
+## Clownfish Spec
+A clownfish is an object that has one or more of the following instance methods:
+Reference: [Anemone RDocs](http://anemone.rubyforge.org/doc/index.html)
+### anemone_options
+Returns a `Hash` of `Symbol` to values. See [Anemone::Core::DEFAULT_OPTS](http://git.io/wFmCfA) for available options.
+This is forwarded as the second argument to `Anemone.crawl`. Invoked once before crawl.
+### skip_links_like
+Returns a single `Regexp` or `Array` of `Regexp`. Urls matching any of these will not be crawled. Invoked once before crawl.
+### on_every_page
+Takes one argument, an `Anemone::Page`. Invoked once per page during crawl.
+### focus_crawl
+Takes one argument, an `Anemone::Page`. Returns the links on that page that should be crawled. Invoked once per page during crawl.
+### after_crawl
+Takes one argument, an `Anemone::PageStore`. Invoked once after crawl is done.
+## What's Included
+See [wiki](https://github.com/psalaets/clownfish/wiki) for examples.
+### Clownfish::LinksByPage
+Lists every page that has links, the links and the status code when following those links.
+### Clownfish::ResponseTimes
+Records every url and its response time.
+### Clownfish::Count
+Counts pages.
+## Contributing
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED

@@ -0,0 +1,6 @@
+require "bundler/gem_tasks"
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new  # registers RSpec's rake task (named :spec by default, per the RSpec docs)
+task :default => :spec  # a bare `rake` therefore runs the :spec task

data/clownfish.gemspec ADDED

@@ -0,0 +1,22 @@
+# -*- encoding: utf-8 -*-
+lib = File.expand_path('lib', File.dirname(__FILE__))  # absolute path of the lib/ directory next to this gemspec
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)  # so the require below can resolve clownfish/version
+require 'clownfish/version'
+Gem::Specification.new do |gem|
+  gem.name          = "clownfish"
+  gem.version       = Clownfish::VERSION
+  gem.authors       = ["Paul Salaets"]
+  gem.email         = ["psalaets@gmail.com"]
+  gem.summary       = "Anemone helper"
+  gem.description   = "Anemone helper making common crawls easier to repeat."
+  gem.homepage      = "https://github.com/psalaets/clownfish"
+  gem.license       = "MIT"
+  gem.files         = `git ls-files`.split($/)  # output of `git ls-files`, split on the record separator ($/)
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})  # the subset of files under test/, spec/ or features/
+  gem.require_paths = ["lib"]
+  gem.add_dependency('anemone', '~> 0.7.2')
+  gem.add_development_dependency('rspec', '~> 2.12')
+end

data/ideas.txt ADDED

@@ -0,0 +1,8 @@
+syntax ideas:
+Anemone.crawl('url', :with => clownfish)
+Anemone.crawl('url').with(clownfish)
+Anemone.crawl_with_clownfish('url', clownfish)

data/lib/clownfish.rb ADDED

@@ -0,0 +1,10 @@
+require "clownfish/version"
+require "clownfish/adapter"
+require "clownfish/anemone_ext"
+require "clownfish/helpers/status_group"
+require "clownfish/helpers/url_statuses"
+require "clownfish/fish/links_by_page"
+require "clownfish/fish/response_times"
+require "clownfish/fish/count"

data/lib/clownfish/adapter.rb ADDED

@@ -0,0 +1,62 @@
+module Clownfish
+  # Adapter between Anemone and clownfish objects.
+  class Adapter
+    # Internal: Create an Adapter that wraps a clownfish.
+    #
+    # clownfish - Object that conforms to clownfish spec.  See README.md.
+    def initialize(clownfish)
+      raise ArgumentError, "clownfish cannot be nil" if clownfish.nil?
+      @delegate = clownfish
+    end
+    # Internal: Forwards Anemone options from clownfish.
+    #
+    # Returns Hash of Anemone options, never nil.
+    def anemone_options
+      (@delegate.respond_to?(:anemone_options) && @delegate.anemone_options) || {}
+    end
+    # Internal: Connects clownfish to Anemone.
+    #
+    # anemone - Instance of Anemone::Core.
+    #
+    # Returns nothing.
+    def hook_into_anemone(anemone)
+      wire_up_after_crawl(anemone)
+      wire_up_on_every_page(anemone)
+      wire_up_focus_crawl(anemone)
+      relay_skip_links_like(anemone)
+    end
+    private
+    # Connects delegate's after_crawl to Anemone.
+    def wire_up_after_crawl(anemone)
+      anemone.after_crawl do |page_store|
+        @delegate.after_crawl(page_store)
+      end if @delegate.respond_to?(:after_crawl)
+    end
+    # Connects delegate's on_every_page to Anemone.
+    def wire_up_on_every_page(anemone)
+      anemone.on_every_page do |page|
+        @delegate.on_every_page(page)
+      end if @delegate.respond_to?(:on_every_page)
+    end
+    # Connects delegate's focus_crawl to Anemone.
+    def wire_up_focus_crawl(anemone)
+      anemone.focus_crawl do |page|
+        @delegate.focus_crawl(page) || []
+      end if @delegate.respond_to?(:focus_crawl)
+    end
+    # Passes delegate's skip_links_like to Anemone.
+    def relay_skip_links_like(anemone)
+      if @delegate.respond_to?(:skip_links_like)
+        regexes = @delegate.skip_links_like
+        anemone.skip_links_like([regexes].flatten)
+      end
+    end
+  end
+end

data/lib/clownfish/anemone_ext.rb ADDED

@@ -0,0 +1,16 @@
+require "anemone"
+module Anemone
+  # Public: Starts an Anemone crawl with a clownfish.
+  #
+  # urls      - String or Array of Strings telling where to start crawl from.
+  # clownfish - Object that conforms to clownfish spec.  See README.md.
+  #
+  # Returns nothing.
+  def self.crawl_with_clownfish(urls, clownfish)
+    adapter = Clownfish::Adapter.new(clownfish)
+    self.crawl(urls, adapter.anemone_options) do |anemone|
+      adapter.hook_into_anemone(anemone)
+    end
+  end
+end

data/lib/clownfish/fish/count.rb ADDED

@@ -0,0 +1,11 @@
+module Clownfish
+  # Clownfish that counts number of pages on a site.  Taken from Anemone.
+  class Count
+    # Number of pages found.  Only meaningful after a crawl.
+    attr_reader :count
+    def after_crawl(page_store)
+      @count = page_store.uniq!.size
+    end
+  end
+end

data/lib/clownfish/fish/links_by_page.rb ADDED

@@ -0,0 +1,60 @@
+module Clownfish
+  # Clownfish that records every link on a page and the repsonse status codes
+  # when the links are followed.
+  class LinksByPage
+    # Hash of url String to UrlStatuses.  The values are all links found on page
+    # at the key.
+    attr_reader :links_by_page
+    def initialize
+      @links_by_page = {}
+    end
+    def anemone_options
+      # Not looking at page bodies so don't keep them around
+      {:discard_page_bodies => true}
+    end
+    def on_every_page(page)
+      # First url in crawl has no page
+      referer = page.referer ? page.referer.to_s : '[starting point]'
+      @links_by_page[referer] = UrlStatuses.new unless @links_by_page.include? referer
+      links = @links_by_page[referer]
+      links.add_url(page.url.to_s, page.code)
+    end
+    # Print links by page to stdout.
+    #
+    # options - Hash specifying what and how to report.
+    #           :to     - IO to print report to.  Defaults to STDOUT.
+    #           :status - One or Array of status specifiers. Defaults to :all.
+    #                     Only links with these statues will be reported.  See
+    #                     Clownfish::StatusGroup for accepted status specifiers.
+    def report(options = {})
+      options = report_options(options)
+      out = options[:to]
+      specifiers = options[:status]
+      @links_by_page.each do |page, link_statuses|
+        link_status_pairs = link_statuses.query(specifiers)
+        unless link_status_pairs.empty?
+          out.puts "#{page}"
+          link_status_pairs.each do |link, status|
+            out.puts "#{status} #{link}"
+          end
+          out.puts
+        end
+      end
+    end
+    private
+    def report_options(options)
+      defaults = {:to => STDOUT, :status => :all}
+      defaults.merge(options)
+    end
+  end
+end

data/lib/clownfish/fish/response_times.rb ADDED

@@ -0,0 +1,20 @@
+module Clownfish
+  # Clownfish that records the response time of every url.
+  class ResponseTimes
+    # Hash where key is url String and value is number (milliseconds).
+    attr_reader :times_by_url
+    def initialize
+      @times_by_url = {}
+    end
+    def anemone_options
+      # Not looking at page bodies so don't keep them around
+      {:discard_page_bodies => true}
+    end
+    def on_every_page(page)
+      @times_by_url[page.url.to_s] = page.response_time
+    end
+  end
+end

data/lib/clownfish/helpers/status_group.rb ADDED

@@ -0,0 +1,66 @@
+module Clownfish
+  # One or more response status codes.  StatusGroups are filled with status
+  # specifiers to determine what is in the group.
+  #
+  # Status specifiers can be Integer status codes like 200, Integer Ranges like
+  # 400..404 or any of the following Symbols:
+  #   :all          - 2xx through 5xx (everything the other aliases cover)
+  #   :success      - 2xx
+  #   :redirect     - 3xx
+  #   :non_error    - 2xx through 3xx
+  #   :client_error - 4xx
+  #   :server_error - 5xx
+  #   :error        - 4xx through 5xx
+  class StatusGroup
+    # Alias Symbol to the Range of status codes it stands for.  Frozen so the
+    # shared table cannot be altered at runtime.
+    ALIASES = {
+      :all          => 200..599,
+      :success      => 200..299,
+      :redirect     => 300..399,
+      :non_error    => 200..399,
+      :client_error => 400..499,
+      :server_error => 500..599,
+      :error        => 400..599
+    }.freeze
+    # Public: Create a new group.
+    #
+    # specifiers - One or more status specifiers or an Array of status
+    #              specifiers.
+    def initialize(*specifiers)
+      @members = []
+      # flatten lets callers pass either a list of arguments or a single Array
+      specifiers.flatten.each do |specifier|
+        self << specifier
+      end
+    end
+    # Public: Add a status specifier to this group.
+    #
+    # specifier - A status specifier
+    #
+    # Returns self for chaining purposes.
+    def <<(specifier)
+      # Known aliases are stored as their Range; anything else is stored as is
+      @members << (resolve_alias(specifier) || specifier)
+      self
+    end
+    # Public: Tells if this group includes a given status code.
+    #
+    # status - Integer status code
+    #
+    # Returns true if status is included, false otherwise.
+    def include?(status)
+      # === matches an Integer member by equality and a Range member by
+      # membership, so both kinds of member are tested the same way
+      @members.any? { |member| member === status }
+    end
+    private
+    # Resolves a group alias to its Range.
+    #
+    # group_alias - Symbol representing a set of status codes.
+    #
+    # Returns Range specified by group_alias or nil if there is none.
+    def resolve_alias(group_alias)
+      ALIASES[group_alias]
+    end
+  end
+end

data/lib/clownfish/helpers/url_statuses.rb ADDED

@@ -0,0 +1,39 @@
+module Clownfish
+  # Helper class for pairing urls with status codes.
+  class UrlStatuses
+    attr_reader :status_codes_by_url
+    def initialize
+      @status_codes_by_url = {}
+    end
+    def add_url(url, status_code)
+      @status_codes_by_url[url] = status_code
+    end
+    def each(&block)
+      @status_codes_by_url.each(&block)
+    end
+    def size
+      @status_codes_by_url.size
+    end
+    def empty?
+      size == 0
+    end
+    # Public: Gets url/status code pairs that match one of the specified status
+    # codes.
+    #
+    # status_group_specifiers - One, many or an Array of status group specifiers
+    #                           as accepted by StatusGroup.new.
+    #
+    # Returns url/status pairs that match status specifiers.
+    def query(*status_group_specifiers)
+      group = StatusGroup.new(status_group_specifiers)
+      @status_codes_by_url.find_all { |url, code| group.include? code }
+    end
+  end
+end

data/lib/clownfish/version.rb ADDED

@@ -0,0 +1,3 @@
+module Clownfish
+  VERSION = "0.3.0"  # gem version; clownfish.gemspec reads it for gem.version
+end

data/spec/adapter_spec.rb ADDED

@@ -0,0 +1,144 @@
+require 'spec_helper'
+module Clownfish
+  describe Adapter do
+    context ".new" do
+      it "doesn't accept nil delegate" do
+        expect { Adapter.new(nil) }.to raise_error(ArgumentError)
+      end
+    end
+    context "#anemone_options" do
+      it "forwards anemone_options from delegate" do
+        delegate = double('delegate')
+        delegate.stub(:anemone_options) {{:name => 'bob'}}
+        adapter = Adapter.new(delegate)
+        adapter.anemone_options.should eq({:name => 'bob'})
+      end
+      it "returns empty Hash if delegate has no options" do
+        delegate = double('delegate')
+        delegate.stub(:anemone_options) {nil}  # method exists but yields no options
+        adapter = Adapter.new(delegate)
+        adapter.anemone_options.should eq({})
+      end
+      it "returns empty Hash if delegate doesn't support anemone_options" do
+        # Has no anemone_options method
+        delegate = Object.new
+        adapter = Adapter.new(delegate)
+        adapter.anemone_options.should eq({})
+      end
+    end
+    context "hooking into Anemone" do
+      before :each do
+        @page_store = Object.new
+        @page1, @page2 = Object.new, Object.new
+        @anemone = FakeAnemone.new(@page_store, @page1, @page2)  # FakeAnemone (spec_helper) yields these to registered blocks right away
+      end
+      it "wires up after_crawl when delegate supports it" do
+        delegate = double('delegate')
+        delegate.should_receive(:after_crawl).with(@page_store).once  # must be set before hooking in: the fake fires immediately
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+      end
+      it "ignores after_crawl when not supported" do
+        delegate = Object.new  # plain Object: responds to none of the clownfish methods
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)  # passes as long as nothing is raised
+      end
+      it "wires up on_every_page when delegate supports it" do
+        delegate = double('delegate')
+        delegate.should_receive(:on_every_page).with(@page1).once
+        delegate.should_receive(:on_every_page).with(@page2).once
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+      end
+      it "ignores on_every_page when not supported" do
+        delegate = Object.new
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+      end
+      it "wires up focus_crawl when delegate supports it" do
+        delegate = double('delegate')
+        delegate.should_receive(:focus_crawl).with(@page1) {['url1']}.once
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+        @anemone.last_focus_crawl_links.should eq(['url1'])
+      end
+      it "focuses on no links when delegate doesn't focus on any" do
+        delegate = double('delegate')
+        delegate.should_receive(:focus_crawl).with(@page1) {nil}  # nil from the delegate should reach Anemone as []
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+        @anemone.last_focus_crawl_links.should eq([])
+      end
+      it "ignores focus_crawl when delegate doesn't support it" do
+        delegate = Object.new
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+      end
+      it "relays skip_links_like regex when delegate returns one" do
+        delegate = double('delegate')
+        delegate.stub(:skip_links_like) {/a/}
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+        @anemone.last_skip_links_like_regexes.should eq([/a/])  # a single Regexp is wrapped in an Array
+      end
+      it "relays skip_links_like regexes when delegate returns many" do
+        delegate = double('delegate')
+        delegate.stub(:skip_links_like) {[/a/, /b/]}
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+        @anemone.last_skip_links_like_regexes.should eq([/a/, /b/])
+      end
+      it "ignores skip_links_like when not supported" do
+        delegate = Object.new
+        adapter = Adapter.new(delegate)
+        adapter.hook_into_anemone(@anemone)
+      end
+    end # end of hooking into Anemone
+  end # end of describe Adapter
+end # end of Clownfish module

data/spec/links_by_page_spec.rb ADDED

@@ -0,0 +1,80 @@
+require 'spec_helper'
+require 'stringio'
+module Clownfish
+  describe LinksByPage do
+    describe "#report" do
+      before :each do
+        @home = FakePage.new('home.com', 200)  # no referer given: stands for the crawl's starting url
+        @links = FakePage.new('links.com', 200, 'home.com')  # arguments are url, status code, referer
+        @client = FakePage.new('client.com', 404, 'links.com')
+        @server = FakePage.new('server.com', 500, 'links.com')
+      end
+      it "reports all statuses by default" do
+        fish = LinksByPage.new
+        fish.on_every_page(@client)
+        fish.on_every_page(@server)
+        out = StringIO.new  # captures the report instead of printing to STDOUT
+        fish.report(:to => out)
+        out.string.should =~ %r{404 http://client.com\n500 http://server.com}
+      end
+      it "reports specified status when specifier given" do
+        fish = LinksByPage.new
+        fish.on_every_page(@client)
+        fish.on_every_page(@server)
+        out = StringIO.new
+        fish.report(:to => out, :status => :server_error)
+        out.string.should =~ %r{500 http://server.com}
+        out.string.should_not =~ /404/
+      end
+      it "reports specified statuses when many specified" do
+        fish = LinksByPage.new
+        fish.on_every_page(@links)
+        fish.on_every_page(@client)
+        fish.on_every_page(@server)
+        out = StringIO.new
+        fish.report(:to => out, :status => [500, 200..204])
+        out.string.should =~ %r{200 http://links.com}
+        out.string.should =~ %r{500 http://server.com}
+        out.string.should_not =~ /404/
+      end
+      it "omits page if none of its links will be shown" do
+        fish = LinksByPage.new
+        fish.on_every_page(@links)
+        fish.on_every_page(@client)
+        fish.on_every_page(@server)
+        out = StringIO.new
+        fish.report(:to => out, :status => [304])  # none of the recorded pages has status 304
+        out.string.should_not =~ %r{http://home.com}
+        out.string.should_not =~ %r{http://links.com}
+      end
+      it "shows referer of links with no referer as [starting point]" do
+        fish = LinksByPage.new
+        fish.on_every_page(@home)
+        out = StringIO.new
+        fish.report(:to => out)
+        out.string.should =~ /\[starting point\]/
+      end
+    end
+  end
+end

data/spec/spec_helper.rb ADDED

@@ -0,0 +1,70 @@
+require 'clownfish'
+require 'uri'
+RSpec.configure do |config|
+  config.treat_symbols_as_metadata_keys_with_true_values = true  # RSpec 2 setting: a bare :focus tag counts as :focus => true
+  config.run_all_when_everything_filtered = true  # run the whole suite when the filter below matches nothing
+  config.filter_run :focus  # otherwise run only examples tagged :focus
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = 'random'
+end
+# Matcher for unordered equality Array
+RSpec::Matchers.define :have_same_elements_as do |expected|
+  match do |actual|
+    expected.sort == actual.sort
+  end
+end
+module Clownfish
+  # Fake Anemone::Core to help with tests.
+  class FakeAnemone
+    attr_reader :last_focus_crawl_links
+    attr_reader :last_skip_links_like_regexes
+    def initialize(page_store, page1, page2)
+      @page_store = page_store
+      @page1 = page1
+      @page2 = page2
+    end
+    def after_crawl
+      yield(@page_store)
+    end
+    def on_every_page
+      yield(@page1)
+      yield(@page2)
+    end
+    def focus_crawl
+      @last_focus_crawl_links = yield(@page1)
+    end
+    def skip_links_like(regexes)
+      @last_skip_links_like_regexes = regexes
+    end
+  end
+  # Fake and minimal Anemone::Page to help with tests.
+  class FakePage
+    attr_reader :url, :referer, :code
+    def initialize(url, code = 200, referer = nil)
+      @url = urlify(url)
+      @referer = urlify(referer)
+      @code = code
+    end
+    def urlify(str)
+      return str if str.class == URI || str.nil?
+      str = "http://#{str}" unless str.start_with? 'http'
+      URI(str)
+    end
+  end
+end

data/spec/status_group_spec.rb ADDED

@@ -0,0 +1,58 @@
+require 'spec_helper'
+module Clownfish
+  describe StatusGroup do
+    context "#<<" do
+      it "takes alias Symbols, Integers and Integer Ranges" do
+        group = StatusGroup.new
+        group << :client_error
+        group << 500
+        group << 200..204
+        group.include?(404).should be_true
+        group.include?(500).should be_true
+        group.include?(200).should be_true
+        group.include?(304).should be_false
+      end
+      it "can be chained" do
+        group = StatusGroup.new
+        group << :client_error << 304 << :server_error
+        group.include?(404).should be_true
+        group.include?(500).should be_true
+        group.include?(304).should be_true
+        group.include?(200).should be_false
+      end
+    end
+    context ".new" do
+      it "can take a single status specifier" do
+        group = StatusGroup.new(:server_error)
+        group.include?(500).should be_true
+        group.include?(200).should be_false
+      end
+      it "can take multiple status specifiers" do
+        group = StatusGroup.new(200, :redirect, 400..406)
+        group.include?(200).should be_true
+        group.include?(301).should be_true
+        group.include?(401).should be_true
+        group.include?(204).should be_false
+      end
+      it "can take Array of status specifiers" do
+        group = StatusGroup.new([:success, 500, 300..304])
+        group.include?(500).should be_true
+        group.include?(200).should be_true
+        group.include?(302).should be_true
+        group.include?(404).should be_false
+      end
+    end
+  end
+end

data/spec/url_statuses_spec.rb ADDED

@@ -0,0 +1,91 @@
+require 'spec_helper'
+module Clownfish
+  describe UrlStatuses do
+    it "accumulates urls with status codes" do
+      statuses = UrlStatuses.new
+      statuses.add_url('http://ok.com', 200)
+      statuses.add_url('http://huh.com', 404)
+      statuses.status_codes_by_url.should eq({'http://ok.com' => 200, 'http://huh.com' => 404})
+    end
+    it "starts off empty" do
+      statuses = UrlStatuses.new
+      statuses.empty?.should be_true
+    end
+    it "knows how many urls it has" do
+      statuses = UrlStatuses.new
+      statuses.add_url('http://ok.com', 200)
+      statuses.add_url('http://huh.com', 404)
+      statuses.size.should eq(2)
+    end
+    context '#each' do
+      it "yields url/code pairs to 2-arg block" do
+        statuses = UrlStatuses.new
+        statuses.add_url('http://ok.com', 200)
+        statuses.add_url('http://huh.com', 404)
+        pairs = []
+        statuses.each { |k, v| pairs << [k, v] }
+        pairs.should have_same_elements_as([['http://ok.com', 200], ['http://huh.com', 404]])  # order-insensitive matcher from spec_helper
+      end
+      it "yields url/code Array to 1-arg block" do
+        statuses = UrlStatuses.new
+        statuses.add_url('http://ok.com', 200)
+        statuses.add_url('http://huh.com', 404)
+        pairs = []
+        statuses.each { |p| pairs << p }  # with one block parameter each pair arrives as a [url, code] Array
+        pairs.should have_same_elements_as([['http://ok.com', 200], ['http://huh.com', 404]])
+      end
+    end
+    context '#query' do
+      it "returns url/status pairs that match a specifier" do
+        statuses = UrlStatuses.new
+        statuses.add_url('http://ok.com', 200)
+        statuses.add_url('http://huh.com', 404)
+        pairs = statuses.query(200)
+        pairs.should have_same_elements_as([['http://ok.com', 200]])
+      end
+      it "returns url/status pairs that match any specifier" do
+        statuses = UrlStatuses.new
+        statuses.add_url('http://ok.com', 200)
+        statuses.add_url('http://huh.com', 404)
+        statuses.add_url('http://ohno.com', 500)
+        pairs = statuses.query(200, :server_error)  # an Integer and an alias Symbol mixed in one query
+        pairs.should have_same_elements_as([['http://ok.com', 200], ['http://ohno.com', 500]])
+      end
+      it "returns empty Array if no pairs match a specifier" do
+        statuses = UrlStatuses.new
+        statuses.add_url('http://ok.com', 200)
+        statuses.add_url('http://huh.com', 404)
+        pairs = statuses.query(500, :redirect)
+        pairs.empty?.should be_true
+      end
+    end
+  end
+end

metadata ADDED

@@ -0,0 +1,99 @@
+--- !ruby/object:Gem::Specification
+name: clownfish
+version: !ruby/object:Gem::Version
+  version: 0.3.0
+platform: ruby
+authors:
+- Paul Salaets
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-02-27 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: anemone
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.7.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.7.2
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.12'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.12'
+description: Anemone helper making common crawls easier to repeat.
+email:
+- psalaets@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- .rspec
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- clownfish.gemspec
+- ideas.txt
+- lib/clownfish.rb
+- lib/clownfish/adapter.rb
+- lib/clownfish/anemone_ext.rb
+- lib/clownfish/fish/count.rb
+- lib/clownfish/fish/links_by_page.rb
+- lib/clownfish/fish/response_times.rb
+- lib/clownfish/helpers/status_group.rb
+- lib/clownfish/helpers/url_statuses.rb
+- lib/clownfish/version.rb
+- spec/adapter_spec.rb
+- spec/links_by_page_spec.rb
+- spec/spec_helper.rb
+- spec/status_group_spec.rb
+- spec/url_statuses_spec.rb
+homepage: https://github.com/psalaets/clownfish
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.0
+signing_key:
+specification_version: 4
+summary: Anemone helper
+test_files:
+- spec/adapter_spec.rb
+- spec/links_by_page_spec.rb
+- spec/spec_helper.rb
+- spec/status_group_spec.rb
+- spec/url_statuses_spec.rb