hash_spidey 0.0.1

data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ .DS_Store
+
data/Gemfile ADDED
@@ -0,0 +1,10 @@
+ source 'https://rubygems.org'
+
+ group :test do
+   gem 'rspec'
+   gem 'fakeweb', '~> 1.3'
+ end
+
+
+ # Specify your gem's dependencies in hash_spidey.gemspec
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2013 dannguyen
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
+ # HashSpidey
+
+ A sloppy implementation of [joeyAghion's Spidey](https://github.com/joeyAghion/spidey) abstract web crawler, using an in-memory Hash to save pages and links. Very smelly and unstable until I figure out the best API.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'hash_spidey'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install hash_spidey
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
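The README's Usage section is still a TODO. Judging from the strategy module and specs later in this changeset, a minimal usage sketch might look like the following (`SiteSpider` and `#process_page` are hypothetical names, not part of the gem):

```ruby
require 'hash_spidey'

# Hypothetical spider; HashSpidey::AbstractSpider already mixes in the HashStore strategy.
class SiteSpider < HashSpidey::AbstractSpider
  def process_page(page, data = {})
    record_page(page)   # store the fetched page's content in the in-memory Hash
  end
end

spider = SiteSpider.new request_interval: 0
spider.handle "http://www.example.com/", :process_page   # queue a URL with a handler
spider.crawl max_urls: 10

spider.crawls.keys   # => URLs that have been fetched
spider.records       # => Hash of url => HashUrlRecord with recorded content
```

Everything stays in memory; dumping `spider.records` to an external store is left to the caller, per the gemspec summary.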
data/Rakefile ADDED
@@ -0,0 +1,20 @@
+ require 'rdoc/task'
+ require 'rubygems'
+ require 'rubygems/package_task'
+ require 'rspec/core/rake_task'
+
+ desc 'Default: run specs.'
+ task :default => :rspec
+
+ desc 'Run the specs'
+ RSpec::Core::RakeTask.new(:rspec) do |t|
+   t.rspec_opts = ['--color']
+   t.pattern = './spec/**/*_spec.rb'
+ end
+
+ spec = Gem::Specification.load("#{File.dirname(__FILE__)}/hash_spidey.gemspec")
+
+ desc "Package gem."
+ Gem::PackageTask.new(spec) do |pkg|
+   pkg.gem_spec = spec
+ end
data/hash_spidey.gemspec ADDED
@@ -0,0 +1,29 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'hash_spidey/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "hash_spidey"
+   spec.version       = HashSpidey::VERSION
+   spec.authors       = ["dannguyen"]
+   spec.email         = ["dansonguyen@gmail.com"]
+   spec.description   = %q{An implementation of joeyAghion's Spidey class at Artsy}
+   spec.summary       = %q{Uses a Hash object to store the crawling process, which it can then dump to an external store}
+   spec.homepage      = "http://github.com/dannguyen"
+   spec.license       = "MIT"
+
+   spec.files         = `git ls-files`.split($/)
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.3"
+   spec.add_development_dependency "rake"
+
+
+   spec.add_dependency 'spidey', '~> 0.1'
+   spec.add_dependency 'hashie'
+   spec.add_dependency 'addressable'
+
+ end
data/lib/hash_spidey.rb ADDED
@@ -0,0 +1,12 @@
+ require "hash_spidey/version"
+
+ require 'hashie'
+ require 'spidey'
+ require_relative 'hash_spidey/hash_url_record'
+ require_relative 'hash_spidey/strategies/hash_store_strategy'
+
+ module HashSpidey
+   class AbstractSpider < Spidey::AbstractSpider
+     include HashSpidey::Strategies::HashStore
+   end
+ end
data/lib/hash_spidey/crawl_record.rb ADDED
@@ -0,0 +1,35 @@
+ require 'hashie'
+ require 'mechanize'
+
+ module HashSpidey
+
+   class CrawlRecord < BasicObject
+
+     META_ATTS = %w(crawled_timestamp title header code response_header_charset meta_charset detected_encoding content_type)
+     attr_reader :crawled_timestamp
+
+     def initialize(obj, timestamp)
+       @crawled_timestamp = timestamp
+       @page_object = obj
+     end
+
+     def to_hash
+       msh = ::Hashie::Mash.new # explicit :: needed; BasicObject subclasses don't see top-level constants
+       META_ATTS.each do |att|
+         msh[att] = self.send(att) if self.respond_to?(att)
+       end
+       return msh
+     end
+
+     protected
+
+     def method_missing(name, *args, &block)
+       if @page_object.respond_to?(name)
+         @page_object.send(name, *args, &block)
+       else
+         super
+       end
+     end
+
+   end
+ end
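CrawlRecord is a thin BasicObject proxy: it stamps when the page was crawled and forwards any method it doesn't define itself to the wrapped page object via method_missing. A minimal sketch of that delegation, using a hypothetical `FakePage` struct in place of the Mechanize page the crawler normally supplies:

```ruby
require 'hash_spidey'

# FakePage is a stand-in for a Mechanize page object, for illustration only.
FakePage = Struct.new(:code, :title, :content_type)

page   = FakePage.new('200', 'Hello World', 'text/html')
record = HashSpidey::CrawlRecord.new(page, Time.now)

record.crawled_timestamp   # => the Time passed in (defined directly on CrawlRecord)
record.code                # => "200"        (forwarded to FakePage via method_missing)
record.content_type        # => "text/html"  (forwarded as well)
```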
data/lib/hash_spidey/hash_url_record.rb ADDED
@@ -0,0 +1,73 @@
+ require 'addressable/uri'
+ require_relative 'crawl_record'
+
+ module HashSpidey
+   class HashUrlRecord
+
+     attr_reader :url, :code,
+       :initialized_timestamp, :crawled_timestamp, :recorded_timestamp,
+       :content, :handler, :spider, :handle_data,
+       :crawl_metadata
+
+
+     # convenience constructor for spidey
+     def self.spidey_handle(url, handler, spider, opts)
+       mash_opts = Hashie::Mash.new opts
+       mash_opts.spider = spider
+       mash_opts.handler = handler
+
+       return HashUrlRecord.new url, mash_opts
+     end
+
+     def initialize(url, opts={})
+       @url = url
+       @addressable_uri = Addressable::URI.parse(@url)
+       @initialized_timestamp = Time.now
+
+       mash_opts = Hashie::Mash.new(opts)
+       @spider = mash_opts.delete :spider
+       @handler = mash_opts.delete :handler
+       @handle_data = mash_opts.delete :handle_data # not sure if needed?...
+     end
+
+
+     def record_content(ct)
+       @content = ct
+       @recorded_timestamp = Time.now
+     end
+
+     # saves metadata related to the crawl
+     def mark_as_crawled(page_obj={})
+       @crawled_timestamp = Time.now
+       # wrap the Mechanize page object in a CrawlRecord
+       @crawl_metadata = HashSpidey::CrawlRecord.new(page_obj, @crawled_timestamp)
+     end
+
+     def recorded?
+       !(@recorded_timestamp.nil?)
+     end
+
+     def crawled?
+       !(crawled_timestamp.nil?)
+     end
+
+
+     ## these are just aliases
+
+     # obvious smells
+     def collected_timestamp; @recorded_timestamp; end
+     def header; @crawl_metadata.header unless @crawl_metadata.nil? ; end
+     def code; @crawl_metadata.code unless @crawl_metadata.nil? ; end
+
+     #### url inspection methods
+     [:host, :port, :query, :scheme, :path].each do |foo|
+       define_method foo do
+         @addressable_uri.send foo
+       end
+     end
+
+     def query_values
+       @addressable_uri.query_values
+     end
+   end
+ end
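HashUrlRecord tracks the lifecycle of a single URL (queued, crawled, recorded) and delegates URL-inspection methods to Addressable::URI. A minimal sketch of the behavior the unit specs below also exercise:

```ruby
require 'hash_spidey'

rec = HashSpidey::HashUrlRecord.new 'http://www.example.com:80/stuff/?q=1'

rec.host          # => "www.example.com"   (delegated to Addressable::URI)
rec.path          # => "/stuff/"
rec.query_values  # => {"q"=>"1"}
rec.recorded?     # => false

rec.record_content 'Hello World'
rec.recorded?     # => true
rec.content       # => "Hello World"
```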
data/lib/hash_spidey/strategies/hash_store_strategy.rb ADDED
@@ -0,0 +1,112 @@
+ module HashSpidey
+   module Strategies
+
+     module HashStore
+
+       def initialize(attrs = {})
+         @url_collection = {}
+         @error_collection = []
+
+         super(attrs)
+       end
+
+       #### process strategies
+
+
+       ## convenience methods
+       def crawls
+         @url_collection.select{ |k, v| v.crawled? }
+       end
+
+
+       def uncrawled
+         @url_collection.reject{ |k, v| v.crawled? }
+       end
+
+       def records
+         @url_collection.select{ |k, v| v.recorded? }
+       end
+
+       def process_crawl(url, page)
+         h_url = @url_collection[url]
+         h_url.mark_as_crawled(page)
+       end
+
+
+       def crawl(options = {})
+         @crawl_started_at = Time.now
+         @until = Time.now + options[:crawl_for] if options[:crawl_for]
+
+         i = 0
+         each_url do |url, handler, default_data|
+           break if options[:max_urls] && i >= options[:max_urls]
+           begin
+             page = agent.get(url)
+             Spidey.logger.info "Handling #{url.inspect}"
+             process_crawl(url, page)
+             send handler, page, default_data
+           rescue => ex
+             add_error url: url, handler: handler, error: ex
+           end
+           sleep request_interval if request_interval > 0
+           i += 1
+         end
+       end
+
+
+       def handle(url, handler, handle_data = {})
+         Spidey.logger.info "Queueing #{url.inspect[0..200]}..."
+
+         spider_name = self.class.name
+         @url_collection[url] ||= HashUrlRecord.spidey_handle( url, handler, spider_name, handle_data )
+       end
+
+       # expects @url_collection to have :url, but if not, creates a new HashUrlRecord
+       def record(data_hashie)
+         url = data_hashie.url
+         h_url = @url_collection[url] || HashUrlRecord.new(url)
+
+         # set the content and recorded_timestamp of the HashUrlRecord
+         h_url.record_content(data_hashie.content)
+
+         # reassign, update collection
+         @url_collection[url] = h_url
+       end
+
+
+       # wrapper around #record
+       def record_page(page, default_data={})
+         msh = Hashie::Mash.new(default_data)
+         msh.url = page.uri.to_s
+         msh.content = page.content
+
+         record(msh)
+       end
+
+       def each_url(&block)
+         while h_url = get_next_url_hash
+           yield h_url.url, h_url.handler, h_url.handle_data
+         end
+       end
+
+       protected
+
+       def add_error(attrs)
+         @error_collection << attrs
+         Spidey.logger.error "Error on #{attrs[:url]}. #{attrs[:error].class}: #{attrs[:error].message}"
+       end
+
+
+       private
+
+       def get_next_url_hash
+         return nil if (@until && Time.now >= @until) # exceeded time bound
+
+         # uncrawled is a filtered collection
+         uncrawled.values.first
+       end
+
+
+     end
+   end
+ end
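As `#record` and `#record_page` above suggest, content can also be stored without going through `#crawl` at all: `#record` accepts any Hashie::Mash-like object with `url` and `content`, which is how the specs below exercise the store. A small sketch; `StoreOnlySpider` is a hypothetical subclass (the gem's own specs use a similar TestSpider):

```ruby
require 'hash_spidey'
require 'hashie'

class StoreOnlySpider < HashSpidey::AbstractSpider; end

spider = StoreOnlySpider.new request_interval: 0

# Store content directly, bypassing the crawler.
spider.record Hashie::Mash.new(url: 'http://www.example.com/', content: 'Hello World')

spider.records.count                                # => 1
spider.records['http://www.example.com/'].content   # => "Hello World"
spider.crawls.count                                 # => 0  (nothing was fetched)

# Recording the same URL again overwrites the stored content.
spider.record Hashie::Mash.new(url: 'http://www.example.com/', content: 'Bye World')
spider.records['http://www.example.com/'].content   # => "Bye World"
```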
data/lib/hash_spidey/version.rb ADDED
@@ -0,0 +1,3 @@
+ module HashSpidey
+   VERSION = "0.0.1"
+ end
data/spec/spec.rake ADDED
@@ -0,0 +1,17 @@
+ begin
+   require 'rspec/core/rake_task'
+
+   spec_tasks = Dir['spec/*/'].map { |d| File.basename(d) }
+
+   spec_tasks.each do |folder|
+     RSpec::Core::RakeTask.new("spec:#{folder}") do |t|
+       t.pattern = "./spec/#{folder}/**/*_spec.rb"
+       t.rspec_opts = %w(-fs --color)
+     end
+   end
+
+   desc "Run complete application spec suite"
+   task 'spec' => spec_tasks.map { |f| "spec:#{f}" }
+ rescue LoadError
+   puts "RSpec is not part of this bundle, skipping specs."
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,15 @@
+ require 'hash_spidey'
+ require 'fakeweb'
+
+ RSpec.configure do |config|
+   config.filter_run_excluding :skip => true
+   config.formatter = :documentation # :progress, :html, :textmate
+   config.fail_fast = true
+   config.before(:each) do
+   end
+
+   config.after(:each) do
+   end
+ end
+
+
data/spec/spiders/hash_store_strategy_spec.rb ADDED
@@ -0,0 +1,98 @@
+ require 'spec_helper'
+
+ describe HashSpidey::Strategies::HashStore do
+
+   before(:each) do
+
+   end
+
+
+   class TestSpider < HashSpidey::AbstractSpider
+     DEFAULT_REQUEST_INTERVAL = 0.001
+
+     include HashSpidey::Strategies::HashStore
+     def process_size(npage, data={})
+       npage.inspect
+     end
+
+   end
+
+   context 'generic #handle' do
+
+     before(:each) do
+       FakeWeb.register_uri(:get, "http://www.example.com/", :body => "Hello World", code: 200,
+         "content-type" => "text/html; charset=UTF-8"
+       )
+       @spider = TestSpider.new request_interval: 0
+       @spider.handle "http://www.example.com/", :process_size
+       @spider.crawl
+     end
+
+     describe '#crawls' do
+       it 'should only add to #crawls' do
+         expect( @spider.crawls.count ).to eq 1
+         expect( @spider.records.count ).to eq 0
+       end
+
+       it 'should update #crawled_timestamp' do
+         @crawled_url = @spider.crawls.values.first
+         expect( @crawled_url.url ).to eq 'http://www.example.com/'
+         expect( @crawled_url.crawled_timestamp > @crawled_url.initialized_timestamp ).to be_true
+       end
+
+       it 'should have #crawls act as a Hash' do
+         expect( @spider.crawls['http://www.example.com/'].url ).to eq 'http://www.example.com/'
+       end
+
+       it "should not add duplicate URLs" do
+         @spider.handle "http://www.example.com/", :process_something_else # second time
+         expect( @spider.crawls.count ).to eq 1
+       end
+
+       context '@crawl_record' do
+
+         before(:each) do
+           @crawled_url = @spider.crawls["http://www.example.com/"]
+         end
+
+         it 'should respond to #code' do
+           expect(@crawled_url.code).to eq '200'
+         end
+
+         it 'should respond to header#content-type' do
+           expect(@crawled_url.header['content-type']).to eq "text/html; charset=UTF-8"
+         end
+       end
+     end
+
+
+
+   end
+
+
+   context 'generic #record' do
+     describe '#records' do
+       before(:each) do
+
+         @data = Hashie::Mash.new url: 'http://www.example.com/', content: 'Hello World'
+         @spider = TestSpider.new request_interval: 0
+         @spider.record @data
+       end
+
+       it "should add to records" do
+         expect(@spider.records.count).to eq 1
+         expect(@spider.records['http://www.example.com/'].content).to eq 'Hello World'
+       end
+
+       it 'should update existing result' do
+         @spider.record Hashie::Mash.new url: 'http://www.example.com/', content: 'Bye World'
+         expect(@spider.records['http://www.example.com/'].content).to eq 'Bye World'
+         expect(@spider.records.count).to eq 1
+       end
+     end
+   end
+
+
+
+
+ end
data/spec/unit/hash_url_record_spec.rb ADDED
@@ -0,0 +1,73 @@
+ require 'spec_helper'
+
+ include HashSpidey
+ describe HashSpidey::HashUrlRecord do
+
+
+   context "delegate URI methods to Addressable::URI" do
+
+     before(:each) do
+       @hurl = HashUrlRecord.new 'http://www.example.com:80/stuff/?q=1&a=2&b=hello'
+     end
+
+
+     it 'should have #host' do
+       expect( @hurl.host ).to eq 'www.example.com'
+     end
+
+     it 'should have #port' do
+       expect( @hurl.port ).to eq 80
+     end
+
+     it 'should have #query' do
+       expect( @hurl.query ).to eq 'q=1&a=2&b=hello'
+     end
+
+     it 'should have #scheme' do
+       expect( @hurl.scheme ).to eq 'http'
+     end
+
+     it 'should have #path' do
+       expect( @hurl.path ).to eq '/stuff/'
+     end
+   end
+
+   context "state changes upon record and crawl" do
+     before(:each) do
+       @hurl = HashUrlRecord.new "http://www.example.com"
+     end
+
+     describe '#record_content' do
+       before(:each) do
+         @hurl.record_content 'hello'
+       end
+
+       it 'should set @recorded_timestamp' do
+         expect( @hurl.recorded_timestamp ).to be_within(2).of Time.now
+       end
+
+       it 'should set @content' do
+         expect( @hurl.content ).to eq 'hello'
+       end
+
+       it 'should have #recorded? be true' do
+         expect( @hurl.recorded? ).to be_true
+       end
+     end
+
+     describe '#mark_as_crawled' do
+       before(:each) do
+         @hurl.mark_as_crawled
+       end
+
+       it 'should set @crawled_timestamp' do
+         expect( @hurl.crawled_timestamp ).to be_within(2).of Time.now
+       end
+
+       it 'should have #crawled? be true' do
+         expect( @hurl.crawled? ).to be_true
+       end
+     end
+   end
+
+ end
metadata ADDED
@@ -0,0 +1,146 @@
+ --- !ruby/object:Gem::Specification
+ name: hash_spidey
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+   prerelease:
+ platform: ruby
+ authors:
+ - dannguyen
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-06-16 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: spidey
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '0.1'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '0.1'
+ - !ruby/object:Gem::Dependency
+   name: hashie
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: addressable
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: An implementation of joeyAghion's Spidey class at Artsy
+ email:
+ - dansonguyen@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - hash_spidey.gemspec
+ - lib/hash_spidey.rb
+ - lib/hash_spidey/crawl_record.rb
+ - lib/hash_spidey/hash_url_record.rb
+ - lib/hash_spidey/strategies/hash_store_strategy.rb
+ - lib/hash_spidey/version.rb
+ - spec/spec.rake
+ - spec/spec_helper.rb
+ - spec/spiders/hash_store_strategy_spec.rb
+ - spec/unit/hash_url_record_spec.rb
+ homepage: http://github.com/dannguyen
+ licenses:
+ - MIT
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: Uses a Hash object to store the crawling process, which it can then dump
+   to an external store
+ test_files:
+ - spec/spec.rake
+ - spec/spec_helper.rb
+ - spec/spiders/hash_store_strategy_spec.rb
+ - spec/unit/hash_url_record_spec.rb