scrapers 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.rspec-example +2 -0
  4. data/.watchr +27 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +6 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +32 -0
  9. data/Rakefile +1 -0
  10. data/lib/scrapers/allrecipes.rb +49 -0
  11. data/lib/scrapers/discoverynews.rb +28 -0
  12. data/lib/scrapers/download.rb +37 -0
  13. data/lib/scrapers/gocomics.rb +43 -0
  14. data/lib/scrapers/imgur.rb +56 -0
  15. data/lib/scrapers/nasa_apod.rb +60 -0
  16. data/lib/scrapers/version.rb +7 -0
  17. data/lib/scrapers.rb +9 -0
  18. data/scrapers.gemspec +29 -0
  19. data/spec/scrapers/allrecipes_spec.rb +29 -0
  20. data/spec/scrapers/discoverynews_spec.rb +39 -0
  21. data/spec/scrapers/download_spec.rb +36 -0
  22. data/spec/scrapers/gocomics_spec.rb +46 -0
  23. data/spec/scrapers/imgur_spec.rb +31 -0
  24. data/spec/scrapers/nasa_apod_spec.rb +30 -0
  25. data/spec/scrapers_spec.rb +9 -0
  26. data/spec/spec_helper.rb +31 -0
  27. data/vcr_cassettes/allrecipes_morning-glory-muffins-i.yml +1047 -0
  28. data/vcr_cassettes/disconews_history-of-space.yml +385 -0
  29. data/vcr_cassettes/download_cassette.yml +431 -0
  30. data/vcr_cassettes/exampleatspecscrapersnasaapodspecrb16_cassette.yml +326 -0
  31. data/vcr_cassettes/exampleatspecscrapersnasaapodspecrb21_cassette.yml +326 -0
  32. data/vcr_cassettes/exampleatspecscrapersnasaapodspecrb22_cassette.yml +312 -0
  33. data/vcr_cassettes/gocomics_nonsequitur.yml +336 -0
  34. data/vcr_cassettes/shouldincludecontentencoded_cassette.yml +326 -0
  35. data/vcr_cassettes/shouldincludedescription_cassette.yml +326 -0
  36. data/vcr_cassettes/shouldincludeguid_cassette.yml +326 -0
  37. data/vcr_cassettes/shouldincludelink_cassette.yml +326 -0
  38. data/vcr_cassettes/shouldincludepubDate_cassette.yml +326 -0
  39. data/vcr_cassettes/shouldincludetitle_cassette.yml +326 -0
  40. metadata +203 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: eef6a353955404ad8beaf88e4db8ab4b0a4f3cec
4
+ data.tar.gz: 87ec094c87d315640c1ebb274b6f334b6dd73646
5
+ SHA512:
6
+ metadata.gz: ece9775696f757216e7c9fd9c2ef2a6ff4a1bd3a9dcbf1fd754caed7c751bdeb1f055a2ea6694b216dfaf61f7a1678cedc777fd0281d660d73e68f34399f2c90
7
+ data.tar.gz: 370b23a91c38f349a8f02a6fe4ed8770bb58017b43876ad736c449087da7b303aae605f2a314e76b6489bdfc89aaceaa2588f94a6e6e1d7b3e0423c50f2c5f63
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .rspec
data/.rspec-example ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.watchr ADDED
@@ -0,0 +1,27 @@
1
# -*- ruby -*-

# Run a single spec file through rspec, skipping paths that no
# longer exist (watchr may fire for deleted files).
def run_spec(file)
  unless File.exist?(file)
    puts "#{file} does not exist"
    return
  end

  puts "Running #{file}"
  system "bundle exec rspec #{file}"
  puts
end

# A changed top-level spec file re-runs itself.
# FIX: the old pattern "^spec/*_spec.rb$" treated "/*" as
# "zero-or-more slashes" and so never matched real spec files.
watch("^spec/[^/]*_spec\.rb$") do |match|
  run_spec match[0]
end

# A changed nested spec file re-runs itself (same "/*" fix).
watch("^spec/.*/[^/]*_spec\.rb$") do |match|
  run_spec match[0]
end

# A changed lib file re-runs its mirrored spec.
watch("^lib/(.*)\.rb$") do |match|
  run_spec "spec/#{match[1]}_spec.rb"
end

# Nested lib files map to their mirrored nested spec.
watch("^lib/(.*/.*)\.rb$") do |match|
  run_spec "spec/#{match[1]}_spec.rb"
end
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gems are fetched from the canonical public registry.
source 'https://rubygems.org'

# Specify your gem's dependencies in scrapers.gemspec
gemspec
data/Guardfile ADDED
@@ -0,0 +1,6 @@
1
# Guard configuration: automatically re-run specs when files change.
guard :rspec do
  # A changed spec file re-runs itself.
  watch(%r{^spec/.+_spec\.rb$})
  # A changed lib file re-runs its mirrored spec.
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
  # Changing the spec helper re-runs the whole suite.
  watch('spec/spec_helper.rb') { "spec" }
end
6
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Tamara Temple
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # Scrapers
2
+
3
+ A library of web site scrapers utilizing mechanize and other goodies. Helpful in gathering images, moving things, saving things, etc.
4
+
5
+ LICENSE:: MIT
6
+ WEBSITE:: http://github.com/tamouse/scrapers
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'scrapers'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install scrapers
21
+
22
+ ## Usage
23
+
24
+ See the various RDoc for explanation of each item.
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,49 @@
1
+ require 'mechanize'
2
+
3
+
4
module Scrapers

  # Scraper for allrecipes.com recipe detail pages.
  module AllRecipes

    # Scrape a single recipe page.
    #
    # url - String URL of an allrecipes.com recipe detail page.
    #
    # Returns a Hash with :url, :title, :ingredients (Array of "*"-
    # prefixed strings), :directions (Array of "# "-prefixed strings)
    # and :photo (Hash of the recipe image's attributes).
    def self.scrape(url)
      results = {}
      Scrapers.agent.get(url).tap do |page|
        results[:url]         = page.uri.to_s
        results[:title]       = page.title.strip
        results[:ingredients] = scrape_ingredients(page)
        results[:directions]  = scrape_directions(page)
        results[:photo]       = scrape_photo(page)
      end
      results
    end

    # Collect the ingredient list, collapsing runs of whitespace and
    # prefixing each entry with "*" (markdown bullet).
    def self.scrape_ingredients(page)
      page.
        search("ul.ingredient-wrap").
        search(".//li").
        map { |i| i.text.gsub(/[[:space:]]+/, ' ').sub(/^/, '*') }
    end

    # Collect the direction steps, prefixing each with "# "
    # (markdown ordered list). Returns [] when the directions
    # container is missing (previously raised NoMethodError on nil).
    def self.scrape_directions(page)
      container = page.search("div.directLeft").first
      return [] if container.nil? # page layout changed or empty page
      container.
        search("li").
        map { |i| i.text.gsub(/[[:space:]]+/, ' ').sub(/^/, '# ') }
    end

    # Return the recipe photo's attributes as a plain Hash.
    # Returns {} when the page has no img#imgPhoto element
    # (previously raised NoMethodError on nil).
    def self.scrape_photo(page)
      photo = page.search("img#imgPhoto").first
      return {} if photo.nil?
      Hash[photo.attributes.map { |k, v| [k, v.value] }]
    end

  end

end
@@ -0,0 +1,28 @@
1
+ =begin rdoc
2
+
3
+ = DISCOVERYNEWS.RB
4
+
5
+ *Author*:: Tamara Temple <tamouse@gmail.com>
6
+ *Since*:: 2013-06-15
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ Scraper for disco news pictures of the week
11
+
12
+ =end
13
+
14
+ require 'mechanize'
15
+
16
module Scrapers

  # Scraper for Discovery News "pictures of the week" galleries.
  module DiscoNews

    # Return the src URLs of the gallery's hero images.
    #
    # url - String URL of a Discovery News picture-of-the-week page.
    #
    # Returns an Array of image source URL strings.
    #
    # (Cleanup: the old body stored write-only @url/@page instance
    # variables on the module and assigned the result to an unused
    # local before implicitly returning it.)
    def self.disco_downloads(url)
      page = Scrapers.agent.get(url)
      # Only the "media-hero" images are the gallery pictures.
      page.images_with(:class => "media-hero").map(&:src)
    end

  end

end
@@ -0,0 +1,37 @@
1
+ =begin rdoc
2
+
3
+ = DOWNLOAD.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'mechanize'
13
+
14
module Scrapers

  # Fetch a URL and save its body as a file on disk.
  module Download

    # Download url into dir (default: current directory).
    #
    # Returns the path of the saved file (dir joined with the
    # filename Mechanize derived from the response).
    #
    # Raises RuntimeError when dir is not a writable directory.
    def self.download(url, dir = ".")
      # Force Mechanize to treat every response as a raw download
      # instead of parsing it as a page.
      Scrapers.agent.pluggable_parser.default = Mechanize::Download
      @dir = validate_directory(dir)
      dl = Scrapers.agent.get(url)
      # Mechanize saves relative to the process cwd, so chdir first.
      Dir.chdir(@dir) { dl.save }
      File.join(@dir, dl.filename)
    end

    # Ensure d is an existing writable directory; returns d.
    # FIX: removed leftover STDERR debug tracing that printed the
    # caller and directory state on every invocation.
    def self.validate_directory(d)
      raise "#{d} is not a writable directory!" unless File.directory?(d) and File.writable?(d)
      d
    end

  end

end
@@ -0,0 +1,43 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
+
5
module Scrapers

  # Scraper for comic strips hosted on gocomics.com.
  module GoComics

    GOCOMIC_URL = "http://www.gocomics.com/"

    # Scrape today's strip for the named comic (e.g. "nonsequitur").
    #
    # Returns a Hash with :url, :title, :pubdate (ISO-8601 date
    # string) and :img_src (URL of the strip image).
    def self.scrape(comic)
      results = Hash.new

      url = URI.parse GOCOMIC_URL
      url.path = "/#{comic}"

      # FIX: use URI.open — Kernel#open no longer accepts URIs as of
      # Ruby 3.0 (and the URI form was deprecated since 2.6).
      page = Nokogiri::HTML(URI.open(url.to_s))
      results[:url] = url.to_s
      results[:title] = scrape_title(page)
      results[:pubdate] = scrape_pubdate(page)
      results[:img_src] = scrape_image_source(page)
      results
    end

    # Page title with all whitespace normalized to single spaces.
    def self.scrape_title(page)
      page.at_css("title").content.strip.gsub(/[[:space:]]/, ' ').squeeze(" ")
    end

    # Publication date parsed from the first feature-nav entry.
    def self.scrape_pubdate(page)
      Date.parse(page.at_css("ul.feature-nav > li").content).to_s
    end

    # src attribute of the strip image inside the feature item.
    def self.scrape_image_source(page)
      page.
        at_css("p.feature_item").
        at_css("img").
        attr("src")
    end

  end

end
@@ -0,0 +1,56 @@
1
+ =begin rdoc
2
+
3
+ = IMGUR.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
module Scrapers

  # Template URL; the path component is rewritten per request.
  IMGUR_TEMPLATE = "http://imgur.com/path"

  # Resolves an imgur page to its direct download link by fetching
  # the page and following its "Download" anchor.
  class Imgur

    attr_accessor :agent, :url, :download, :page

    def initialize
      @agent    = Mechanize.new
      @url      = URI.parse(IMGUR_TEMPLATE)
      @download = URI.parse(IMGUR_TEMPLATE)
    end

    # Full pipeline for an imgur code: build the page URL, fetch the
    # page, locate the download anchor, return the URL as a String.
    def download_link(code)
      make_url(code)
      retrieve_page
      find_download
      @download.to_s
    end

    # Point @url's path at the given imgur code.
    def make_url(imgur_code)
      @url.path = "/#{imgur_code}"
    end

    # Fetch the page currently addressed by @url.
    def retrieve_page
      @page = @agent.get(@url.to_s)
    end

    # Record on @download the href of the page link whose text
    # matches link_text. Raises when no such link exists.
    def find_download(link_text = /Download/)
      link = @page.link_with(:text => link_text)
      raise "#{link_text} not found on #{@page.uri.to_s}" if link.nil?
      @download.path = link.href
    end

  end

  module_function

  # Shortcut: derive the direct download URL for an imgur page URL
  # purely from the URL itself — no HTTP involved.
  def imgur(url)
    code = File.basename(url).sub(/\.[^.]+$/, '')
    "http://imgur.com/download/#{code}/"
  end

end
@@ -0,0 +1,60 @@
1
+ =begin rdoc
2
+
3
+ nasa_apod.rb -- oneline desc
4
+
5
+ Time-stamp: <2013-08-23 22:47:58 tamara>
6
+ Copyright (C) 2013 Tamara Temple Web Development
7
+ Author: Tamara Temple <tamouse@gmail.com>
8
+ License: MIT
9
+
10
+ == Discussion
11
+
12
+ NASA's Astronomy Picture of the Day is a great source for nice astro
13
+ photos and various other information. But it isn't something I
14
+ remember to go see every day, so I'd like it to drop in my in-box or
15
+ an evernote notebook. But the feed does not include the image, for
16
+ some ungodly reason, so I'm adding a scraper to grab the nice info off
17
+ the page including the photo.
18
+
19
+ =end
20
+
21
+
22
module Scrapers

  # Scraper for NASA's Astronomy Picture of the Day (APOD). The RSS
  # feed omits the image, so this pulls the whole page body.
  module NasaApod

    module_function

    # Scrape the canonical APOD page reachable from url.
    #
    # Returns an RSS-item-shaped Hash with :title, :link,
    # :description, :pubDate, :guid and :content_encoded.
    # Returns an empty Hash when url is nil.
    def scrape(url)
      apod = {}
      return apod if url.nil?

      Mechanize.start do |browser|
        browser.get url

        # APOD has a funky rotating entry page: step back one day via
        # the '<' link, then forward via '>' to land on the dated,
        # canonical page for today.
        previous_href = browser.current_page.link_with(:text => '<').href
        browser.get previous_href
        canonical_href = browser.current_page.link_with(:text => '>').href
        browser.get canonical_href

        page = browser.current_page
        apod[:title]           = page.title.strip
        apod[:link]            = page.uri.to_s
        apod[:description]     = (page/("body")).text
        apod[:pubDate]         = page.response['date'].to_s
        apod[:guid]            = page.uri.to_s
        apod[:content_encoded] = (page/("body")).to_html
      end

      apod
    end

  end

end
@@ -0,0 +1,7 @@
1
module Scrapers
  # Gem version and gemspec metadata, kept in one place so the
  # gemspec can read them. Strings are frozen: they are constants
  # and should never be mutated in place.
  VERSION = "0.2.0".freeze
  DESCRIPTION = "A library of web site scrapers utilizing mechanize and other goodies. Helpful in gathering images, moving things, saving things, etc.".freeze
  SUMMARY = "Web site scrapers".freeze
  LICENSE = "MIT".freeze
  WEBSITE = "http://github.com/tamouse/scrapers".freeze
end
data/lib/scrapers.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'mechanize'
2
+
3
+ Dir[File.join(File.expand_path('../', __FILE__),'**','*.rb')].each {|file| require file}
4
+
5
module Scrapers
  # Shared Mechanize instance, created lazily on first use so every
  # scraper reuses one agent (cookie jar, connection reuse).
  def self.agent
    @agent = Mechanize.new if @agent.nil?
    @agent
  end
end
data/scrapers.gemspec ADDED
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem packaging metadata. Version, description, summary, license and
# website all live in lib/scrapers/version.rb so they are defined in
# exactly one place.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'scrapers/version'

Gem::Specification.new do |gem|
  gem.name     = "scrapers"
  gem.version  = Scrapers::VERSION
  gem.authors  = ["Tamara Temple"]
  gem.email    = ["tamouse@gmail.com"]

  gem.description = Scrapers::DESCRIPTION
  gem.summary     = Scrapers::SUMMARY
  gem.homepage    = Scrapers::WEBSITE
  gem.license     = Scrapers::LICENSE

  # Package exactly what git tracks; bin/ entries become executables.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependency.
  gem.add_dependency "mechanize"

  # Development-only dependencies.
  gem.add_development_dependency "bundler"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
  gem.add_development_dependency "guard"
  gem.add_development_dependency "guard-rspec"
  gem.add_development_dependency "webmock"
  gem.add_development_dependency "vcr"
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
module Scrapers

  describe AllRecipes do
    it{should respond_to :scrape}

    context "scraping" do
      before(:all) do
        @url = "http://allrecipes.com/recipe/morning-glory-muffins-i/detail.aspx"
        # Replay the recorded HTTP exchange instead of hitting the site.
        @recipe = VCR.use_cassette('allrecipes.morning-glory-muffins-i') do
          Scrapers::AllRecipes.scrape(@url)
        end
      end

      it "retrieves a recipe" do
        @recipe.should_not be_nil
      end

      it "should be a Hash" do
        @recipe.should be_a(Hash)
      end

      # Every scraped recipe exposes this fixed set of keys.
      [:title, :url, :ingredients, :directions, :photo].each do |key|
        it "should have key #{key}" do
          @recipe.should have_key(key)
        end
      end
    end
  end
end
29
+
@@ -0,0 +1,39 @@
1
+ =begin rdoc
2
+
3
+ = DISCOVERYNEWS_SPEC.RB
4
+
5
+ *Author*:: Tamara Temple <tamouse@gmail.com>
6
+ *Since*:: 2013-06-15
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'spec_helper'
13
+
14
+
15
module Scrapers

  describe DiscoNews do
    it {should respond_to(:disco_downloads)}

    context "scraping" do
      let(:url) {"http://news.discovery.com/space/history-of-space/stunning-space-photos-week-june-9-14-pictures-130614.htm"}
      # Replay the recorded gallery page via VCR.
      let(:images) do
        VCR.use_cassette('disconews.history-of-space') do
          Scrapers::DiscoNews.disco_downloads(url)
        end
      end

      it "retrieves an array of images" do
        images.should be_a(Array)
        # Every entry must look like an http image URL.
        images.each do |img|
          img.should =~ /^http:\/\/.*(jpe?g|png|gif)/
        end
      end
    end

  end

end
39
+
@@ -0,0 +1,36 @@
1
+ =begin rdoc
2
+
3
+ = DOWNLOAD_SPEC.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'spec_helper'
13
+ require 'tmpdir'
14
+
15
module Scrapers

  describe Download do
    it {Scrapers::Download.should respond_to :download}

    context "download" do
      before(:all) do
        @url = "http://imgur.com/download/v70StgA/%2Asnrrrrrrrrrrrf%21%2A"
        # Replay the recorded download via VCR; file lands in tmp/.
        VCR.use_cassette("download.cassette") do
          @file = Scrapers::Download.download(@url, 'tmp')
        end
      end

      it "saves the file" do
        # Path keeps the decoded name and Mechanize-derived suffix.
        @file.should =~ /.*snrrrrrrrrrrrf.*Imgur\.jpg/
        File.exist?(@file).should be_true
      end
    end

  end

end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
module Scrapers

  describe GoComics do
    it{should respond_to :scrape}

    context "scraping" do
      before(:all) do
        @comic_strip = 'nonsequitur'
        # Replay the recorded strip page via VCR.
        @comic = VCR.use_cassette('gocomics.nonsequitur') do
          Scrapers::GoComics.scrape(@comic_strip)
        end
      end

      it "retrieves a comic" do
        @comic.should_not be_nil
      end

      it "should be a Hash" do
        @comic.should be_a(Hash)
      end

      # Every scraped strip exposes this fixed set of keys.
      [:title, :url, :pubdate, :img_src].each do |key|
        it "should have key #{key}" do
          @comic.should have_key(key)
        end
      end

      context "title" do
        it{@comic[:title].should_not be_empty}
        it{@comic[:title].should match /Non Sequitur Comic Strip on GoComics.com/}
      end

      context "url" do
        it{@comic[:url].should_not be_empty}
        it{@comic[:url].should match /www\.gocomics\.com\/nonsequitur/}
      end

      context "pubdate" do
        it{@comic[:pubdate].should_not be_empty}
        it{Date.parse(@comic[:pubdate]).should be_a(Date)}
      end

      context "img_src" do
        it{@comic[:img_src].should_not be_empty}
        it{URI.parse(@comic[:img_src]).should be_a(URI::HTTP)}
      end

    end
  end
end
46
+
@@ -0,0 +1,31 @@
1
+ =begin rdoc
2
+
3
+ = IMGUR_SPEC.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'spec_helper'
13
+
14
module Scrapers

  describe "Scrapers" do
    it {Scrapers.should respond_to(:imgur)}
  end

  describe "Fetch the download link" do
    let(:url) {"http://imgur.com/v70StgA"}

    # No HTTP involved: imgur derives the download URL from the page
    # URL alone, so no cassette is needed here.
    it "should return the download link from a given url" do
      Scrapers.imgur(url).should =~ %r{http://imgur.com/download/v70StgA/}
    end
  end

end
@@ -0,0 +1,30 @@
1
+
2
+ require 'spec_helper'
3
+
4
+
5
module Scrapers

  describe NasaApod do
    it {Scrapers::NasaApod.should respond_to :scrape}

    context "scrape" do
      let(:url){"http://apod.nasa.gov/apod/astropix.html"}
      # Cassette name is derived from the example description, so
      # each example replays its own recorded exchange.
      let(:apod_hash){
        VCR.use_cassette("#{example.description.gsub(/[^-[:alnum:]]/,'')}.cassette", :record => :new_episodes) do
          Scrapers::NasaApod.scrape(url)
        end}

      it {apod_hash.should be_a(Hash)}

      # Each RSS-item attribute must be present as a non-nil String.
      [:title, :link, :description, :pubDate, :guid, :content_encoded].each do |attr|
        it "should include #{attr}" do
          apod_hash.keys.should include attr
        end
        it {apod_hash[attr].should_not be_nil}
        it {apod_hash[attr].should be_a(String)}
      end

    end

  end

end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
module Scrapers

  describe Scrapers do
    # The top-level module exposes the shared Mechanize agent.
    it{should respond_to(:agent)}
  end

end
+ end