scrapers 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.rspec-example +2 -0
  4. data/.watchr +27 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +6 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +32 -0
  9. data/Rakefile +1 -0
  10. data/lib/scrapers/allrecipes.rb +49 -0
  11. data/lib/scrapers/discoverynews.rb +28 -0
  12. data/lib/scrapers/download.rb +37 -0
  13. data/lib/scrapers/gocomics.rb +43 -0
  14. data/lib/scrapers/imgur.rb +56 -0
  15. data/lib/scrapers/nasa_apod.rb +60 -0
  16. data/lib/scrapers/version.rb +7 -0
  17. data/lib/scrapers.rb +9 -0
  18. data/scrapers.gemspec +29 -0
  19. data/spec/scrapers/allrecipes_spec.rb +29 -0
  20. data/spec/scrapers/discoverynews_spec.rb +39 -0
  21. data/spec/scrapers/download_spec.rb +36 -0
  22. data/spec/scrapers/gocomics_spec.rb +46 -0
  23. data/spec/scrapers/imgur_spec.rb +31 -0
  24. data/spec/scrapers/nasa_apod_spec.rb +30 -0
  25. data/spec/scrapers_spec.rb +9 -0
  26. data/spec/spec_helper.rb +31 -0
  27. data/vcr_cassettes/allrecipes_morning-glory-muffins-i.yml +1047 -0
  28. data/vcr_cassettes/disconews_history-of-space.yml +385 -0
  29. data/vcr_cassettes/download_cassette.yml +431 -0
  30. data/vcr_cassettes/exampleatspecscrapersnasaapodspecrb16_cassette.yml +326 -0
  31. data/vcr_cassettes/exampleatspecscrapersnasaapodspecrb21_cassette.yml +326 -0
  32. data/vcr_cassettes/exampleatspecscrapersnasaapodspecrb22_cassette.yml +312 -0
  33. data/vcr_cassettes/gocomics_nonsequitur.yml +336 -0
  34. data/vcr_cassettes/shouldincludecontentencoded_cassette.yml +326 -0
  35. data/vcr_cassettes/shouldincludedescription_cassette.yml +326 -0
  36. data/vcr_cassettes/shouldincludeguid_cassette.yml +326 -0
  37. data/vcr_cassettes/shouldincludelink_cassette.yml +326 -0
  38. data/vcr_cassettes/shouldincludepubDate_cassette.yml +326 -0
  39. data/vcr_cassettes/shouldincludetitle_cassette.yml +326 -0
  40. metadata +203 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: eef6a353955404ad8beaf88e4db8ab4b0a4f3cec
4
+ data.tar.gz: 87ec094c87d315640c1ebb274b6f334b6dd73646
5
+ SHA512:
6
+ metadata.gz: ece9775696f757216e7c9fd9c2ef2a6ff4a1bd3a9dcbf1fd754caed7c751bdeb1f055a2ea6694b216dfaf61f7a1678cedc777fd0281d660d73e68f34399f2c90
7
+ data.tar.gz: 370b23a91c38f349a8f02a6fe4ed8770bb58017b43876ad736c449087da7b303aae605f2a314e76b6489bdfc89aaceaa2588f94a6e6e1d7b3e0423c50f2c5f63
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .rspec
data/.rspec-example ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.watchr ADDED
@@ -0,0 +1,27 @@
1
+ # -*- ruby -*-
2
+ def run_spec(file)
3
+ unless File.exist?(file)
4
+ puts "#{file} does not exist"
5
+ return
6
+ end
7
+
8
+ puts "Running #{file}"
9
+ system "bundle exec rspec #{file}"
10
+ puts
11
+ end
12
+
13
+ watch("^spec/*_spec.rb$") do |match|
14
+ run_spec match[0]
15
+ end
16
+
17
+ watch("^spec/.*/*_spec.rb$") do |match|
18
+ run_spec match[0]
19
+ end
20
+
21
+ watch("^lib/(.*).rb$") do |match|
22
+ run_spec "spec/#{match[1]}_spec.rb"
23
+ end
24
+
25
+ watch("^lib/(.*/.*).rb$") do |match|
26
+ run_spec "spec/#{match[1]}_spec.rb"
27
+ end
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scrapers.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,6 @@
1
+ guard :rspec do
2
+ watch(%r{^spec/.+_spec\.rb$})
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
4
+ watch('spec/spec_helper.rb') { "spec" }
5
+ end
6
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Tamara Temple
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # Scrapers
2
+
3
+ A library of web site scrapers utilizing mechanize and other goodies. Helpful in gathering images, moving things, saving things, etc.
4
+
5
+ LICENSE:: MIT
6
+ WEBSITE:: http://github.com/tamouse/scrapers
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'scrapers'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install scrapers
21
+
22
+ ## Usage
23
+
24
+ See the various RDoc for explanation of each item.
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,49 @@
1
+ require 'mechanize'
2
+
3
+
4
+ module Scrapers
5
+
6
+ module AllRecipes
7
+
8
+ def self.scrape(url)
9
+
10
+ results = Hash.new
11
+
12
+ Scrapers.agent.get(url).tap do |page|
13
+ results[:url] = page.uri.to_s
14
+ results[:title] = page.title.strip
15
+ results[:ingredients] = scrape_ingredients(page)
16
+ results[:directions] = scrape_directions(page)
17
+ results[:photo] = scrape_photo(page)
18
+ end
19
+
20
+ results
21
+
22
+ end
23
+
24
+ def self.scrape_ingredients(page)
25
+ page.
26
+ search("ul.ingredient-wrap").
27
+ search(".//li").
28
+ map do |i|
29
+ i.text.gsub(/[[:space:]]+/,' ').sub(/^/,'*')
30
+ end
31
+ end
32
+
33
+ def self.scrape_directions(page)
34
+ page.
35
+ search("div.directLeft").first.
36
+ search("li").
37
+ map do |i|
38
+ i.text.gsub(/[[:space:]]+/,' ').sub(/^/,'# ')
39
+ end
40
+ end
41
+
42
+ def self.scrape_photo(page)
43
+ photo = page.search("img#imgPhoto").first
44
+ Hash[photo.attributes.map{|k,v| [k,v.value]}]
45
+ end
46
+
47
+ end
48
+
49
+ end
@@ -0,0 +1,28 @@
1
+ =begin rdoc
2
+
3
+ = DISCOVERYNEWS.RB
4
+
5
+ *Author*:: Tamara Temple <tamouse@gmail.com>
6
+ *Since*:: 2013-06-15
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ Scraper for disco news pictures of the week
11
+
12
+ =end
13
+
14
+ require 'mechanize'
15
+
16
+ module Scrapers
17
+
18
+ module DiscoNews
19
+
20
+ def self.disco_downloads(url)
21
+ @url = url.clone
22
+ @page = Scrapers.agent.get(url)
23
+ images = @page.images_with(:class => "media-hero").map(&:src)
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,37 @@
1
+ =begin rdoc
2
+
3
+ = DOWNLOAD.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'mechanize'
13
+
14
+ module Scrapers
15
+
16
+ module Download
17
+
18
+ def self.download(url,dir=".")
19
+ Scrapers.agent.pluggable_parser.default = Mechanize::Download
20
+ @dir = validate_directory(dir)
21
+ dl = Scrapers.agent.get(url)
22
+ Dir.chdir(@dir) do |dir|
23
+ dl.save()
24
+ end
25
+ File.join(@dir,dl.filename)
26
+ end
27
+
28
+ def self.validate_directory(d)
29
+ STDERR.puts caller(0,1).first +
30
+ "d: #{d.inspect}. directory? #{File.directory?(d)}. writable? #{File.writable?(d)}"
31
+ raise "#{d} is not a writable directory!" unless File.directory?(d) and File.writable?(d)
32
+ d
33
+ end
34
+
35
+ end
36
+
37
+ end
@@ -0,0 +1,43 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
+
5
+ module Scrapers
6
+
7
+ module GoComics
8
+
9
+ GOCOMIC_URL = "http://www.gocomics.com/"
10
+
11
+ def self.scrape(comic)
12
+
13
+ results = Hash.new
14
+
15
+ url = URI.parse GOCOMIC_URL
16
+ url.path = "/#{comic}"
17
+
18
+ page = Nokogiri::HTML(open(url.to_s))
19
+ results[:url] = url.to_s
20
+ results[:title] = scrape_title(page)
21
+ results[:pubdate] = scrape_pubdate(page)
22
+ results[:img_src] = scrape_image_source(page)
23
+ results
24
+ end
25
+
26
+ def self.scrape_title(page)
27
+ page.at_css("title").content.strip.gsub(/[[:space:]]/,' ').squeeze(" ")
28
+ end
29
+
30
+ def self.scrape_pubdate(page)
31
+ Date.parse(page.at_css("ul.feature-nav > li").content).to_s
32
+ end
33
+
34
+ def self.scrape_image_source(page)
35
+ page.
36
+ at_css("p.feature_item").
37
+ at_css("img").
38
+ attr("src")
39
+ end
40
+
41
+ end
42
+
43
+ end
@@ -0,0 +1,56 @@
1
+ =begin rdoc
2
+
3
+ = IMGUR.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ module Scrapers
13
+
14
+ IMGUR_TEMPLATE="http://imgur.com/path"
15
+
16
+ class Imgur
17
+
18
+ attr_accessor :agent, :url, :download, :page
19
+
20
+ def initialize
21
+ @agent = Mechanize.new
22
+ @url = URI.parse(IMGUR_TEMPLATE)
23
+ @download = URI.parse(IMGUR_TEMPLATE)
24
+ end
25
+
26
+ def download_link(code)
27
+ make_url(code)
28
+ retrieve_page()
29
+ find_download()
30
+ @download.to_s
31
+ end
32
+
33
+ def make_url(imgur_code)
34
+ @url.path = "/#{imgur_code}"
35
+ end
36
+
37
+ def retrieve_page()
38
+ @page = @agent.get(@url.to_s)
39
+ end
40
+
41
+ def find_download(link_text=/Download/)
42
+ link = @page.link_with(:text => link_text)
43
+ raise "#{link_text} not found on #{@page.uri.to_s}" if link.nil?
44
+ @download.path = link.href
45
+ end
46
+
47
+ end
48
+
49
+ module_function
50
+
51
+ def imgur(url)
52
+ code = File.basename(url).sub(/\.[^.]+$/,'')
53
+ "http://imgur.com/download/#{code}/"
54
+ end
55
+
56
+ end
@@ -0,0 +1,60 @@
1
+ =begin rdoc
2
+
3
+ nasa_apod.rb -- oneline desc
4
+
5
+ Time-stamp: <2013-08-23 22:47:58 tamara>
6
+ Copyright (C) 2013 Tamara Temple Web Development
7
+ Author: Tamara Temple <tamouse@gmail.com>
8
+ License: MIT
9
+
10
+ == Discussion
11
+
12
+ NASA's Astronomy Picture of the Day is a great source for nice astro
13
+ photos and various other information. But it isn't something I
14
+ remember to go see every day, so I'd like it to drop in my in-box or
15
+ an evernote notebook. But the feed does not include the image, for
16
+ some ungodly reason, so I'm adding a scraper to grab the nice info off
17
+ the page including the photo.
18
+
19
+ =end
20
+
21
+
22
+ module Scrapers
23
+
24
+ module NasaApod
25
+
26
+ module_function
27
+
28
+ def scrape(url)
29
+ apod = Hash.new
30
+ unless url.nil?
31
+
32
+ Mechanize.start do |m|
33
+
34
+ m.get url
35
+
36
+ # APOD has a funky entry page, but we want the actual page
37
+ prev = m.current_page.link_with(:text => '<').href
38
+ m.get prev
39
+ canonical = m.current_page.link_with(:text => '>' ).href
40
+ m.get canonical
41
+
42
+ m.current_page.tap do |page|
43
+ apod[:title] = page.title.strip
44
+ apod[:link] = page.uri.to_s
45
+ apod[:description] = (page/("body")).text
46
+ apod[:pubDate] = page.response['date'].to_s
47
+ apod[:guid] = page.uri.to_s
48
+ apod[:content_encoded] = (page/("body")).to_html
49
+ end
50
+
51
+ end
52
+
53
+ end
54
+
55
+ apod
56
+ end
57
+
58
+ end
59
+
60
+ end
@@ -0,0 +1,7 @@
1
+ module Scrapers
2
+ VERSION = "0.2.0"
3
+ DESCRIPTION = "A library of web site scrapers utilizing mechanize and other goodies. Helpful in gathering images, moving things, saving things, etc."
4
+ SUMMARY = "Web site scrapers"
5
+ LICENSE = "MIT"
6
+ WEBSITE = "http://github.com/tamouse/scrapers"
7
+ end
data/lib/scrapers.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'mechanize'
2
+
3
+ Dir[File.join(File.expand_path('../', __FILE__),'**','*.rb')].each {|file| require file}
4
+
5
+ module Scrapers
6
+ def self.agent()
7
+ @agent ||= Mechanize.new
8
+ end
9
+ end
data/scrapers.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scrapers/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scrapers"
8
+ spec.version = Scrapers::VERSION
9
+ spec.authors = ["Tamara Temple"]
10
+ spec.email = ["tamouse@gmail.com"]
11
+ spec.description = Scrapers::DESCRIPTION
12
+ spec.summary = Scrapers::SUMMARY
13
+ spec.homepage = Scrapers::WEBSITE
14
+ spec.license = Scrapers::LICENSE
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
24
+ spec.add_dependency "mechanize"
25
+ spec.add_development_dependency "guard"
26
+ spec.add_development_dependency "guard-rspec"
27
+ spec.add_development_dependency "webmock"
28
+ spec.add_development_dependency "vcr"
29
+ end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
+ module Scrapers
4
+
5
+ describe AllRecipes do
6
+ it{should respond_to :scrape}
7
+ context "scraping" do
8
+ before(:all) do
9
+ @url = "http://allrecipes.com/recipe/morning-glory-muffins-i/detail.aspx"
10
+ @recipe = VCR.use_cassette('allrecipes.morning-glory-muffins-i') do
11
+ Scrapers::AllRecipes.scrape(@url)
12
+ end
13
+ end
14
+
15
+ it "retrieves a recipe" do
16
+ @recipe.should_not be_nil
17
+ end
18
+ it "should be a Hash" do
19
+ @recipe.should be_a(Hash)
20
+ end
21
+ %w{title url ingredients directions photo}.map(&:to_sym).each do |key|
22
+ it "should have key #{key}" do
23
+ @recipe.should have_key(key)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+
@@ -0,0 +1,39 @@
1
+ =begin rdoc
2
+
3
+ = DISCOVERYNEWS_SPEC.RB
4
+
5
+ *Author*:: Tamara Temple <tamouse@gmail.com>
6
+ *Since*:: 2013-06-15
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'spec_helper'
13
+
14
+
15
+ module Scrapers
16
+
17
+ describe DiscoNews do
18
+ it {should respond_to(:disco_downloads)}
19
+
20
+ context "scraping" do
21
+ let(:url) {"http://news.discovery.com/space/history-of-space/stunning-space-photos-week-june-9-14-pictures-130614.htm"}
22
+ let(:images) do
23
+ VCR.use_cassette('disconews.history-of-space') do
24
+ Scrapers::DiscoNews.disco_downloads(url)
25
+ end
26
+ end
27
+
28
+ it "retrieves an array of images" do
29
+ images.should be_a(Array)
30
+ images.each do |i|
31
+ i.should =~ /^http:\/\/.*(jpe?g|png|gif)/
32
+ end
33
+ end
34
+ end
35
+
36
+ end
37
+
38
+ end
39
+
@@ -0,0 +1,36 @@
1
+ =begin rdoc
2
+
3
+ = DOWNLOAD_SPEC.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'spec_helper'
13
+ require 'tmpdir'
14
+
15
+ module Scrapers
16
+
17
+ describe Download do
18
+ it {Scrapers::Download.should respond_to :download}
19
+
20
+ context "download" do
21
+ before(:all) do
22
+ @url="http://imgur.com/download/v70StgA/%2Asnrrrrrrrrrrrf%21%2A"
23
+ VCR.use_cassette("download.cassette") do
24
+ @file = Scrapers::Download.download(@url,'tmp')
25
+ end
26
+ end
27
+
28
+ it "saves the file" do
29
+ @file.should =~ /.*snrrrrrrrrrrrf.*Imgur\.jpg/
30
+ File.exist?(@file).should be_true
31
+ end
32
+ end
33
+
34
+ end
35
+
36
+ end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
+ module Scrapers
4
+
5
+ describe GoComics do
6
+ it{should respond_to :scrape}
7
+ context "scraping" do
8
+ before(:all) do
9
+ @comic_strip = 'nonsequitur'
10
+ @comic = VCR.use_cassette('gocomics.nonsequitur') do
11
+ Scrapers::GoComics.scrape(@comic_strip)
12
+ end
13
+ end
14
+
15
+ it "retrieves a comic" do
16
+ @comic.should_not be_nil
17
+ end
18
+ it "should be a Hash" do
19
+ @comic.should be_a(Hash)
20
+ end
21
+ %w{title url pubdate img_src}.map(&:to_sym).each do |key|
22
+ it "should have key #{key}" do
23
+ @comic.should have_key(key)
24
+ end
25
+ end
26
+ context "title" do
27
+ it{@comic[:title].should_not be_empty}
28
+ it{@comic[:title].should match /Non Sequitur Comic Strip on GoComics.com/}
29
+ end
30
+ context "url" do
31
+ it{@comic[:url].should_not be_empty}
32
+ it{@comic[:url].should match /www\.gocomics\.com\/nonsequitur/}
33
+ end
34
+ context "pubdate" do
35
+ it{@comic[:pubdate].should_not be_empty}
36
+ it{Date.parse(@comic[:pubdate]).should be_a(Date)}
37
+ end
38
+ context "img_src" do
39
+ it{@comic[:img_src].should_not be_empty}
40
+ it{URI.parse(@comic[:img_src]).should be_a(URI::HTTP)}
41
+ end
42
+
43
+ end
44
+ end
45
+ end
46
+
@@ -0,0 +1,31 @@
1
+ =begin rdoc
2
+
3
+ = IMGUR_SPEC.RB
4
+
5
+ *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
+ *Since*:: 2013-05-27
7
+ *Copyright*:: (c) 2013 Tamara Temple Web Development
8
+ *License*:: MIT
9
+
10
+ =end
11
+
12
+ require 'spec_helper'
13
+
14
+ module Scrapers
15
+
16
+ describe "Scrapers" do
17
+ it {Scrapers.should respond_to(:imgur)}
18
+ end
19
+
20
+ describe "Fetch the download link" do
21
+ let(:url) {"http://imgur.com/v70StgA"}
22
+
23
+ it "should return the download link from a given url" do
24
+ Scrapers.imgur(url).should =~ %r{http://imgur.com/download/v70StgA/}
25
+ end
26
+
27
+ end
28
+
29
+
30
+
31
+ end
@@ -0,0 +1,30 @@
1
+
2
+ require 'spec_helper'
3
+
4
+
5
+ module Scrapers
6
+
7
+ describe NasaApod do
8
+ it {Scrapers::NasaApod.should respond_to :scrape}
9
+
10
+ context "scrape" do
11
+ let(:url){"http://apod.nasa.gov/apod/astropix.html"}
12
+ let(:apod_hash){
13
+ VCR.use_cassette("#{example.description.gsub(/[^-[:alnum:]]/,'')}.cassette", :record => :new_episodes) do
14
+ Scrapers::NasaApod.scrape(url)
15
+ end}
16
+ it {apod_hash.should be_a(Hash)}
17
+ %w{title link description pubDate guid content_encoded}.map(&:to_sym).each do |attr|
18
+ it "should include #{attr}" do
19
+ apod_hash.keys.should include attr
20
+ end
21
+ it {apod_hash[attr].should_not be_nil}
22
+ it {apod_hash[attr].should be_a(String)}
23
+
24
+ end
25
+
26
+ end
27
+
28
+ end
29
+
30
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Scrapers
4
+
5
+ describe Scrapers do
6
+ it{should respond_to(:agent)}
7
+ end
8
+
9
+ end