scrapers 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/ChangeLog +7 -0
  4. data/Gemfile +0 -8
  5. data/Guardfile +1 -1
  6. data/bin/rubytapas +2 -75
  7. data/lib/scrapers.rb +1 -3
  8. data/lib/scrapers/manning_books.rb +37 -27
  9. data/lib/scrapers/rubytapas.rb +6 -81
  10. data/lib/scrapers/rubytapas/cli.rb +39 -0
  11. data/lib/scrapers/rubytapas/config.rb +11 -0
  12. data/lib/scrapers/rubytapas/dpdcart.rb +115 -0
  13. data/lib/scrapers/rubytapas/episode.rb +86 -0
  14. data/lib/scrapers/rubytapas/scraper.rb +142 -0
  15. data/lib/scrapers/version.rb +2 -2
  16. data/scrapers.gemspec +4 -1
  17. data/spec/lib/scrapers/rubytapas/dpdcart_spec.rb +68 -0
  18. data/spec/lib/scrapers/rubytapas/episode_spec.rb +140 -0
  19. data/spec/lib/scrapers/rubytapas/rubytapas_spec.rb +87 -0
  20. data/spec/lib/scrapers/rubytapas/scraper_spec.rb +83 -0
  21. data/spec/lib/scrapers/rubytapas/test_data/feed.xml +7038 -0
  22. data/spec/lib/scrapers/{wunderground_spec.rb → wunderground_spec.rb.no} +0 -0
  23. data/spec/scrapers/allrecipes_spec.rb +2 -2
  24. data/spec/scrapers/discoverynews_spec.rb +3 -14
  25. data/spec/scrapers/download_spec.rb +6 -16
  26. data/spec/scrapers/gocomics_spec.rb +3 -3
  27. data/spec/scrapers/imgur_spec.rb +10 -22
  28. data/spec/scrapers/manning_books_spec.rb +9 -6
  29. data/spec/scrapers/nasa_apod_spec.rb +12 -14
  30. data/spec/scrapers/sinfest_spec.rb +3 -3
  31. data/spec/scrapers/xkcd_spec.rb +1 -0
  32. data/spec/scrapers_spec.rb +2 -1
  33. data/spec/spec_helper.rb +1 -8
  34. data/spec/support/dir_helpers.rb +13 -0
  35. data/spec/support/use_vcr.rb +9 -0
  36. data/vcr_cassettes/nasa-apod.yml +348 -0
  37. data/vcr_cassettes/rubytapas-download-1.yml +6726 -0
  38. data/vcr_cassettes/rubytapas-download-all.yml +6726 -0
  39. data/vcr_cassettes/rubytapas_download.yml +982 -0
  40. data/vcr_cassettes/rubytapas_download_twice.yml +1064 -0
  41. data/vcr_cassettes/rubytapas_feed.yml +5880 -0
  42. data/vcr_cassettes/rubytapas_login.yml +849 -0
  43. metadata +74 -6
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require "scrapers/allrecipes"
2
3
 
3
4
  module Scrapers
4
5
 
@@ -11,7 +12,7 @@ module Scrapers
11
12
  Scrapers::AllRecipes.scrape(@url)
12
13
  end
13
14
  end
14
-
15
+
15
16
  it "retrieves a recipe" do
16
17
  @recipe.should_not be_nil
17
18
  end
@@ -26,4 +27,3 @@ module Scrapers
26
27
  end
27
28
  end
28
29
  end
29
-
@@ -1,15 +1,5 @@
1
- =begin rdoc
2
-
3
- = DISCOVERYNEWS_SPEC.RB
4
-
5
- *Author*:: Tamara Temple <tamouse@gmail.com>
6
- *Since*:: 2013-06-15
7
- *Copyright*:: (c) 2013 Tamara Temple Web Development
8
- *License*:: MIT
9
-
10
- =end
11
-
12
1
  require 'spec_helper'
2
+ require 'scrapers/discoverynews'
13
3
 
14
4
 
15
5
  module Scrapers
@@ -24,7 +14,7 @@ module Scrapers
24
14
  Scrapers::DiscoNews.disco_downloads(url)
25
15
  end
26
16
  end
27
-
17
+
28
18
  it "retrieves an array of images" do
29
19
  images.should be_a(Array)
30
20
  images.each do |i|
@@ -32,8 +22,7 @@ module Scrapers
32
22
  end
33
23
  end
34
24
  end
35
-
25
+
36
26
  end
37
27
 
38
28
  end
39
-
@@ -1,15 +1,5 @@
1
- =begin rdoc
2
-
3
- = DOWNLOAD_SPEC.RB
4
-
5
- *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
- *Since*:: 2013-05-27
7
- *Copyright*:: (c) 2013 Tamara Temple Web Development
8
- *License*:: MIT
9
-
10
- =end
11
-
12
1
  require 'spec_helper'
2
+ require "scrapers/download"
13
3
  require 'tmpdir'
14
4
 
15
5
  def in_tmpdir
@@ -24,7 +14,7 @@ end
24
14
  module Scrapers
25
15
 
26
16
  describe Download do
27
-
17
+
28
18
  it {Scrapers::Download.should respond_to :download}
29
19
 
30
20
  it "should download and save the file" do
@@ -34,7 +24,7 @@ module Scrapers
34
24
  @file = Scrapers::Download.download(@url,dir)
35
25
  end
36
26
  @file.should =~ /.*snrrrrrrrrrrrf.*Imgur\.jpg/
37
- File.exist?(@file).should be_true
27
+ File.exist?(@file).should be true
38
28
  end
39
29
  end
40
30
  it "should overwrite file with second download" do
@@ -46,7 +36,7 @@ module Scrapers
46
36
  end
47
37
  @file1.should eq @file2
48
38
  @file1.should eq File.join(dir,'sandwich.png')
49
- File.exist?(@file1).should be_true
39
+ File.exist?(@file1).should be true
50
40
  end
51
41
  end
52
42
  it "should make a new file on second download" do
@@ -64,9 +54,9 @@ module Scrapers
64
54
 
65
55
  @file1.should_not eq @file2
66
56
  @file1.should eq File.join(dir,'sandwich.png')
67
- File.exist?(@file1).should be_true
57
+ File.exist?(@file1).should be true
68
58
  @file2.should eq File.join(dir,'sandwich.png.1')
69
- File.exist?(@file2).should be_true
59
+ File.exist?(@file2).should be true
70
60
  end
71
61
  end
72
62
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require "scrapers/gocomics"
2
3
 
3
4
  module Scrapers
4
5
 
@@ -11,7 +12,7 @@ module Scrapers
11
12
  Scrapers::GoComics.scrape(@comic_strip)
12
13
  end
13
14
  end
14
-
15
+
15
16
  it "retrieves a comic" do
16
17
  @comic.should_not be_nil
17
18
  end
@@ -39,8 +40,7 @@ module Scrapers
39
40
  it{@comic[:img_src].should_not be_empty}
40
41
  it{URI.parse(@comic[:img_src]).should be_a(URI::HTTP)}
41
42
  end
42
-
43
+
43
44
  end
44
45
  end
45
46
  end
46
-
@@ -1,31 +1,19 @@
1
- =begin rdoc
2
-
3
- = IMGUR_SPEC.RB
4
-
5
- *Author*:: Tamara Temple <tamara@tamaratemple.com>
6
- *Since*:: 2013-05-27
7
- *Copyright*:: (c) 2013 Tamara Temple Web Development
8
- *License*:: MIT
9
-
10
- =end
11
-
12
1
  require 'spec_helper'
2
+ require "scrapers/imgur"
13
3
 
14
4
  module Scrapers
15
5
 
16
- describe "Scrapers" do
17
- it {Scrapers.should respond_to(:imgur)}
18
- end
6
+ describe "Imgur Scraping" do
7
+ describe "Scrapers" do
8
+ it {expect(Scrapers).to respond_to(:imgur)}
9
+ end
19
10
 
20
- describe "Fetch the download link" do
21
- let(:url) {"http://imgur.com/v70StgA"}
11
+ describe "Fetch the download link" do
12
+ let(:url) {"http://imgur.com/v70StgA"}
22
13
 
23
- it "should return the download link from a given url" do
24
- Scrapers.imgur(url).should =~ %r{http://imgur.com/download/v70StgA/}
14
+ it "should return the download link from a given url" do
15
+ expect(Scrapers.imgur(url)).to match(%r{http://imgur.com/download/v70StgA/})
16
+ end
25
17
  end
26
-
27
18
  end
28
-
29
-
30
-
31
19
  end
@@ -17,7 +17,7 @@ RSpec.describe Scrapers::ManningBooks::Scraper do
17
17
  it { is_expected.to respond_to :download_books }
18
18
  end
19
19
  describe "#login" do
20
- let(:scraper) { Scrapers::ManningBooks::Scraper.new }
20
+ let(:scraper) { Scrapers::ManningBooks::Scraper.new }
21
21
  let(:agent) { double('agent') }
22
22
 
23
23
  before do
@@ -46,7 +46,7 @@ RSpec.describe Scrapers::ManningBooks::Scraper do
46
46
  scraper.login(agent) { |m| @result = "in yield" }
47
47
  expect(@result).to eq("in yield")
48
48
  end
49
-
49
+
50
50
  end
51
51
 
52
52
  context "when login is not passed a block" do
@@ -62,11 +62,15 @@ RSpec.describe Scrapers::ManningBooks::Scraper do
62
62
  let(:agent) {double('agent')}
63
63
  let(:books) do
64
64
  3.times.map do |i|
65
- OpenStruct.new(href: "http://#{Scrapers::ManningBooks::DASHBOARD_URL}/#{i}")
65
+ {
66
+ title: "Book #{i}",
67
+ downloads: {
68
+ pdf: "path/to/download.pdf"
69
+ }
70
+ }
66
71
  end
67
72
  end
68
73
 
69
-
70
74
  before do
71
75
  allow(Scrapers::NetrcReader).to receive(:new) do
72
76
  OpenStruct.new(user: "joe@example.com", pw: "password")
@@ -79,10 +83,9 @@ RSpec.describe Scrapers::ManningBooks::Scraper do
79
83
  save_stdout = $stdout
80
84
  $stdout = double('output').as_null_object
81
85
  expect(agent).to receive(:get).exactly(3).times
82
- expect(agent).to receive(:current_page).exactly(3*4).times.and_return(agent)
86
+ expect(agent).to receive(:current_page).exactly(3*3).times.and_return(agent)
83
87
  expect(agent).to receive(:filename).exactly(3*2).times.and_return("FILENAME")
84
88
  expect(agent).to receive(:save!).exactly(3).times
85
- expect(agent).to receive(:uri).exactly(3).times
86
89
  results = scraper.download_books(agent, books)
87
90
  $stdout = save_stdout
88
91
  expect(results.size).to eq(3)
@@ -1,35 +1,33 @@
1
-
2
1
  require 'spec_helper'
3
-
2
+ require "scrapers/nasa_apod"
4
3
 
5
4
  module Scrapers
6
-
5
+
7
6
  describe NasaApod do
8
7
  it {Scrapers::NasaApod.should respond_to :scrape}
9
8
 
10
9
  context "scrape" do
11
10
 
12
11
  before(:all) do
13
- pending "until apod back up"
14
- # @url = "http://apod.nasa.gov/apod/astropix.html"
15
- # VCR.use_cassette("nasa-apod", :record => :new_episodes) do
16
- # @apod_hash = Scrapers::NasaApod.scrape(@url)
17
- # end
12
+ @url = "http://apod.nasa.gov/apod/astropix.html"
13
+ VCR.use_cassette("nasa-apod", :record => :new_episodes) do
14
+ @apod_hash = Scrapers::NasaApod.scrape(@url)
15
+ end
18
16
  end
19
-
20
- xit "should be a Hash" do
17
+
18
+ it "should be a Hash" do
21
19
  @apod_hash.should be_a(Hash)
22
20
  end
23
-
21
+
24
22
  %w{title link description pubDate guid content_encoded}.map(&:to_sym).each do |attr|
25
23
  it "should include #{attr}" do
26
24
  @apod_hash.keys.should include attr
27
25
  end
28
- xit "#{attr} should not be nil" do
26
+ it "#{attr} should not be nil" do
29
27
  @apod_hash[attr].should_not be_nil
30
28
  end
31
-
32
- xit "#{attr} should be a Sring" do
29
+
30
+ it "#{attr} should be a Sring" do
33
31
  @apod_hash[attr].should be_a(String)
34
32
  end
35
33
 
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require "scrapers/sinfest"
2
3
 
3
4
  module Scrapers
4
5
 
@@ -10,7 +11,7 @@ module Scrapers
10
11
  Scrapers::Sinfest.scrape
11
12
  end
12
13
  end
13
-
14
+
14
15
  it "retrieves a comic" do
15
16
  @comic.should_not be_nil
16
17
  end
@@ -39,8 +40,7 @@ module Scrapers
39
40
  it{URI.parse(@comic[:img_src]).should be_a(URI::HTTP)}
40
41
  it{@comic[:img_src].should eq 'http://sinfest.net/comikaze/comics/2013-10-19.gif'}
41
42
  end
42
-
43
+
43
44
  end
44
45
  end
45
46
  end
46
-
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require "scrapers/xkcd"
2
3
 
3
4
  describe Scrapers::Xkcd do
4
5
  it {should respond_to :scrape}
@@ -1,9 +1,10 @@
1
1
  require 'spec_helper'
2
+ require 'scrapers'
2
3
 
3
4
  module Scrapers
4
5
 
5
6
  describe Scrapers do
6
- it{should respond_to(:agent)}
7
+ it{ expect(Scrapers).to respond_to(:agent) }
7
8
  end
8
9
 
9
10
  end
@@ -1,5 +1,3 @@
1
- require 'vcr'
2
-
3
1
  # This file was generated by the `rspec --init` command. Conventionally, all
4
2
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
5
3
  # Require this file using `require "spec_helper"` to ensure that it is only
@@ -14,10 +12,5 @@ RSpec.configure do |config|
14
12
  # order dependency and want to debug it, you can fix the order by providing
15
13
  # the seed, which is printed after each run.
16
14
  # --seed 1234
17
- # config.order = 'random'
18
- end
19
-
20
- VCR.configure do |c|
21
- c.cassette_library_dir = 'vcr_cassettes'
22
- c.hook_into :webmock
15
+ config.order = 'random'
23
16
  end
@@ -0,0 +1,13 @@
1
+ require 'tempfile'
2
+ require 'tmpdir'
3
+
4
+ def run_under_tmpdir(&block)
5
+ raise "no block given" unless block_given?
6
+ Dir.mktmpdir do |dir|
7
+ Dir.chdir(dir) do |dir|
8
+ yield dir
9
+ end
10
+ end
11
+ end
12
+
13
+ alias :run_in_tmpdir :run_under_tmpdir
@@ -0,0 +1,9 @@
1
+ # Only include this in tests that actually use VCR.
2
+ # Better for unit tests to read saved data or use mocks.
3
+
4
+ require 'vcr'
5
+
6
+ VCR.configure do |c|
7
+ c.cassette_library_dir = 'vcr_cassettes'
8
+ c.hook_into :webmock
9
+ end
@@ -0,0 +1,348 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://apod.nasa.gov/apod/astropix.html
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip,deflate,identity
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Mechanize/2.7.3 Ruby/2.2.0p0 (http://github.com/sparklemotion/mechanize/)
16
+ Accept-Charset:
17
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
18
+ Accept-Language:
19
+ - en-us,en;q=0.5
20
+ Host:
21
+ - apod.nasa.gov
22
+ Connection:
23
+ - keep-alive
24
+ Keep-Alive:
25
+ - 300
26
+ response:
27
+ status:
28
+ code: 200
29
+ message: OK
30
+ headers:
31
+ Date:
32
+ - Tue, 30 Jun 2015 21:47:29 GMT
33
+ Server:
34
+ - WebServer/1.0
35
+ Accept-Ranges:
36
+ - bytes
37
+ Keep-Alive:
38
+ - timeout=5, max=100
39
+ Connection:
40
+ - Keep-Alive
41
+ Transfer-Encoding:
42
+ - chunked
43
+ Content-Type:
44
+ - text/html; charset=ISO-8859-1
45
+ body:
46
+ encoding: UTF-8
47
+ string: "<!doctype html>\n<html>\n<head>\n<title>Astronomy Picture of the Day\n</title>
48
+ \n<!-- gsfc meta tags -->\n<meta name=\"orgcode\" content=\"661\">\n<meta
49
+ name=\"rno\" content=\"phillip.a.newman\">\n<meta name=\"content-owner\" content=\"Jerry.T.Bonnell.1\">\n<meta
50
+ name=\"webmaster\" content=\"Stephen.F.Fantasia.1\">\n<meta name=\"description\"
51
+ content=\"A different astronomy and space science\nrelated image is featured
52
+ each day, along with a brief explanation.\">\n<!-- -->\n<meta name=\"keywords\"
53
+ content=\"Ceres, asteroid, mountain\">\n<!-- -->\n<script id=\"_fed_an_js_tag\"
54
+ type=\"text/javascript\"\nsrc=\"js/federated-analytics.all.min.js?agency=NASA\"></script>\n\n</head>\n\n<body
55
+ BGCOLOR=\"#F4F4FF\" text=\"#000000\" link=\"#0000FF\" vlink=\"#7F0F9F\"\nalink=\"#FF0000\">\n\n<center>\n<h1>
56
+ Astronomy Picture of the Day </h1>\n<p>\n\n<a href=\"archivepix.html\">Discover
57
+ the cosmos!</a>\nEach day a different image or photograph of our fascinating
58
+ universe is\nfeatured, along with a brief explanation written by a professional
59
+ astronomer.\n<p>\n\n2015 June 30 \n<br> \n<a href=\"image/1506/CeresMountain_Dawn_1041.jpg\">\n<IMG
60
+ SRC=\"image/1506/CeresMountain_Dawn_960.jpg\"\n alt=\"See Explanation. Clicking
61
+ on the picture will download\n the highest resolution version available.\"></a>\n\n</center>\n\n<center>\n<b>
62
+ An Unusual Mountain on Asteroid Ceres </b> <br> \n<b> Image Credit: </b> \n<a
63
+ href=\"http://www.nasa.gov/\">NASA</a>, \n<a href=\"http://www.jpl.nasa.gov/\">JPL-Caltech</a>,
64
+ \n<a href=\"http://dawn.igpp.ucla.edu/\">UCLA</a>,\nMPS/DLR/IDA\n</center>
65
+ <p> \n\n<b> Explanation: </b> \nWhat created this large mountain on asteroid
66
+ Ceres?\n\nNo one is yet sure.\n\nAs if in anticipation of today being \n<a
67
+ href=\"http://www.asteroidday.org/\">Asteroid</a> \n<a href=\"https://en.wikipedia.org/wiki/Asteroid_Day\">Day</a>
68
+ on Earth, the robotic spacecraft \n<a href=\"http://dawn.jpl.nasa.gov/\">Dawn</a>
69
+ in orbit around Ceres took the best yet image of an unusually tall mountain
70
+ on the Asteroid Belt's largest asteroid.\n\nVisible at the top of the \n<a
71
+ href=\"http://www.nasa.gov/jpl/pia19578/dawn-survey-orbit-image-10\"\n>featured
72
+ image</a>, the exceptional mountain rises about \nfive kilometers up from
73
+ an area that otherwise appears pretty level.\n\nThe <a href=\"http://photojournal.jpl.nasa.gov/catalog/PIA19578\"\n>image</a>
74
+ was taken about two weeks ago from about 4,400 kilometers away. \n\nAlthough
75
+ <a href=\n\"http://www.slate.com/blogs/bad_astronomy/2015/06/22/ceres_dawn_images_reveal_a_5_km_tall_mountain.html\"\n>origin
76
+ hypotheses for the mountain</a> include volcanism, impacts, and plate tectonics,
77
+ clear evidence backing any of these is currently lacking. \n\nAlso visible
78
+ \n<a href=\"ap150610.html\">across Ceres'</a> surface are some enigmatic light
79
+ areas: \n<a href=\"ap150514.html\">bright spots</a> whose origin and composition
80
+ that also \nremain an \n<a href=\"http://i.ytimg.com/vi/OIamIdHV37I/maxresdefault.jpg\">active
81
+ topic of investigation</a>.\n\nEven though Dawn is expected to continue to
82
+ orbit Ceres, officially dubbed a \n<a href=\"https://en.wikipedia.org/wiki/Dwarf_planet\">dwarf
83
+ planet</a>, for millions of years, the \n<a href=\"https://www.youtube.com/watch?v=5UFTfwTxeEk\">hydrazine
84
+ fuel</a> used to point \n<a href=\"http://www.popsci.com/whats-next-dawn-mission-keri-bean\">Dawn's
85
+ communications</a> \nantenna toward Earth is expected to run out sometime
86
+ next year.\n\n\n<p> <center> \n<b> Tonight & tomorrow: </b> \n<a href=\"https://www.youtube.com/watch?v=_ppuCZR8Mkw\">See
87
+ Venus & Jupiter together after sunset</a> <br>\n<b> Tomorrow's picture: </b>star
88
+ clouds\n\n<p> <hr>\n<a href=\"ap150629.html\">&lt;</a>\n| <a href=\"archivepix.html\">Archive</a>\n|
89
+ <a href=\"lib/apsubmit2015.html\">Submissions</a> \n| <a href=\"lib/aptree.html\">Index</a>\n|
90
+ <a href=\"http://antwrp.gsfc.nasa.gov/cgi-bin/apod/apod_search\">Search</a>\n|
91
+ <a href=\"calendar/allyears.html\">Calendar</a>\n| <a href=\"/apod.rss\">RSS</a>\n|
92
+ <a href=\"lib/edlinks.html\">Education</a>\n| <a href=\"lib/about_apod.html\">About
93
+ APOD</a>\n| <a href=\n\"http://asterisk.apod.com/discuss_apod.php?date=150630\">Discuss</a>\n|
94
+ <a href=\"ap150701.html\">&gt;</a>\n\n<hr><p>\n<b> Authors & editors: </b>\n<a
95
+ href=\"http://www.phy.mtu.edu/faculty/Nemiroff.html\">Robert Nemiroff</a>\n(<a
96
+ href=\"http://www.phy.mtu.edu/\">MTU</a>) &\n<a href=\"http://antwrp.gsfc.nasa.gov/htmltest/jbonnell/www/bonnell.html\"\n>Jerry
97
+ Bonnell</a> (<a href=\"http://www.astro.umd.edu/\">UMCP</a>)<br>\n<b>NASA
98
+ Official: </b> Phillip Newman\n<a href=\"lib/about_apod.html#srapply\">Specific
99
+ rights apply</a>.<br>\n<a href=\"http://www.nasa.gov/about/highlights/HP_Privacy.html\">NASA
100
+ Web\nPrivacy Policy and Important Notices</a><br>\n<b>A service of:</b>\n<a
101
+ href=\"http://astrophysics.gsfc.nasa.gov/\">ASD</a> at\n<a href=\"http://www.nasa.gov/\">NASA</a>
102
+ /\n<a href=\"http://www.nasa.gov/centers/goddard/\">GSFC</a>\n<br><b>&</b>
103
+ <a href=\"http://www.mtu.edu/\">Michigan Tech. U.</a><br>\n</center>\n</body>\n</html>\n"
104
+ http_version:
105
+ recorded_at: Tue, 30 Jun 2015 21:47:29 GMT
106
+ - request:
107
+ method: get
108
+ uri: http://apod.nasa.gov/apod/ap150629.html
109
+ body:
110
+ encoding: US-ASCII
111
+ string: ''
112
+ headers:
113
+ Accept-Encoding:
114
+ - gzip,deflate,identity
115
+ Accept:
116
+ - "*/*"
117
+ User-Agent:
118
+ - Mechanize/2.7.3 Ruby/2.2.0p0 (http://github.com/sparklemotion/mechanize/)
119
+ Accept-Charset:
120
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
121
+ Accept-Language:
122
+ - en-us,en;q=0.5
123
+ Host:
124
+ - apod.nasa.gov
125
+ Referer:
126
+ - !ruby/object:URI::HTTP
127
+ scheme: http
128
+ user:
129
+ password:
130
+ host: apod.nasa.gov
131
+ port: 80
132
+ path: "/apod/astropix.html"
133
+ query:
134
+ opaque:
135
+ fragment:
136
+ parser: &1 !ruby/object:URI::RFC3986_Parser
137
+ regexp:
138
+ :SCHEME: !ruby/regexp /\A[A-Za-z][A-Za-z0-9+\-.]*\z/
139
+ :USERINFO: !ruby/regexp /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/
140
+ :HOST: !ruby/regexp /\A(?:(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{,4}::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))\z/
141
+ :ABS_PATH: !ruby/regexp /\A\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/
142
+ :REL_PATH: !ruby/regexp /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/
143
+ :QUERY: !ruby/regexp /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/
144
+ :FRAGMENT: !ruby/regexp /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/
145
+ :OPAQUE: !ruby/regexp /\A(?:[^\/].*)?\z/
146
+ :PORT: !ruby/regexp /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/
147
+ Connection:
148
+ - keep-alive
149
+ Keep-Alive:
150
+ - 300
151
+ response:
152
+ status:
153
+ code: 200
154
+ message: OK
155
+ headers:
156
+ Date:
157
+ - Tue, 30 Jun 2015 21:47:30 GMT
158
+ Server:
159
+ - WebServer/1.0
160
+ Accept-Ranges:
161
+ - bytes
162
+ Keep-Alive:
163
+ - timeout=5, max=100
164
+ Connection:
165
+ - Keep-Alive
166
+ Transfer-Encoding:
167
+ - chunked
168
+ Content-Type:
169
+ - text/html; charset=ISO-8859-1
170
+ body:
171
+ encoding: UTF-8
172
+ string: "<!doctype html>\n<html>\n<head>\n<title> APOD: 2015 June 29 - Sunspot
173
+ Group AR 2339 Crosses the Sun \n</title> \n<!-- gsfc meta tags -->\n<meta
174
+ name=\"orgcode\" content=\"661\">\n<meta name=\"rno\" content=\"phillip.a.newman\">\n<meta
175
+ name=\"content-owner\" content=\"Jerry.T.Bonnell.1\">\n<meta name=\"webmaster\"
176
+ content=\"Stephen.F.Fantasia.1\">\n<meta name=\"description\" content=\"A
177
+ different astronomy and space science\nrelated image is featured each day,
178
+ along with a brief explanation.\">\n<!-- -->\n<meta name=\"keywords\" content=\"Sun,
179
+ sunspot, active region\">\n<!-- -->\n<script id=\"_fed_an_js_tag\" type=\"text/javascript\"\nsrc=\"js/federated-analytics.all.min.js?agency=NASA\"></script>\n\n</head>\n\n<body
180
+ BGCOLOR=\"#F4F4FF\" text=\"#000000\" link=\"#0000FF\" vlink=\"#7F0F9F\"\nalink=\"#FF0000\">\n\n<center>\n<h1>
181
+ Astronomy Picture of the Day </h1>\n<p>\n\n<a href=\"archivepix.html\">Discover
182
+ the cosmos!</a>\nEach day a different image or photograph of our fascinating
183
+ universe is\nfeatured, along with a brief explanation written by a professional
184
+ astronomer.\n<p>\n\n2015 June 29 \n<br> \n<iframe width=\"960\" height=\"540\"
185
+ src=\"https://www.youtube.com/embed/BejRxZAa66E?rel=0\" frameborder=\"0\"
186
+ allowfullscreen></iframe>\n\n</center>\n\n<center>\n<b> Sunspot Group AR 2339
187
+ Crosses the Sun </b> <br> \n<b> Images Credit: </b> \n<a href=\"http://www.nasa.gov/\">NASA</a>,
188
+ \n<a href=\"http://sdo.gsfc.nasa.gov/\">SDO</a>;\n<b> Video compilation &
189
+ Copyright: </b> \n<a href=\"mailto: astro .dot. stas @at@ gmail .dot. com\">Stanislav
190
+ Korotkiy</a>\n(<a href=\"http://vk.com/astro.nomy\">AstroAlert</a>) &\nMikhail
191
+ Chubarets; <br>\n<i> Music: </i> <a href=\"https://www.youtube.com/watch?v=vtHZqcDlUp4\"\n>Pas
192
+ de Deux (Bird Creek)</a> \n</center> <p> \n\n<b> Explanation: </b> \nHow do
193
+ sunspots evolve?\n\nLarge dark \n<a href=\"ap050216.html\">sunspot</a>s --
194
+ and the active regions that contain them -- may last for weeks, but all during
195
+ that time they are constantly changing.\n\nSuch variations were particularly
196
+ apparent a few weeks ago as the active region \n<a href=\"http://earthsky.org/todays-image/large-sunspot-group-ar-2339\">AR
197
+ 2339</a> \ncame around the limb of the Sun and was tracked for the next 12
198
+ days by NASA's \n<a href=\"http://sdo.gsfc.nasa.gov/mission/\">Solar Dynamic
199
+ Observatory</a>. \n\nIn the \n<a href=\"https://www.youtube.com/watch?v=BejRxZAa66E\">featured
200
+ time lapse video</a>, \nsome sunspots drift apart, while others merge.\n\nAll
201
+ the while, the dark central \n<a href=\"http://www.spaceweatherlive.com/en/help/what-are-sunspots\">umbral
202
+ regions</a> \nshift internally and their surrounding lighter penumbras shimmer
203
+ and wave. \n\nThe surrounding \n<a href=\"http://missionscience.nasa.gov/sun/\">Sun</a>
204
+ \nappears to flicker as the carpet of yellow \n<a href=\"ap111106.html\">granules
205
+ come and go</a> on the time scale of hours. \n\nIn general, <a href=\"https://en.wikipedia.org/wiki/Sunspot\">sunspots</a>
206
+ \nare relatively cool regions where the local \n<a href=\"http://solarscience.msfc.nasa.gov/the_key.shtml\">magnetic
207
+ field</a> \npokes through the Sun's surface and inhibits heating.\n\nOver
208
+ the past week, an even more active region -- \n<a href=\"http://spaceweather.com/archive.php?view=1&day=23&month=06&year=2015\"\n>AR
209
+ 2371</a> -- has been crossing the Sun and releasing powerful flares that have
210
+ resulted in \n<a href=\"http://spaceweathergallery.com/aurora_gallery.html\"\n>impressive
211
+ auroras</a> here on Earth.\n\n<p> <center> \n<b> Follow APOD on: </b> \n<a
212
+ href=\"https://www.facebook.com/AstronomyPictureOfTheDay\">Facebook</a>, \n<a
213
+ href=\"https://plus.google.com/u/1/+AstronomyPictureOfTheDay\">Google Plus</a>,
214
+ or \n<a href=\"http://twitter.com/apod/\">Twitter</a> <br>\n<b> Tomorrow's
215
+ picture: </b><a href=\"ap150630.html\">asteroid day</a>\n\n<p> <hr>\n<a href=\"ap150628.html\">&lt;</a>\n|
216
+ <a href=\"archivepix.html\">Archive</a>\n| <a href=\"lib/apsubmit2015.html\">Submissions</a>
217
+ \n| <a href=\"lib/aptree.html\">Index</a>\n| <a href=\"http://antwrp.gsfc.nasa.gov/cgi-bin/apod/apod_search\">Search</a>\n|
218
+ <a href=\"calendar/allyears.html\">Calendar</a>\n| <a href=\"/apod.rss\">RSS</a>\n|
219
+ <a href=\"lib/edlinks.html\">Education</a>\n| <a href=\"lib/about_apod.html\">About
220
+ APOD</a>\n| <a href=\n\"http://asterisk.apod.com/discuss_apod.php?date=150629\">Discuss</a>\n|
221
+ <a href=\"ap150630.html\">&gt;</a>\n\n<hr><p>\n<b> Authors & editors: </b>\n<a
222
+ href=\"http://www.phy.mtu.edu/faculty/Nemiroff.html\">Robert Nemiroff</a>\n(<a
223
+ href=\"http://www.phy.mtu.edu/\">MTU</a>) &\n<a href=\"http://antwrp.gsfc.nasa.gov/htmltest/jbonnell/www/bonnell.html\"\n>Jerry
224
+ Bonnell</a> (<a href=\"http://www.astro.umd.edu/\">UMCP</a>)<br>\n<b>NASA
225
+ Official: </b> Phillip Newman\n<a href=\"lib/about_apod.html#srapply\">Specific
226
+ rights apply</a>.<br>\n<a href=\"http://www.nasa.gov/about/highlights/HP_Privacy.html\">NASA
227
+ Web\nPrivacy Policy and Important Notices</a><br>\n<b>A service of:</b>\n<a
228
+ href=\"http://astrophysics.gsfc.nasa.gov/\">ASD</a> at\n<a href=\"http://www.nasa.gov/\">NASA</a>
229
+ /\n<a href=\"http://www.nasa.gov/centers/goddard/\">GSFC</a>\n<br><b>&</b>
230
+ <a href=\"http://www.mtu.edu/\">Michigan Tech. U.</a><br>\n</center>\n</body>\n</html>\n"
231
+ http_version:
232
+ recorded_at: Tue, 30 Jun 2015 21:47:30 GMT
233
+ - request:
234
+ method: get
235
+ uri: http://apod.nasa.gov/apod/ap150630.html
236
+ body:
237
+ encoding: US-ASCII
238
+ string: ''
239
+ headers:
240
+ Accept-Encoding:
241
+ - gzip,deflate,identity
242
+ Accept:
243
+ - "*/*"
244
+ User-Agent:
245
+ - Mechanize/2.7.3 Ruby/2.2.0p0 (http://github.com/sparklemotion/mechanize/)
246
+ Accept-Charset:
247
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
248
+ Accept-Language:
249
+ - en-us,en;q=0.5
250
+ Host:
251
+ - apod.nasa.gov
252
+ Referer:
253
+ - !ruby/object:URI::HTTP
254
+ scheme: http
255
+ user:
256
+ password:
257
+ host: apod.nasa.gov
258
+ port: 80
259
+ path: "/apod/ap150629.html"
260
+ query:
261
+ opaque:
262
+ fragment:
263
+ parser: *1
264
+ Connection:
265
+ - keep-alive
266
+ Keep-Alive:
267
+ - 300
268
+ response:
269
+ status:
270
+ code: 200
271
+ message: OK
272
+ headers:
273
+ Date:
274
+ - Tue, 30 Jun 2015 21:47:30 GMT
275
+ Server:
276
+ - WebServer/1.0
277
+ Accept-Ranges:
278
+ - bytes
279
+ Keep-Alive:
280
+ - timeout=5, max=100
281
+ Connection:
282
+ - Keep-Alive
283
+ Transfer-Encoding:
284
+ - chunked
285
+ Content-Type:
286
+ - text/html; charset=ISO-8859-1
287
+ body:
288
+ encoding: UTF-8
289
+ string: "<!doctype html>\n<html>\n<head>\n<title> APOD: 2015 June 30 - An Unusual
290
+ Mountain on Asteroid Ceres \n</title> \n<!-- gsfc meta tags -->\n<meta name=\"orgcode\"
291
+ content=\"661\">\n<meta name=\"rno\" content=\"phillip.a.newman\">\n<meta
292
+ name=\"content-owner\" content=\"Jerry.T.Bonnell.1\">\n<meta name=\"webmaster\"
293
+ content=\"Stephen.F.Fantasia.1\">\n<meta name=\"description\" content=\"A
294
+ different astronomy and space science\nrelated image is featured each day,
295
+ along with a brief explanation.\">\n<!-- -->\n<meta name=\"keywords\" content=\"Ceres,
296
+ asteroid, mountain\">\n<!-- -->\n<script id=\"_fed_an_js_tag\" type=\"text/javascript\"\nsrc=\"js/federated-analytics.all.min.js?agency=NASA\"></script>\n\n</head>\n\n<body
297
+ BGCOLOR=\"#F4F4FF\" text=\"#000000\" link=\"#0000FF\" vlink=\"#7F0F9F\"\nalink=\"#FF0000\">\n\n<center>\n<h1>
298
+ Astronomy Picture of the Day </h1>\n<p>\n\n<a href=\"archivepix.html\">Discover
299
+ the cosmos!</a>\nEach day a different image or photograph of our fascinating
300
+ universe is\nfeatured, along with a brief explanation written by a professional
301
+ astronomer.\n<p>\n\n2015 June 30 \n<br> \n<a href=\"image/1506/CeresMountain_Dawn_1041.jpg\">\n<IMG
302
+ SRC=\"image/1506/CeresMountain_Dawn_960.jpg\"\n alt=\"See Explanation. Clicking
303
+ on the picture will download\n the highest resolution version available.\"></a>\n\n</center>\n\n<center>\n<b>
304
+ An Unusual Mountain on Asteroid Ceres </b> <br> \n<b> Image Credit: </b> \n<a
305
+ href=\"http://www.nasa.gov/\">NASA</a>, \n<a href=\"http://www.jpl.nasa.gov/\">JPL-Caltech</a>,
306
+ \n<a href=\"http://dawn.igpp.ucla.edu/\">UCLA</a>,\nMPS/DLR/IDA\n</center>
307
+ <p> \n\n<b> Explanation: </b> \nWhat created this large mountain on asteroid
308
+ Ceres?\n\nNo one is yet sure.\n\nAs if in anticipation of today being \n<a
309
+ href=\"http://www.asteroidday.org/\">Asteroid</a> \n<a href=\"https://en.wikipedia.org/wiki/Asteroid_Day\">Day</a>
310
+ on Earth, the robotic spacecraft \n<a href=\"http://dawn.jpl.nasa.gov/\">Dawn</a>
311
+ in orbit around Ceres took the best yet image of an unusually tall mountain
312
+ on the Asteroid Belt's largest asteroid.\n\nVisible at the top of the \n<a
313
+ href=\"http://www.nasa.gov/jpl/pia19578/dawn-survey-orbit-image-10\"\n>featured
314
+ image</a>, the exceptional mountain rises about \nfive kilometers up from
315
+ an area that otherwise appears pretty level.\n\nThe <a href=\"http://photojournal.jpl.nasa.gov/catalog/PIA19578\"\n>image</a>
316
+ was taken about two weeks ago from about 4,400 kilometers away. \n\nAlthough
317
+ <a href=\n\"http://www.slate.com/blogs/bad_astronomy/2015/06/22/ceres_dawn_images_reveal_a_5_km_tall_mountain.html\"\n>origin
318
+ hypotheses for the mountain</a> include volcanism, impacts, and plate tectonics,
319
+ clear evidence backing any of these is currently lacking. \n\nAlso visible
320
+ \n<a href=\"ap150610.html\">across Ceres'</a> surface are some enigmatic light
321
+ areas: \n<a href=\"ap150514.html\">bright spots</a> whose origin and composition
322
+ that also \nremain an \n<a href=\"http://i.ytimg.com/vi/OIamIdHV37I/maxresdefault.jpg\">active
323
+ topic of investigation</a>.\n\nEven though Dawn is expected to continue to
324
+ orbit Ceres, officially dubbed a \n<a href=\"https://en.wikipedia.org/wiki/Dwarf_planet\">dwarf
325
+ planet</a>, for millions of years, the \n<a href=\"https://www.youtube.com/watch?v=5UFTfwTxeEk\">hydrazine
326
+ fuel</a> used to point \n<a href=\"http://www.popsci.com/whats-next-dawn-mission-keri-bean\">Dawn's
327
+ communications</a> \nantenna toward Earth is expected to run out sometime
328
+ next year.\n\n\n<p> <center> \n<b> Tonight & tomorrow: </b> \n<a href=\"https://www.youtube.com/watch?v=_ppuCZR8Mkw\">See
329
+ Venus & Jupiter together after sunset</a> <br>\n<b> Tomorrow's picture: </b><a
330
+ href=\"ap150701.html\">star clouds</a>\n\n<p> <hr>\n<a href=\"ap150629.html\">&lt;</a>\n|
331
+ <a href=\"archivepix.html\">Archive</a>\n| <a href=\"lib/apsubmit2015.html\">Submissions</a>
332
+ \n| <a href=\"lib/aptree.html\">Index</a>\n| <a href=\"http://antwrp.gsfc.nasa.gov/cgi-bin/apod/apod_search\">Search</a>\n|
333
+ <a href=\"calendar/allyears.html\">Calendar</a>\n| <a href=\"/apod.rss\">RSS</a>\n|
334
+ <a href=\"lib/edlinks.html\">Education</a>\n| <a href=\"lib/about_apod.html\">About
335
+ APOD</a>\n| <a href=\n\"http://asterisk.apod.com/discuss_apod.php?date=150630\">Discuss</a>\n|
336
+ <a href=\"ap150701.html\">&gt;</a>\n\n<hr><p>\n<b> Authors & editors: </b>\n<a
337
+ href=\"http://www.phy.mtu.edu/faculty/Nemiroff.html\">Robert Nemiroff</a>\n(<a
338
+ href=\"http://www.phy.mtu.edu/\">MTU</a>) &\n<a href=\"http://antwrp.gsfc.nasa.gov/htmltest/jbonnell/www/bonnell.html\"\n>Jerry
339
+ Bonnell</a> (<a href=\"http://www.astro.umd.edu/\">UMCP</a>)<br>\n<b>NASA
340
+ Official: </b> Phillip Newman\n<a href=\"lib/about_apod.html#srapply\">Specific
341
+ rights apply</a>.<br>\n<a href=\"http://www.nasa.gov/about/highlights/HP_Privacy.html\">NASA
342
+ Web\nPrivacy Policy and Important Notices</a><br>\n<b>A service of:</b>\n<a
343
+ href=\"http://astrophysics.gsfc.nasa.gov/\">ASD</a> at\n<a href=\"http://www.nasa.gov/\">NASA</a>
344
+ /\n<a href=\"http://www.nasa.gov/centers/goddard/\">GSFC</a>\n<br><b>&</b>
345
+ <a href=\"http://www.mtu.edu/\">Michigan Tech. U.</a><br>\n</center>\n</body>\n</html>\n"
346
+ http_version:
347
+ recorded_at: Tue, 30 Jun 2015 21:47:30 GMT
348
+ recorded_with: VCR 2.9.3