metainspector 1.9.7 → 1.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Mon, 30 May 2011 09:58:20 GMT
|
3
|
+
Server: Microsoft-IIS/6.0
|
4
|
+
X-Powered-By: PleskWin
|
5
|
+
X-Powered-By: ASP.NET
|
6
|
+
Cache-Control: private
|
7
|
+
Content-Length: 25902
|
8
|
+
Content-Type: text/html
|
9
|
+
Expires: Sun, 29 May 2011 09:58:18 GMT
|
10
|
+
Set-Cookie: ASPSESSIONIDCSBSQADC=AHENHHKBGGDIFJLHHCCJBHMP; path=/
|
11
|
+
Cache-control: private
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
<html>
|
16
|
+
<head>
|
17
|
+
<title>Diseño paginas web</title>
|
18
|
+
</head>
|
19
|
+
<body>
|
20
|
+
<a href="index.asp">Index</a>
|
21
|
+
<a href="faqs.asp">Faqs</a>
|
22
|
+
</body>
|
23
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
@@ -5,6 +5,7 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
|
|
5
5
|
describe MetaInspector do
|
6
6
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
7
7
|
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
8
|
+
FakeWeb.register_uri(:get, "http://alazan.com/websolution.asp", :response => fixture_file("alazan_websolution.response"))
|
8
9
|
FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
|
9
10
|
FakeWeb.register_uri(:get, "http://theonion-no-description.com", :response => fixture_file("theonion-no-description.com.response"))
|
10
11
|
FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
|
@@ -141,7 +142,7 @@ describe MetaInspector do
|
|
141
142
|
|
142
143
|
it "should get correct absolute links for internal pages" do
|
143
144
|
m = MetaInspector.new('http://w3clove.com/faqs')
|
144
|
-
m.links.should == [ "http://w3clove.com
|
145
|
+
m.links.should == [ "http://w3clove.com/#",
|
145
146
|
"http://w3clove.com/",
|
146
147
|
"http://w3clove.com/faqs",
|
147
148
|
"http://w3clove.com/plans-and-pricing",
|
@@ -166,6 +167,12 @@ describe MetaInspector do
|
|
166
167
|
"http://us4.campaign-archive1.com/home/?u=6af3ab69c286561d0f0f25671&id=04a0dab609" ]
|
167
168
|
end
|
168
169
|
|
170
|
+
it "should get correct absolute links, correcting relative links from URL not ending with slash" do
|
171
|
+
m = MetaInspector.new('http://alazan.com/websolution.asp')
|
172
|
+
m.links.should == [ "http://alazan.com/index.asp",
|
173
|
+
"http://alazan.com/faqs.asp" ]
|
174
|
+
end
|
175
|
+
|
169
176
|
it "should return empty array if no links found" do
|
170
177
|
m = MetaInspector.new('http://example.com/empty')
|
171
178
|
m.links.should == []
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 35
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 9
|
9
|
-
- 7
|
10
|
-
version: 1.9.7
|
9
|
+
- 8
|
10
|
+
version: 1.9.8
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -154,6 +154,7 @@ files:
|
|
154
154
|
- samples/basic_scraping.rb
|
155
155
|
- samples/spider.rb
|
156
156
|
- spec/fixtures/alazan.com.response
|
157
|
+
- spec/fixtures/alazan_websolution.response
|
157
158
|
- spec/fixtures/empty_page.response
|
158
159
|
- spec/fixtures/guardian.co.uk.response
|
159
160
|
- spec/fixtures/iteh.at.response
|