metainspector 1.9.6 → 1.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -1
- data/lib/meta_inspector/scraper.rb +1 -1
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/fixtures/empty_page.response +17 -0
- data/spec/metainspector_spec.rb +6 -0
- metadata +34 -33
data/README.rdoc
CHANGED
@@ -1,4 +1,8 @@
|
|
1
|
-
MetaInspector is a gem for web scraping purposes. You give it an URL, and it lets you easily get its title, links,
|
1
|
+
MetaInspector is a gem for web scraping purposes. You give it an URL, and it lets you easily get its title, links, images, charset, description, keywords, meta tags...
|
2
|
+
|
3
|
+
= See it in action!
|
4
|
+
|
5
|
+
You can try MetaInspector live at this little demo: https://metainspectordemo.herokuapp.com
|
2
6
|
|
3
7
|
= Installation
|
4
8
|
|
data/spec/fixtures/empty_page.response
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
</head>
|
15
|
+
<body>
|
16
|
+
</body>
|
17
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
@@ -16,6 +16,7 @@ describe MetaInspector do
|
|
16
16
|
FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
|
17
17
|
FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => fixture_file("w3clove_faqs.response"))
|
18
18
|
FakeWeb.register_uri(:get, "https://twitter.com/w3clove", :response => fixture_file("twitter_w3clove.response"))
|
19
|
+
FakeWeb.register_uri(:get, "http://example.com/empty", :response => fixture_file("empty_page.response"))
|
19
20
|
|
20
21
|
describe 'Initialization' do
|
21
22
|
it 'should accept an URL with a scheme' do
|
@@ -164,6 +165,11 @@ describe MetaInspector do
|
|
164
165
|
"http://twitter.com/W3CLove",
|
165
166
|
"http://us4.campaign-archive1.com/home/?u=6af3ab69c286561d0f0f25671&id=04a0dab609" ]
|
166
167
|
end
|
168
|
+
|
169
|
+
it "should return empty array if no links found" do
|
170
|
+
m = MetaInspector.new('http://example.com/empty')
|
171
|
+
m.links.should == []
|
172
|
+
end
|
167
173
|
end
|
168
174
|
|
169
175
|
describe 'Non-HTTP links' do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 61
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 9
|
9
|
-
- 6
|
10
|
-
version: 1.9.6
|
9
|
+
- 7
|
10
|
+
version: 1.9.7
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,12 +15,10 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-08-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
|
-
|
22
|
-
prerelease: false
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
21
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
24
22
|
none: false
|
25
23
|
requirements:
|
26
24
|
- - ~>
|
@@ -30,12 +28,12 @@ dependencies:
|
|
30
28
|
- 1
|
31
29
|
- 5
|
32
30
|
version: "1.5"
|
31
|
+
prerelease: false
|
33
32
|
type: :runtime
|
34
|
-
|
33
|
+
name: nokogiri
|
34
|
+
requirement: *id001
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
|
-
|
37
|
-
prerelease: false
|
38
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
39
37
|
none: false
|
40
38
|
requirements:
|
41
39
|
- - "="
|
@@ -46,12 +44,12 @@ dependencies:
|
|
46
44
|
- 3
|
47
45
|
- 20111021164500
|
48
46
|
version: 1.3.20111021164500
|
47
|
+
prerelease: false
|
49
48
|
type: :runtime
|
50
|
-
|
49
|
+
name: charguess
|
50
|
+
requirement: *id002
|
51
51
|
- !ruby/object:Gem::Dependency
|
52
|
-
|
53
|
-
prerelease: false
|
54
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
55
53
|
none: false
|
56
54
|
requirements:
|
57
55
|
- - "="
|
@@ -62,12 +60,12 @@ dependencies:
|
|
62
60
|
- 3
|
63
61
|
- 2
|
64
62
|
version: 0.3.2
|
63
|
+
prerelease: false
|
65
64
|
type: :runtime
|
66
|
-
|
65
|
+
name: rash
|
66
|
+
requirement: *id003
|
67
67
|
- !ruby/object:Gem::Dependency
|
68
|
-
|
69
|
-
prerelease: false
|
70
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
68
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
71
69
|
none: false
|
72
70
|
requirements:
|
73
71
|
- - "="
|
@@ -78,12 +76,12 @@ dependencies:
|
|
78
76
|
- 11
|
79
77
|
- 0
|
80
78
|
version: 2.11.0
|
79
|
+
prerelease: false
|
81
80
|
type: :development
|
82
|
-
|
81
|
+
name: rspec
|
82
|
+
requirement: *id004
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
|
85
|
-
prerelease: false
|
86
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
84
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
87
85
|
none: false
|
88
86
|
requirements:
|
89
87
|
- - "="
|
@@ -94,12 +92,12 @@ dependencies:
|
|
94
92
|
- 3
|
95
93
|
- 0
|
96
94
|
version: 1.3.0
|
95
|
+
prerelease: false
|
97
96
|
type: :development
|
98
|
-
|
97
|
+
name: fakeweb
|
98
|
+
requirement: *id005
|
99
99
|
- !ruby/object:Gem::Dependency
|
100
|
-
|
101
|
-
prerelease: false
|
102
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
100
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
103
101
|
none: false
|
104
102
|
requirements:
|
105
103
|
- - "="
|
@@ -110,12 +108,12 @@ dependencies:
|
|
110
108
|
- 0
|
111
109
|
- 2
|
112
110
|
version: 1.0.2
|
111
|
+
prerelease: false
|
113
112
|
type: :development
|
114
|
-
|
113
|
+
name: awesome_print
|
114
|
+
requirement: *id006
|
115
115
|
- !ruby/object:Gem::Dependency
|
116
|
-
|
117
|
-
prerelease: false
|
118
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
116
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
119
117
|
none: false
|
120
118
|
requirements:
|
121
119
|
- - "="
|
@@ -127,8 +125,10 @@ dependencies:
|
|
127
125
|
- 2
|
128
126
|
- 2
|
129
127
|
version: 0.9.2.2
|
128
|
+
prerelease: false
|
130
129
|
type: :development
|
131
|
-
|
130
|
+
name: rake
|
131
|
+
requirement: *id007
|
132
132
|
description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
|
133
133
|
email:
|
134
134
|
- jaimeiniesta@gmail.com
|
@@ -154,6 +154,7 @@ files:
|
|
154
154
|
- samples/basic_scraping.rb
|
155
155
|
- samples/spider.rb
|
156
156
|
- spec/fixtures/alazan.com.response
|
157
|
+
- spec/fixtures/empty_page.response
|
157
158
|
- spec/fixtures/guardian.co.uk.response
|
158
159
|
- spec/fixtures/iteh.at.response
|
159
160
|
- spec/fixtures/nonhttp.response
|
@@ -196,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
196
197
|
requirements: []
|
197
198
|
|
198
199
|
rubyforge_project:
|
199
|
-
rubygems_version: 1.8.
|
200
|
+
rubygems_version: 1.8.24
|
200
201
|
signing_key:
|
201
202
|
specification_version: 3
|
202
203
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
|