metainspector 1.15.1 → 1.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/meta_inspector/scraper.rb +4 -4
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/fixtures/relative_links.response +20 -0
- data/spec/metainspector_spec.rb +32 -0
- data/spec/spec_helper.rb +5 -0
- metadata +126 -135
@@ -230,11 +230,11 @@ module MetaInspector
|
|
230
230
|
|
231
231
|
# Convert a relative url like "/users" to an absolute one like "http://example.com/users"
|
232
232
|
# Respecting already absolute URLs like the ones starting with http:, ftp:, telnet:, mailto:, javascript: ...
|
233
|
-
def absolutify_url(
|
234
|
-
if
|
235
|
-
normalize_url(
|
233
|
+
def absolutify_url(uri)
|
234
|
+
if uri =~ /^\w*\:/i
|
235
|
+
normalize_url(uri)
|
236
236
|
else
|
237
|
-
URI.parse(
|
237
|
+
URI.parse(@url).merge(normalize_url(uri)).to_s
|
238
238
|
end
|
239
239
|
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
|
240
240
|
add_fatal_error "Link parsing exception: #{e.message}" and nil
|
@@ -0,0 +1,20 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/1.0.5
|
3
|
+
Date: Thu, 29 Dec 2011 23:10:13 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Content-Length: 15013
|
6
|
+
Last-Modified: Fri, 02 Dec 2011 21:00:49 GMT
|
7
|
+
Connection: keep-alive
|
8
|
+
Accept-Ranges: bytes
|
9
|
+
|
10
|
+
<!DOCTYPE html>
|
11
|
+
<html>
|
12
|
+
<head>
|
13
|
+
<meta charset="utf-8" />
|
14
|
+
<title>Relative links</title>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<p>Relative links</p>
|
18
|
+
<a href="about">About</a>
|
19
|
+
</body>
|
20
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
@@ -251,6 +251,38 @@ describe MetaInspector do
|
|
251
251
|
end
|
252
252
|
end
|
253
253
|
|
254
|
+
describe 'Relative links' do
|
255
|
+
describe 'From a root URL' do
|
256
|
+
before(:each) do
|
257
|
+
@m = MetaInspector.new('http://relative.com/')
|
258
|
+
end
|
259
|
+
|
260
|
+
it 'should get the relative links' do
|
261
|
+
@m.internal_links.should == ['http://relative.com/about']
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
describe 'From a document' do
|
266
|
+
before(:each) do
|
267
|
+
@m = MetaInspector.new('http://relative.com/company')
|
268
|
+
end
|
269
|
+
|
270
|
+
it 'should get the relative links' do
|
271
|
+
@m.internal_links.should == ['http://relative.com/about']
|
272
|
+
end
|
273
|
+
end
|
274
|
+
|
275
|
+
describe 'From a directory' do
|
276
|
+
before(:each) do
|
277
|
+
@m = MetaInspector.new('http://relative.com/company/')
|
278
|
+
end
|
279
|
+
|
280
|
+
it 'should get the relative links' do
|
281
|
+
@m.internal_links.should == ['http://relative.com/company/about']
|
282
|
+
end
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
254
286
|
describe 'Non-HTTP links' do
|
255
287
|
before(:each) do
|
256
288
|
@m = MetaInspector.new('http://example.com/nonhttp')
|
data/spec/spec_helper.rb
CHANGED
@@ -42,6 +42,11 @@ FakeWeb.register_uri(:get, "http://www.inkthemes.com/", :response => fixture_fil
|
|
42
42
|
FakeWeb.register_uri(:get, "http://pagerankalert.com/image.png", :body => "Image", :content_type => "image/png")
|
43
43
|
FakeWeb.register_uri(:get, "http://pagerankalert.com/file.tar.gz", :body => "Image", :content_type => "application/x-gzip")
|
44
44
|
|
45
|
+
# These examples are used to test relative links
|
46
|
+
FakeWeb.register_uri(:get, "http://relative.com/", :response => fixture_file("relative_links.response"))
|
47
|
+
FakeWeb.register_uri(:get, "http://relative.com/company", :response => fixture_file("relative_links.response"))
|
48
|
+
FakeWeb.register_uri(:get, "http://relative.com/company/", :response => fixture_file("relative_links.response"))
|
49
|
+
|
45
50
|
# These examples are used to test the redirections from HTTP to HTTPS and vice versa
|
46
51
|
# http://facebook.com => https://facebook.com
|
47
52
|
FakeWeb.register_uri(:get, "http://facebook.com/", :response => fixture_file("facebook.com.response"))
|
metadata
CHANGED
@@ -1,159 +1,152 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.15.2
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 15
|
9
|
-
- 1
|
10
|
-
version: 1.15.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Jaime Iniesta
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2013-03-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: nokogiri
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
18
|
+
requirements:
|
26
19
|
- - ~>
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 1
|
31
|
-
- 5
|
32
|
-
version: "1.5"
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.5'
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: rash
|
37
23
|
prerelease: false
|
38
|
-
|
39
|
-
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.5'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rash
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - '='
|
36
|
+
- !ruby/object:Gem::Version
|
48
37
|
version: 0.3.2
|
49
38
|
type: :runtime
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: open_uri_redirections
|
53
39
|
prerelease: false
|
54
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - '='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.3.2
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: open_uri_redirections
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
55
49
|
none: false
|
56
|
-
requirements:
|
50
|
+
requirements:
|
57
51
|
- - ~>
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
hash: 27
|
60
|
-
segments:
|
61
|
-
- 0
|
62
|
-
- 1
|
63
|
-
- 0
|
52
|
+
- !ruby/object:Gem::Version
|
64
53
|
version: 0.1.0
|
65
54
|
type: :runtime
|
66
|
-
version_requirements: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
name: addressable
|
69
55
|
prerelease: false
|
70
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.1.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: addressable
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
71
65
|
none: false
|
72
|
-
requirements:
|
66
|
+
requirements:
|
73
67
|
- - ~>
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
hash: 7
|
76
|
-
segments:
|
77
|
-
- 2
|
78
|
-
- 3
|
79
|
-
- 2
|
68
|
+
- !ruby/object:Gem::Version
|
80
69
|
version: 2.3.2
|
81
70
|
type: :runtime
|
82
|
-
version_requirements: *id004
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rspec
|
85
71
|
prerelease: false
|
86
|
-
|
87
|
-
none: false
|
88
|
-
requirements:
|
89
|
-
- -
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 2.3.2
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rspec
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - '='
|
84
|
+
- !ruby/object:Gem::Version
|
96
85
|
version: 2.12.0
|
97
86
|
type: :development
|
98
|
-
version_requirements: *id005
|
99
|
-
- !ruby/object:Gem::Dependency
|
100
|
-
name: fakeweb
|
101
87
|
prerelease: false
|
102
|
-
|
103
|
-
none: false
|
104
|
-
requirements:
|
105
|
-
- -
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - '='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 2.12.0
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: fakeweb
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - '='
|
100
|
+
- !ruby/object:Gem::Version
|
112
101
|
version: 1.3.0
|
113
102
|
type: :development
|
114
|
-
version_requirements: *id006
|
115
|
-
- !ruby/object:Gem::Dependency
|
116
|
-
name: awesome_print
|
117
103
|
prerelease: false
|
118
|
-
|
119
|
-
none: false
|
120
|
-
requirements:
|
121
|
-
- -
|
122
|
-
- !ruby/object:Gem::Version
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - '='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.3.0
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: awesome_print
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - '='
|
116
|
+
- !ruby/object:Gem::Version
|
128
117
|
version: 1.1.0
|
129
118
|
type: :development
|
130
|
-
version_requirements: *id007
|
131
|
-
- !ruby/object:Gem::Dependency
|
132
|
-
name: rake
|
133
119
|
prerelease: false
|
134
|
-
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - '='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.1.0
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: rake
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
135
129
|
none: false
|
136
|
-
requirements:
|
130
|
+
requirements:
|
137
131
|
- - ~>
|
138
|
-
- !ruby/object:Gem::Version
|
139
|
-
hash: 73
|
140
|
-
segments:
|
141
|
-
- 10
|
142
|
-
- 0
|
143
|
-
- 3
|
132
|
+
- !ruby/object:Gem::Version
|
144
133
|
version: 10.0.3
|
145
134
|
type: :development
|
146
|
-
|
147
|
-
|
148
|
-
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 10.0.3
|
142
|
+
description: MetaInspector lets you scrape a web page and get its title, charset,
|
143
|
+
link and meta tags
|
144
|
+
email:
|
149
145
|
- jaimeiniesta@gmail.com
|
150
146
|
executables: []
|
151
|
-
|
152
147
|
extensions: []
|
153
|
-
|
154
148
|
extra_rdoc_files: []
|
155
|
-
|
156
|
-
files:
|
149
|
+
files:
|
157
150
|
- .gitignore
|
158
151
|
- .rspec.example
|
159
152
|
- .travis.yml
|
@@ -185,6 +178,7 @@ files:
|
|
185
178
|
- spec/fixtures/nonhttp.response
|
186
179
|
- spec/fixtures/pagerankalert.com.response
|
187
180
|
- spec/fixtures/protocol_relative.response
|
181
|
+
- spec/fixtures/relative_links.response
|
188
182
|
- spec/fixtures/tea-tron.com.response
|
189
183
|
- spec/fixtures/theonion-no-description.com.response
|
190
184
|
- spec/fixtures/theonion.com.response
|
@@ -198,36 +192,33 @@ files:
|
|
198
192
|
- spec/spec_helper.rb
|
199
193
|
homepage: https://github.com/jaimeiniesta/metainspector
|
200
194
|
licenses: []
|
201
|
-
|
202
195
|
post_install_message:
|
203
196
|
rdoc_options: []
|
204
|
-
|
205
|
-
require_paths:
|
197
|
+
require_paths:
|
206
198
|
- lib
|
207
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
199
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
208
200
|
none: false
|
209
|
-
requirements:
|
210
|
-
- -
|
211
|
-
- !ruby/object:Gem::Version
|
212
|
-
|
213
|
-
segments:
|
201
|
+
requirements:
|
202
|
+
- - ! '>='
|
203
|
+
- !ruby/object:Gem::Version
|
204
|
+
version: '0'
|
205
|
+
segments:
|
214
206
|
- 0
|
215
|
-
|
216
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
207
|
+
hash: 500814243782127721
|
208
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
217
209
|
none: false
|
218
|
-
requirements:
|
219
|
-
- -
|
220
|
-
- !ruby/object:Gem::Version
|
221
|
-
|
222
|
-
segments:
|
210
|
+
requirements:
|
211
|
+
- - ! '>='
|
212
|
+
- !ruby/object:Gem::Version
|
213
|
+
version: '0'
|
214
|
+
segments:
|
223
215
|
- 0
|
224
|
-
|
216
|
+
hash: 500814243782127721
|
225
217
|
requirements: []
|
226
|
-
|
227
218
|
rubyforge_project:
|
228
|
-
rubygems_version: 1.8.
|
219
|
+
rubygems_version: 1.8.25
|
229
220
|
signing_key:
|
230
221
|
specification_version: 3
|
231
|
-
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
222
|
+
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
223
|
+
with metadata from a given URL
|
232
224
|
test_files: []
|
233
|
-
|