metainspector 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -1
- data/lib/meta_inspector/scraper.rb +15 -1
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +3 -1
- data/spec/fixtures/alazan.com.response +377 -0
- data/spec/fixtures/iteh.at.response +971 -0
- data/spec/fixtures/pagerankalert.com.response +186 -0
- data/spec/fixtures/tea-tron.com.response +957 -0
- data/spec/fixtures/theonion.com.response +1061 -0
- data/spec/metainspector_spec.rb +43 -4
- data/spec/spec_helper.rb +10 -1
- metadata +28 -3
data/spec/metainspector_spec.rb
CHANGED
@@ -5,6 +5,9 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
|
|
5
5
|
describe MetaInspector do
|
6
6
|
|
7
7
|
context 'Initialization' do
|
8
|
+
|
9
|
+
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
10
|
+
|
8
11
|
it 'should accept an URL with a scheme' do
|
9
12
|
@m = MetaInspector.new('http://pagerankalert.com')
|
10
13
|
@m.url.should == 'http://pagerankalert.com'
|
@@ -17,6 +20,12 @@ describe MetaInspector do
|
|
17
20
|
end
|
18
21
|
|
19
22
|
context 'Doing a basic scrape' do
|
23
|
+
|
24
|
+
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
25
|
+
FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
|
26
|
+
FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
|
27
|
+
FakeWeb.register_uri(:get, "http://www.tea-tron.com/jbravo/blog/", :response => fixture_file("tea-tron.com.response"))
|
28
|
+
|
20
29
|
EXPECTED_TITLE = 'PageRankAlert.com :: Track your PageRank changes'
|
21
30
|
|
22
31
|
before(:each) do
|
@@ -36,10 +45,6 @@ describe MetaInspector do
|
|
36
45
|
@m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
37
46
|
end
|
38
47
|
|
39
|
-
it "should get the links" do
|
40
|
-
@m.links.size.should == 9
|
41
|
-
end
|
42
|
-
|
43
48
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
44
49
|
@m.parsed_document.class.should == Nokogiri::HTML::Document
|
45
50
|
end
|
@@ -59,6 +64,36 @@ describe MetaInspector do
|
|
59
64
|
end
|
60
65
|
end
|
61
66
|
|
67
|
+
context 'Links' do
|
68
|
+
before(:each) do
|
69
|
+
@m = MetaInspector.new('http://pagerankalert.com')
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should get the links" do
|
73
|
+
@m.links.should == [
|
74
|
+
"/",
|
75
|
+
"/es?language=es",
|
76
|
+
"/users/sign_up",
|
77
|
+
"/users/sign_in",
|
78
|
+
"http://pagerankalert.posterous.com",
|
79
|
+
"http://twitter.com/pagerankalert",
|
80
|
+
"http://twitter.com/share"
|
81
|
+
]
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should convert links to absolute urls" do
|
85
|
+
@m.absolute_links.should == [
|
86
|
+
"http://pagerankalert.com/",
|
87
|
+
"http://pagerankalert.com/es?language=es",
|
88
|
+
"http://pagerankalert.com/users/sign_up",
|
89
|
+
"http://pagerankalert.com/users/sign_in",
|
90
|
+
"http://pagerankalert.posterous.com",
|
91
|
+
"http://twitter.com/pagerankalert",
|
92
|
+
"http://twitter.com/share"
|
93
|
+
]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
62
97
|
context 'Getting meta tags by ghost methods' do
|
63
98
|
before(:each) do
|
64
99
|
@m = MetaInspector.new('http://pagerankalert.com')
|
@@ -97,6 +132,10 @@ describe MetaInspector do
|
|
97
132
|
end
|
98
133
|
|
99
134
|
context 'Charset detection' do
|
135
|
+
|
136
|
+
FakeWeb.register_uri(:get, "http://www.pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
137
|
+
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
138
|
+
|
100
139
|
it "should detect windows-1252 charset" do
|
101
140
|
@m = MetaInspector.new('http://www.alazan.com')
|
102
141
|
@m.charset.should == "windows-1252"
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,13 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), "/../lib")
|
4
|
-
require 'meta_inspector'
|
4
|
+
require 'meta_inspector'
|
5
|
+
require 'fakeweb'
|
6
|
+
|
7
|
+
FakeWeb.allow_net_connect = false
|
8
|
+
|
9
|
+
def fixture_file(filename)
|
10
|
+
return '' if filename == ''
|
11
|
+
file_path = File.expand_path(File.dirname(__FILE__) + '/fixtures/' + filename)
|
12
|
+
File.read(file_path)
|
13
|
+
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 4
|
7
|
+
- 5
|
8
8
|
- 0
|
9
|
-
version: 1.4.0
|
9
|
+
version: 1.5.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jaime Iniesta
|
@@ -62,6 +62,21 @@ dependencies:
|
|
62
62
|
version: 2.6.0
|
63
63
|
type: :development
|
64
64
|
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: fakeweb
|
67
|
+
prerelease: false
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ~>
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
segments:
|
74
|
+
- 1
|
75
|
+
- 3
|
76
|
+
- 0
|
77
|
+
version: 1.3.0
|
78
|
+
type: :development
|
79
|
+
version_requirements: *id004
|
65
80
|
description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
|
66
81
|
email:
|
67
82
|
- jaimeiniesta@gmail.com
|
@@ -85,10 +100,15 @@ files:
|
|
85
100
|
- meta_inspector.gemspec
|
86
101
|
- samples/basic_scraping.rb
|
87
102
|
- samples/spider.rb
|
103
|
+
- spec/fixtures/alazan.com.response
|
104
|
+
- spec/fixtures/iteh.at.response
|
105
|
+
- spec/fixtures/pagerankalert.com.response
|
106
|
+
- spec/fixtures/tea-tron.com.response
|
107
|
+
- spec/fixtures/theonion.com.response
|
88
108
|
- spec/metainspector_spec.rb
|
89
109
|
- spec/spec_helper.rb
|
90
110
|
has_rdoc: true
|
91
|
-
homepage: https://
|
111
|
+
homepage: https://github.com/jaimeiniesta/metainspector
|
92
112
|
licenses: []
|
93
113
|
|
94
114
|
post_install_message:
|
@@ -120,5 +140,10 @@ signing_key:
|
|
120
140
|
specification_version: 3
|
121
141
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
|
122
142
|
test_files:
|
143
|
+
- spec/fixtures/alazan.com.response
|
144
|
+
- spec/fixtures/iteh.at.response
|
145
|
+
- spec/fixtures/pagerankalert.com.response
|
146
|
+
- spec/fixtures/tea-tron.com.response
|
147
|
+
- spec/fixtures/theonion.com.response
|
123
148
|
- spec/metainspector_spec.rb
|
124
149
|
- spec/spec_helper.rb
|