metainspector 1.7.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -0
- data/lib/meta_inspector/scraper.rb +18 -4
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +20 -25
- data/spec/fixtures/guardian.co.uk.response +3618 -0
- data/spec/fixtures/theonion-no-description.com.response +1060 -0
- data/spec/metainspector_spec.rb +22 -3
- metadata +78 -126
data/spec/metainspector_spec.rb
CHANGED
@@ -25,6 +25,7 @@ describe MetaInspector do
|
|
25
25
|
FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
|
26
26
|
FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
|
27
27
|
FakeWeb.register_uri(:get, "http://www.tea-tron.com/jbravo/blog/", :response => fixture_file("tea-tron.com.response"))
|
28
|
+
FakeWeb.register_uri(:get, "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups", :response => fixture_file("guardian.co.uk.response"))
|
28
29
|
|
29
30
|
EXPECTED_TITLE = 'PageRankAlert.com :: Track your PageRank changes'
|
30
31
|
|
@@ -45,12 +46,18 @@ describe MetaInspector do
|
|
45
46
|
@m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
46
47
|
@m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
47
48
|
end
|
48
|
-
|
49
|
-
it "should find all page images" do
|
49
|
+
|
50
|
+
it "should find all page images" do
|
50
51
|
@m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
|
51
52
|
@m.images == ["/images/pagerank_alert.png?1309512337"]
|
52
53
|
end
|
53
54
|
|
55
|
+
it "should ignore malformed image tags" do
|
56
|
+
# There is an image tag without a source. The scraper should not fatal.
|
57
|
+
@m = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
|
58
|
+
@m.images.size.should == 11
|
59
|
+
end
|
60
|
+
|
54
61
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
55
62
|
@m.parsed_document.class.should == Nokogiri::HTML::Document
|
56
63
|
end
|
@@ -69,7 +76,19 @@ describe MetaInspector do
|
|
69
76
|
@m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
|
70
77
|
end
|
71
78
|
end
|
72
|
-
|
79
|
+
|
80
|
+
context 'Page with missing meta description' do
|
81
|
+
FakeWeb.register_uri(:get, "http://theonion-no-description.com", :response => fixture_file("theonion-no-description.com.response"))
|
82
|
+
|
83
|
+
it "should find secondary description" do
|
84
|
+
@m = MetaInspector.new('http://theonion-no-description.com')
|
85
|
+
@m.description == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday,"+
|
86
|
+
" an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
|
73
92
|
context 'Links' do
|
74
93
|
before(:each) do
|
75
94
|
@m = MetaInspector.new('http://pagerankalert.com')
|
metadata
CHANGED
@@ -1,137 +1,101 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 7
|
8
|
-
- 1
|
9
|
-
version: 1.7.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.8.2
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Jaime Iniesta
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-11-07 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: nokogiri
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &70348297877840 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 1
|
30
|
-
- 5
|
31
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - =
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 1.5.0
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: charguess
|
37
23
|
prerelease: false
|
38
|
-
|
24
|
+
version_requirements: *70348297877840
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: charguess
|
27
|
+
requirement: &70348297900640 !ruby/object:Gem::Requirement
|
39
28
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 1
|
45
|
-
- 3
|
46
|
-
- 20110226181011
|
29
|
+
requirements:
|
30
|
+
- - =
|
31
|
+
- !ruby/object:Gem::Version
|
47
32
|
version: 1.3.20110226181011
|
48
33
|
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: rash
|
52
34
|
prerelease: false
|
53
|
-
|
35
|
+
version_requirements: *70348297900640
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rash
|
38
|
+
requirement: &70348297900180 !ruby/object:Gem::Requirement
|
54
39
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 0
|
60
|
-
- 3
|
61
|
-
- 0
|
40
|
+
requirements:
|
41
|
+
- - =
|
42
|
+
- !ruby/object:Gem::Version
|
62
43
|
version: 0.3.0
|
63
44
|
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: rspec
|
67
45
|
prerelease: false
|
68
|
-
|
46
|
+
version_requirements: *70348297900180
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rspec
|
49
|
+
requirement: &70348297899720 !ruby/object:Gem::Requirement
|
69
50
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 2
|
75
|
-
- 6
|
76
|
-
- 0
|
51
|
+
requirements:
|
52
|
+
- - =
|
53
|
+
- !ruby/object:Gem::Version
|
77
54
|
version: 2.6.0
|
78
55
|
type: :development
|
79
|
-
version_requirements: *id004
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: fakeweb
|
82
56
|
prerelease: false
|
83
|
-
|
57
|
+
version_requirements: *70348297899720
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: fakeweb
|
60
|
+
requirement: &70348297899260 !ruby/object:Gem::Requirement
|
84
61
|
none: false
|
85
|
-
requirements:
|
86
|
-
- -
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
segments:
|
89
|
-
- 1
|
90
|
-
- 3
|
91
|
-
- 0
|
62
|
+
requirements:
|
63
|
+
- - =
|
64
|
+
- !ruby/object:Gem::Version
|
92
65
|
version: 1.3.0
|
93
66
|
type: :development
|
94
|
-
version_requirements: *id005
|
95
|
-
- !ruby/object:Gem::Dependency
|
96
|
-
name: awesome_print
|
97
67
|
prerelease: false
|
98
|
-
|
68
|
+
version_requirements: *70348297899260
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: awesome_print
|
71
|
+
requirement: &70348297898800 !ruby/object:Gem::Requirement
|
99
72
|
none: false
|
100
|
-
requirements:
|
101
|
-
- -
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
segments:
|
104
|
-
- 0
|
105
|
-
- 4
|
106
|
-
- 0
|
73
|
+
requirements:
|
74
|
+
- - =
|
75
|
+
- !ruby/object:Gem::Version
|
107
76
|
version: 0.4.0
|
108
77
|
type: :development
|
109
|
-
version_requirements: *id006
|
110
|
-
- !ruby/object:Gem::Dependency
|
111
|
-
name: rake
|
112
78
|
prerelease: false
|
113
|
-
|
79
|
+
version_requirements: *70348297898800
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: rake
|
82
|
+
requirement: &70348297898340 !ruby/object:Gem::Requirement
|
114
83
|
none: false
|
115
|
-
requirements:
|
116
|
-
- -
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
segments:
|
119
|
-
- 0
|
120
|
-
- 9
|
121
|
-
- 2
|
84
|
+
requirements:
|
85
|
+
- - =
|
86
|
+
- !ruby/object:Gem::Version
|
122
87
|
version: 0.9.2
|
123
88
|
type: :development
|
124
|
-
|
125
|
-
|
126
|
-
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *70348297898340
|
91
|
+
description: MetaInspector lets you scrape a web page and get its title, charset,
|
92
|
+
link and meta tags
|
93
|
+
email:
|
127
94
|
- jaimeiniesta@gmail.com
|
128
95
|
executables: []
|
129
|
-
|
130
96
|
extensions: []
|
131
|
-
|
132
97
|
extra_rdoc_files: []
|
133
|
-
|
134
|
-
files:
|
98
|
+
files:
|
135
99
|
- .gitignore
|
136
100
|
- .rspec.example
|
137
101
|
- .travis.yml
|
@@ -147,49 +111,37 @@ files:
|
|
147
111
|
- samples/basic_scraping.rb
|
148
112
|
- samples/spider.rb
|
149
113
|
- spec/fixtures/alazan.com.response
|
114
|
+
- spec/fixtures/guardian.co.uk.response
|
150
115
|
- spec/fixtures/iteh.at.response
|
151
116
|
- spec/fixtures/pagerankalert.com.response
|
152
117
|
- spec/fixtures/tea-tron.com.response
|
118
|
+
- spec/fixtures/theonion-no-description.com.response
|
153
119
|
- spec/fixtures/theonion.com.response
|
154
120
|
- spec/metainspector_spec.rb
|
155
121
|
- spec/spec_helper.rb
|
156
|
-
has_rdoc: true
|
157
122
|
homepage: https://github.com/jaimeiniesta/metainspector
|
158
123
|
licenses: []
|
159
|
-
|
160
124
|
post_install_message:
|
161
125
|
rdoc_options: []
|
162
|
-
|
163
|
-
require_paths:
|
126
|
+
require_paths:
|
164
127
|
- lib
|
165
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
166
129
|
none: false
|
167
|
-
requirements:
|
168
|
-
- -
|
169
|
-
- !ruby/object:Gem::Version
|
170
|
-
|
171
|
-
|
172
|
-
version: "0"
|
173
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
174
135
|
none: false
|
175
|
-
requirements:
|
176
|
-
- -
|
177
|
-
- !ruby/object:Gem::Version
|
178
|
-
|
179
|
-
- 0
|
180
|
-
version: "0"
|
136
|
+
requirements:
|
137
|
+
- - ! '>='
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
181
140
|
requirements: []
|
182
|
-
|
183
|
-
|
184
|
-
rubygems_version: 1.3.7
|
141
|
+
rubyforge_project:
|
142
|
+
rubygems_version: 1.8.6
|
185
143
|
signing_key:
|
186
144
|
specification_version: 3
|
187
|
-
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
188
|
-
|
189
|
-
|
190
|
-
- spec/fixtures/iteh.at.response
|
191
|
-
- spec/fixtures/pagerankalert.com.response
|
192
|
-
- spec/fixtures/tea-tron.com.response
|
193
|
-
- spec/fixtures/theonion.com.response
|
194
|
-
- spec/metainspector_spec.rb
|
195
|
-
- spec/spec_helper.rb
|
145
|
+
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
146
|
+
with metadata from a given URL
|
147
|
+
test_files: []
|