metainspector 1.7.1 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -0
- data/lib/meta_inspector/scraper.rb +18 -4
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +20 -25
- data/spec/fixtures/guardian.co.uk.response +3618 -0
- data/spec/fixtures/theonion-no-description.com.response +1060 -0
- data/spec/metainspector_spec.rb +22 -3
- metadata +78 -126
data/spec/metainspector_spec.rb
CHANGED
@@ -25,6 +25,7 @@ describe MetaInspector do
|
|
25
25
|
FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
|
26
26
|
FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
|
27
27
|
FakeWeb.register_uri(:get, "http://www.tea-tron.com/jbravo/blog/", :response => fixture_file("tea-tron.com.response"))
|
28
|
+
FakeWeb.register_uri(:get, "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups", :response => fixture_file("guardian.co.uk.response"))
|
28
29
|
|
29
30
|
EXPECTED_TITLE = 'PageRankAlert.com :: Track your PageRank changes'
|
30
31
|
|
@@ -45,12 +46,18 @@ describe MetaInspector do
|
|
45
46
|
@m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
46
47
|
@m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
47
48
|
end
|
48
|
-
|
49
|
-
it "should find all page images" do
|
49
|
+
|
50
|
+
it "should find all page images" do
|
50
51
|
@m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
|
51
52
|
@m.images == ["/images/pagerank_alert.png?1309512337"]
|
52
53
|
end
|
53
54
|
|
55
|
+
it "should ignore malformed image tags" do
|
56
|
+
# There is an image tag without a source. The scraper should not fatal.
|
57
|
+
@m = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
|
58
|
+
@m.images.size.should == 11
|
59
|
+
end
|
60
|
+
|
54
61
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
55
62
|
@m.parsed_document.class.should == Nokogiri::HTML::Document
|
56
63
|
end
|
@@ -69,7 +76,19 @@ describe MetaInspector do
|
|
69
76
|
@m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
|
70
77
|
end
|
71
78
|
end
|
72
|
-
|
79
|
+
|
80
|
+
context 'Page with missing meta description' do
|
81
|
+
FakeWeb.register_uri(:get, "http://theonion-no-description.com", :response => fixture_file("theonion-no-description.com.response"))
|
82
|
+
|
83
|
+
it "should find secondary description" do
|
84
|
+
@m = MetaInspector.new('http://theonion-no-description.com')
|
85
|
+
@m.description == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday,"+
|
86
|
+
" an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
|
73
92
|
context 'Links' do
|
74
93
|
before(:each) do
|
75
94
|
@m = MetaInspector.new('http://pagerankalert.com')
|
metadata
CHANGED
@@ -1,137 +1,101 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 7
|
8
|
-
- 1
|
9
|
-
version: 1.7.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.8.2
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Jaime Iniesta
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-11-07 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: nokogiri
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &70348297877840 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 1
|
30
|
-
- 5
|
31
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - =
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 1.5.0
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: charguess
|
37
23
|
prerelease: false
|
38
|
-
|
24
|
+
version_requirements: *70348297877840
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: charguess
|
27
|
+
requirement: &70348297900640 !ruby/object:Gem::Requirement
|
39
28
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 1
|
45
|
-
- 3
|
46
|
-
- 20110226181011
|
29
|
+
requirements:
|
30
|
+
- - =
|
31
|
+
- !ruby/object:Gem::Version
|
47
32
|
version: 1.3.20110226181011
|
48
33
|
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: rash
|
52
34
|
prerelease: false
|
53
|
-
|
35
|
+
version_requirements: *70348297900640
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rash
|
38
|
+
requirement: &70348297900180 !ruby/object:Gem::Requirement
|
54
39
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 0
|
60
|
-
- 3
|
61
|
-
- 0
|
40
|
+
requirements:
|
41
|
+
- - =
|
42
|
+
- !ruby/object:Gem::Version
|
62
43
|
version: 0.3.0
|
63
44
|
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: rspec
|
67
45
|
prerelease: false
|
68
|
-
|
46
|
+
version_requirements: *70348297900180
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rspec
|
49
|
+
requirement: &70348297899720 !ruby/object:Gem::Requirement
|
69
50
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 2
|
75
|
-
- 6
|
76
|
-
- 0
|
51
|
+
requirements:
|
52
|
+
- - =
|
53
|
+
- !ruby/object:Gem::Version
|
77
54
|
version: 2.6.0
|
78
55
|
type: :development
|
79
|
-
version_requirements: *id004
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: fakeweb
|
82
56
|
prerelease: false
|
83
|
-
|
57
|
+
version_requirements: *70348297899720
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: fakeweb
|
60
|
+
requirement: &70348297899260 !ruby/object:Gem::Requirement
|
84
61
|
none: false
|
85
|
-
requirements:
|
86
|
-
- -
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
segments:
|
89
|
-
- 1
|
90
|
-
- 3
|
91
|
-
- 0
|
62
|
+
requirements:
|
63
|
+
- - =
|
64
|
+
- !ruby/object:Gem::Version
|
92
65
|
version: 1.3.0
|
93
66
|
type: :development
|
94
|
-
version_requirements: *id005
|
95
|
-
- !ruby/object:Gem::Dependency
|
96
|
-
name: awesome_print
|
97
67
|
prerelease: false
|
98
|
-
|
68
|
+
version_requirements: *70348297899260
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: awesome_print
|
71
|
+
requirement: &70348297898800 !ruby/object:Gem::Requirement
|
99
72
|
none: false
|
100
|
-
requirements:
|
101
|
-
- -
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
segments:
|
104
|
-
- 0
|
105
|
-
- 4
|
106
|
-
- 0
|
73
|
+
requirements:
|
74
|
+
- - =
|
75
|
+
- !ruby/object:Gem::Version
|
107
76
|
version: 0.4.0
|
108
77
|
type: :development
|
109
|
-
version_requirements: *id006
|
110
|
-
- !ruby/object:Gem::Dependency
|
111
|
-
name: rake
|
112
78
|
prerelease: false
|
113
|
-
|
79
|
+
version_requirements: *70348297898800
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: rake
|
82
|
+
requirement: &70348297898340 !ruby/object:Gem::Requirement
|
114
83
|
none: false
|
115
|
-
requirements:
|
116
|
-
- -
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
segments:
|
119
|
-
- 0
|
120
|
-
- 9
|
121
|
-
- 2
|
84
|
+
requirements:
|
85
|
+
- - =
|
86
|
+
- !ruby/object:Gem::Version
|
122
87
|
version: 0.9.2
|
123
88
|
type: :development
|
124
|
-
|
125
|
-
|
126
|
-
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *70348297898340
|
91
|
+
description: MetaInspector lets you scrape a web page and get its title, charset,
|
92
|
+
link and meta tags
|
93
|
+
email:
|
127
94
|
- jaimeiniesta@gmail.com
|
128
95
|
executables: []
|
129
|
-
|
130
96
|
extensions: []
|
131
|
-
|
132
97
|
extra_rdoc_files: []
|
133
|
-
|
134
|
-
files:
|
98
|
+
files:
|
135
99
|
- .gitignore
|
136
100
|
- .rspec.example
|
137
101
|
- .travis.yml
|
@@ -147,49 +111,37 @@ files:
|
|
147
111
|
- samples/basic_scraping.rb
|
148
112
|
- samples/spider.rb
|
149
113
|
- spec/fixtures/alazan.com.response
|
114
|
+
- spec/fixtures/guardian.co.uk.response
|
150
115
|
- spec/fixtures/iteh.at.response
|
151
116
|
- spec/fixtures/pagerankalert.com.response
|
152
117
|
- spec/fixtures/tea-tron.com.response
|
118
|
+
- spec/fixtures/theonion-no-description.com.response
|
153
119
|
- spec/fixtures/theonion.com.response
|
154
120
|
- spec/metainspector_spec.rb
|
155
121
|
- spec/spec_helper.rb
|
156
|
-
has_rdoc: true
|
157
122
|
homepage: https://github.com/jaimeiniesta/metainspector
|
158
123
|
licenses: []
|
159
|
-
|
160
124
|
post_install_message:
|
161
125
|
rdoc_options: []
|
162
|
-
|
163
|
-
require_paths:
|
126
|
+
require_paths:
|
164
127
|
- lib
|
165
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
166
129
|
none: false
|
167
|
-
requirements:
|
168
|
-
- -
|
169
|
-
- !ruby/object:Gem::Version
|
170
|
-
|
171
|
-
|
172
|
-
version: "0"
|
173
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
174
135
|
none: false
|
175
|
-
requirements:
|
176
|
-
- -
|
177
|
-
- !ruby/object:Gem::Version
|
178
|
-
|
179
|
-
- 0
|
180
|
-
version: "0"
|
136
|
+
requirements:
|
137
|
+
- - ! '>='
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
181
140
|
requirements: []
|
182
|
-
|
183
|
-
|
184
|
-
rubygems_version: 1.3.7
|
141
|
+
rubyforge_project:
|
142
|
+
rubygems_version: 1.8.6
|
185
143
|
signing_key:
|
186
144
|
specification_version: 3
|
187
|
-
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
188
|
-
|
189
|
-
|
190
|
-
- spec/fixtures/iteh.at.response
|
191
|
-
- spec/fixtures/pagerankalert.com.response
|
192
|
-
- spec/fixtures/tea-tron.com.response
|
193
|
-
- spec/fixtures/theonion.com.response
|
194
|
-
- spec/metainspector_spec.rb
|
195
|
-
- spec/spec_helper.rb
|
145
|
+
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
146
|
+
with metadata from a given URL
|
147
|
+
test_files: []
|