metainspector 1.7.1 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -25,6 +25,7 @@ describe MetaInspector do
25
25
  FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
26
26
  FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
27
27
  FakeWeb.register_uri(:get, "http://www.tea-tron.com/jbravo/blog/", :response => fixture_file("tea-tron.com.response"))
28
+ FakeWeb.register_uri(:get, "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups", :response => fixture_file("guardian.co.uk.response"))
28
29
 
29
30
  EXPECTED_TITLE = 'PageRankAlert.com :: Track your PageRank changes'
30
31
 
@@ -45,12 +46,18 @@ describe MetaInspector do
45
46
  @m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
46
47
  @m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
47
48
  end
48
-
49
- it "should find all page images" do
49
+
50
+ it "should find all page images" do
50
51
  @m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
51
52
  @m.images == ["/images/pagerank_alert.png?1309512337"]
52
53
  end
53
54
 
55
+ it "should ignore malformed image tags" do
56
+ # There is an image tag without a source. The scraper should not fatal.
57
+ @m = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
58
+ @m.images.size.should == 11
59
+ end
60
+
54
61
  it "should have a Nokogiri::HTML::Document as parsed_document" do
55
62
  @m.parsed_document.class.should == Nokogiri::HTML::Document
56
63
  end
@@ -69,7 +76,19 @@ describe MetaInspector do
69
76
  @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
70
77
  end
71
78
  end
72
-
79
+
80
+ context 'Page with missing meta description' do
81
+ FakeWeb.register_uri(:get, "http://theonion-no-description.com", :response => fixture_file("theonion-no-description.com.response"))
82
+
83
+ it "should find secondary description" do
84
+ @m = MetaInspector.new('http://theonion-no-description.com')
85
+ @m.description == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday,"+
86
+ " an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
87
+ end
88
+
89
+ end
90
+
91
+
73
92
  context 'Links' do
74
93
  before(:each) do
75
94
  @m = MetaInspector.new('http://pagerankalert.com')
metadata CHANGED
@@ -1,137 +1,101 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 7
8
- - 1
9
- version: 1.7.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.8.2
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Jaime Iniesta
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-07-30 00:00:00 +02:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-11-07 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &70348297877840 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - "="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 5
31
- - 0
18
+ requirements:
19
+ - - =
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.5.0
33
22
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: charguess
37
23
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *70348297877840
25
+ - !ruby/object:Gem::Dependency
26
+ name: charguess
27
+ requirement: &70348297900640 !ruby/object:Gem::Requirement
39
28
  none: false
40
- requirements:
41
- - - "="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 1
45
- - 3
46
- - 20110226181011
29
+ requirements:
30
+ - - =
31
+ - !ruby/object:Gem::Version
47
32
  version: 1.3.20110226181011
48
33
  type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: rash
52
34
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *70348297900640
36
+ - !ruby/object:Gem::Dependency
37
+ name: rash
38
+ requirement: &70348297900180 !ruby/object:Gem::Requirement
54
39
  none: false
55
- requirements:
56
- - - ~>
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 0
60
- - 3
61
- - 0
40
+ requirements:
41
+ - - =
42
+ - !ruby/object:Gem::Version
62
43
  version: 0.3.0
63
44
  type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: rspec
67
45
  prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *70348297900180
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &70348297899720 !ruby/object:Gem::Requirement
69
50
  none: false
70
- requirements:
71
- - - ~>
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 2
75
- - 6
76
- - 0
51
+ requirements:
52
+ - - =
53
+ - !ruby/object:Gem::Version
77
54
  version: 2.6.0
78
55
  type: :development
79
- version_requirements: *id004
80
- - !ruby/object:Gem::Dependency
81
- name: fakeweb
82
56
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *70348297899720
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &70348297899260 !ruby/object:Gem::Requirement
84
61
  none: false
85
- requirements:
86
- - - ~>
87
- - !ruby/object:Gem::Version
88
- segments:
89
- - 1
90
- - 3
91
- - 0
62
+ requirements:
63
+ - - =
64
+ - !ruby/object:Gem::Version
92
65
  version: 1.3.0
93
66
  type: :development
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: awesome_print
97
67
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
68
+ version_requirements: *70348297899260
69
+ - !ruby/object:Gem::Dependency
70
+ name: awesome_print
71
+ requirement: &70348297898800 !ruby/object:Gem::Requirement
99
72
  none: false
100
- requirements:
101
- - - ~>
102
- - !ruby/object:Gem::Version
103
- segments:
104
- - 0
105
- - 4
106
- - 0
73
+ requirements:
74
+ - - =
75
+ - !ruby/object:Gem::Version
107
76
  version: 0.4.0
108
77
  type: :development
109
- version_requirements: *id006
110
- - !ruby/object:Gem::Dependency
111
- name: rake
112
78
  prerelease: false
113
- requirement: &id007 !ruby/object:Gem::Requirement
79
+ version_requirements: *70348297898800
80
+ - !ruby/object:Gem::Dependency
81
+ name: rake
82
+ requirement: &70348297898340 !ruby/object:Gem::Requirement
114
83
  none: false
115
- requirements:
116
- - - "="
117
- - !ruby/object:Gem::Version
118
- segments:
119
- - 0
120
- - 9
121
- - 2
84
+ requirements:
85
+ - - =
86
+ - !ruby/object:Gem::Version
122
87
  version: 0.9.2
123
88
  type: :development
124
- version_requirements: *id007
125
- description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
126
- email:
89
+ prerelease: false
90
+ version_requirements: *70348297898340
91
+ description: MetaInspector lets you scrape a web page and get its title, charset,
92
+ link and meta tags
93
+ email:
127
94
  - jaimeiniesta@gmail.com
128
95
  executables: []
129
-
130
96
  extensions: []
131
-
132
97
  extra_rdoc_files: []
133
-
134
- files:
98
+ files:
135
99
  - .gitignore
136
100
  - .rspec.example
137
101
  - .travis.yml
@@ -147,49 +111,37 @@ files:
147
111
  - samples/basic_scraping.rb
148
112
  - samples/spider.rb
149
113
  - spec/fixtures/alazan.com.response
114
+ - spec/fixtures/guardian.co.uk.response
150
115
  - spec/fixtures/iteh.at.response
151
116
  - spec/fixtures/pagerankalert.com.response
152
117
  - spec/fixtures/tea-tron.com.response
118
+ - spec/fixtures/theonion-no-description.com.response
153
119
  - spec/fixtures/theonion.com.response
154
120
  - spec/metainspector_spec.rb
155
121
  - spec/spec_helper.rb
156
- has_rdoc: true
157
122
  homepage: https://github.com/jaimeiniesta/metainspector
158
123
  licenses: []
159
-
160
124
  post_install_message:
161
125
  rdoc_options: []
162
-
163
- require_paths:
126
+ require_paths:
164
127
  - lib
165
- required_ruby_version: !ruby/object:Gem::Requirement
128
+ required_ruby_version: !ruby/object:Gem::Requirement
166
129
  none: false
167
- requirements:
168
- - - ">="
169
- - !ruby/object:Gem::Version
170
- segments:
171
- - 0
172
- version: "0"
173
- required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
174
135
  none: false
175
- requirements:
176
- - - ">="
177
- - !ruby/object:Gem::Version
178
- segments:
179
- - 0
180
- version: "0"
136
+ requirements:
137
+ - - ! '>='
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
181
140
  requirements: []
182
-
183
- rubyforge_project: MetaInspector
184
- rubygems_version: 1.3.7
141
+ rubyforge_project:
142
+ rubygems_version: 1.8.6
185
143
  signing_key:
186
144
  specification_version: 3
187
- summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
188
- test_files:
189
- - spec/fixtures/alazan.com.response
190
- - spec/fixtures/iteh.at.response
191
- - spec/fixtures/pagerankalert.com.response
192
- - spec/fixtures/tea-tron.com.response
193
- - spec/fixtures/theonion.com.response
194
- - spec/metainspector_spec.rb
195
- - spec/spec_helper.rb
145
+ summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
146
+ with metadata from a given URL
147
+ test_files: []