metainspector 1.7.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ describe MetaInspector do
25
25
  FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
26
26
  FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
27
27
  FakeWeb.register_uri(:get, "http://www.tea-tron.com/jbravo/blog/", :response => fixture_file("tea-tron.com.response"))
28
+ FakeWeb.register_uri(:get, "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups", :response => fixture_file("guardian.co.uk.response"))
28
29
 
29
30
  EXPECTED_TITLE = 'PageRankAlert.com :: Track your PageRank changes'
30
31
 
@@ -45,12 +46,18 @@ describe MetaInspector do
45
46
  @m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
46
47
  @m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
47
48
  end
48
-
49
- it "should find all page images" do
49
+
50
+ it "should find all page images" do
50
51
  @m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
51
52
  @m.images == ["/images/pagerank_alert.png?1309512337"]
52
53
  end
53
54
 
55
+ it "should ignore malformed image tags" do
56
+ # There is an image tag without a source. The scraper should not fatal.
57
+ @m = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
58
+ @m.images.size.should == 11
59
+ end
60
+
54
61
  it "should have a Nokogiri::HTML::Document as parsed_document" do
55
62
  @m.parsed_document.class.should == Nokogiri::HTML::Document
56
63
  end
@@ -69,7 +76,19 @@ describe MetaInspector do
69
76
  @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
70
77
  end
71
78
  end
72
-
79
+
80
+ context 'Page with missing meta description' do
81
+ FakeWeb.register_uri(:get, "http://theonion-no-description.com", :response => fixture_file("theonion-no-description.com.response"))
82
+
83
+ it "should find secondary description" do
84
+ @m = MetaInspector.new('http://theonion-no-description.com')
85
+ @m.description == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday,"+
86
+ " an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
87
+ end
88
+
89
+ end
90
+
91
+
73
92
  context 'Links' do
74
93
  before(:each) do
75
94
  @m = MetaInspector.new('http://pagerankalert.com')
metadata CHANGED
@@ -1,137 +1,101 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 7
8
- - 1
9
- version: 1.7.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.8.2
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Jaime Iniesta
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-07-30 00:00:00 +02:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-11-07 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &70348297877840 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - "="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 5
31
- - 0
18
+ requirements:
19
+ - - =
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.5.0
33
22
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: charguess
37
23
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *70348297877840
25
+ - !ruby/object:Gem::Dependency
26
+ name: charguess
27
+ requirement: &70348297900640 !ruby/object:Gem::Requirement
39
28
  none: false
40
- requirements:
41
- - - "="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 1
45
- - 3
46
- - 20110226181011
29
+ requirements:
30
+ - - =
31
+ - !ruby/object:Gem::Version
47
32
  version: 1.3.20110226181011
48
33
  type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: rash
52
34
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *70348297900640
36
+ - !ruby/object:Gem::Dependency
37
+ name: rash
38
+ requirement: &70348297900180 !ruby/object:Gem::Requirement
54
39
  none: false
55
- requirements:
56
- - - ~>
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 0
60
- - 3
61
- - 0
40
+ requirements:
41
+ - - =
42
+ - !ruby/object:Gem::Version
62
43
  version: 0.3.0
63
44
  type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: rspec
67
45
  prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *70348297900180
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &70348297899720 !ruby/object:Gem::Requirement
69
50
  none: false
70
- requirements:
71
- - - ~>
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 2
75
- - 6
76
- - 0
51
+ requirements:
52
+ - - =
53
+ - !ruby/object:Gem::Version
77
54
  version: 2.6.0
78
55
  type: :development
79
- version_requirements: *id004
80
- - !ruby/object:Gem::Dependency
81
- name: fakeweb
82
56
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *70348297899720
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &70348297899260 !ruby/object:Gem::Requirement
84
61
  none: false
85
- requirements:
86
- - - ~>
87
- - !ruby/object:Gem::Version
88
- segments:
89
- - 1
90
- - 3
91
- - 0
62
+ requirements:
63
+ - - =
64
+ - !ruby/object:Gem::Version
92
65
  version: 1.3.0
93
66
  type: :development
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: awesome_print
97
67
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
68
+ version_requirements: *70348297899260
69
+ - !ruby/object:Gem::Dependency
70
+ name: awesome_print
71
+ requirement: &70348297898800 !ruby/object:Gem::Requirement
99
72
  none: false
100
- requirements:
101
- - - ~>
102
- - !ruby/object:Gem::Version
103
- segments:
104
- - 0
105
- - 4
106
- - 0
73
+ requirements:
74
+ - - =
75
+ - !ruby/object:Gem::Version
107
76
  version: 0.4.0
108
77
  type: :development
109
- version_requirements: *id006
110
- - !ruby/object:Gem::Dependency
111
- name: rake
112
78
  prerelease: false
113
- requirement: &id007 !ruby/object:Gem::Requirement
79
+ version_requirements: *70348297898800
80
+ - !ruby/object:Gem::Dependency
81
+ name: rake
82
+ requirement: &70348297898340 !ruby/object:Gem::Requirement
114
83
  none: false
115
- requirements:
116
- - - "="
117
- - !ruby/object:Gem::Version
118
- segments:
119
- - 0
120
- - 9
121
- - 2
84
+ requirements:
85
+ - - =
86
+ - !ruby/object:Gem::Version
122
87
  version: 0.9.2
123
88
  type: :development
124
- version_requirements: *id007
125
- description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
126
- email:
89
+ prerelease: false
90
+ version_requirements: *70348297898340
91
+ description: MetaInspector lets you scrape a web page and get its title, charset,
92
+ link and meta tags
93
+ email:
127
94
  - jaimeiniesta@gmail.com
128
95
  executables: []
129
-
130
96
  extensions: []
131
-
132
97
  extra_rdoc_files: []
133
-
134
- files:
98
+ files:
135
99
  - .gitignore
136
100
  - .rspec.example
137
101
  - .travis.yml
@@ -147,49 +111,37 @@ files:
147
111
  - samples/basic_scraping.rb
148
112
  - samples/spider.rb
149
113
  - spec/fixtures/alazan.com.response
114
+ - spec/fixtures/guardian.co.uk.response
150
115
  - spec/fixtures/iteh.at.response
151
116
  - spec/fixtures/pagerankalert.com.response
152
117
  - spec/fixtures/tea-tron.com.response
118
+ - spec/fixtures/theonion-no-description.com.response
153
119
  - spec/fixtures/theonion.com.response
154
120
  - spec/metainspector_spec.rb
155
121
  - spec/spec_helper.rb
156
- has_rdoc: true
157
122
  homepage: https://github.com/jaimeiniesta/metainspector
158
123
  licenses: []
159
-
160
124
  post_install_message:
161
125
  rdoc_options: []
162
-
163
- require_paths:
126
+ require_paths:
164
127
  - lib
165
- required_ruby_version: !ruby/object:Gem::Requirement
128
+ required_ruby_version: !ruby/object:Gem::Requirement
166
129
  none: false
167
- requirements:
168
- - - ">="
169
- - !ruby/object:Gem::Version
170
- segments:
171
- - 0
172
- version: "0"
173
- required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
174
135
  none: false
175
- requirements:
176
- - - ">="
177
- - !ruby/object:Gem::Version
178
- segments:
179
- - 0
180
- version: "0"
136
+ requirements:
137
+ - - ! '>='
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
181
140
  requirements: []
182
-
183
- rubyforge_project: MetaInspector
184
- rubygems_version: 1.3.7
141
+ rubyforge_project:
142
+ rubygems_version: 1.8.6
185
143
  signing_key:
186
144
  specification_version: 3
187
- summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
188
- test_files:
189
- - spec/fixtures/alazan.com.response
190
- - spec/fixtures/iteh.at.response
191
- - spec/fixtures/pagerankalert.com.response
192
- - spec/fixtures/tea-tron.com.response
193
- - spec/fixtures/theonion.com.response
194
- - spec/metainspector_spec.rb
195
- - spec/spec_helper.rb
145
+ summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
146
+ with metadata from a given URL
147
+ test_files: []