metainspector 1.9.6 → 1.9.7

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,4 +1,8 @@
1
- MetaInspector is a gem for web scraping purposes. You give it an URL, and it lets you easily get its title, links, and meta tags.
1
+ MetaInspector is a gem for web scraping purposes. You give it a URL, and it lets you easily get its title, links, images, charset, description, keywords, meta tags, and more.
2
+
3
+ = See it in action!
4
+
5
+ You can try MetaInspector live at this little demo: https://metainspectordemo.herokuapp.com
2
6
 
3
7
  = Installation
4
8
 
@@ -149,7 +149,7 @@ module MetaInspector
149
149
  def parsed_links
150
150
  @parsed_links ||= parsed_document.search("//a") \
151
151
  .map {|link| link.attributes["href"] \
152
- .to_s.strip}.uniq rescue nil
152
+ .to_s.strip}.uniq rescue []
153
153
  end
154
154
 
155
155
  def parsed_images
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.9.6"
4
+ VERSION = "1.9.7"
5
5
  end
@@ -0,0 +1,17 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ </head>
15
+ <body>
16
+ </body>
17
+ </html>
@@ -16,6 +16,7 @@ describe MetaInspector do
16
16
  FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
17
17
  FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => fixture_file("w3clove_faqs.response"))
18
18
  FakeWeb.register_uri(:get, "https://twitter.com/w3clove", :response => fixture_file("twitter_w3clove.response"))
19
+ FakeWeb.register_uri(:get, "http://example.com/empty", :response => fixture_file("empty_page.response"))
19
20
 
20
21
  describe 'Initialization' do
21
22
  it 'should accept an URL with a scheme' do
@@ -164,6 +165,11 @@ describe MetaInspector do
164
165
  "http://twitter.com/W3CLove",
165
166
  "http://us4.campaign-archive1.com/home/?u=6af3ab69c286561d0f0f25671&id=04a0dab609" ]
166
167
  end
168
+
169
+ it "should return empty array if no links found" do
170
+ m = MetaInspector.new('http://example.com/empty')
171
+ m.links.should == []
172
+ end
167
173
  end
168
174
 
169
175
  describe 'Non-HTTP links' do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 63
4
+ hash: 61
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 9
9
- - 6
10
- version: 1.9.6
9
+ - 7
10
+ version: 1.9.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,12 +15,10 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-07-24 00:00:00 Z
18
+ date: 2012-08-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
- name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
21
+ version_requirements: &id001 !ruby/object:Gem::Requirement
24
22
  none: false
25
23
  requirements:
26
24
  - - ~>
@@ -30,12 +28,12 @@ dependencies:
30
28
  - 1
31
29
  - 5
32
30
  version: "1.5"
31
+ prerelease: false
33
32
  type: :runtime
34
- version_requirements: *id001
33
+ name: nokogiri
34
+ requirement: *id001
35
35
  - !ruby/object:Gem::Dependency
36
- name: charguess
37
- prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
36
+ version_requirements: &id002 !ruby/object:Gem::Requirement
39
37
  none: false
40
38
  requirements:
41
39
  - - "="
@@ -46,12 +44,12 @@ dependencies:
46
44
  - 3
47
45
  - 20111021164500
48
46
  version: 1.3.20111021164500
47
+ prerelease: false
49
48
  type: :runtime
50
- version_requirements: *id002
49
+ name: charguess
50
+ requirement: *id002
51
51
  - !ruby/object:Gem::Dependency
52
- name: rash
53
- prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
52
+ version_requirements: &id003 !ruby/object:Gem::Requirement
55
53
  none: false
56
54
  requirements:
57
55
  - - "="
@@ -62,12 +60,12 @@ dependencies:
62
60
  - 3
63
61
  - 2
64
62
  version: 0.3.2
63
+ prerelease: false
65
64
  type: :runtime
66
- version_requirements: *id003
65
+ name: rash
66
+ requirement: *id003
67
67
  - !ruby/object:Gem::Dependency
68
- name: rspec
69
- prerelease: false
70
- requirement: &id004 !ruby/object:Gem::Requirement
68
+ version_requirements: &id004 !ruby/object:Gem::Requirement
71
69
  none: false
72
70
  requirements:
73
71
  - - "="
@@ -78,12 +76,12 @@ dependencies:
78
76
  - 11
79
77
  - 0
80
78
  version: 2.11.0
79
+ prerelease: false
81
80
  type: :development
82
- version_requirements: *id004
81
+ name: rspec
82
+ requirement: *id004
83
83
  - !ruby/object:Gem::Dependency
84
- name: fakeweb
85
- prerelease: false
86
- requirement: &id005 !ruby/object:Gem::Requirement
84
+ version_requirements: &id005 !ruby/object:Gem::Requirement
87
85
  none: false
88
86
  requirements:
89
87
  - - "="
@@ -94,12 +92,12 @@ dependencies:
94
92
  - 3
95
93
  - 0
96
94
  version: 1.3.0
95
+ prerelease: false
97
96
  type: :development
98
- version_requirements: *id005
97
+ name: fakeweb
98
+ requirement: *id005
99
99
  - !ruby/object:Gem::Dependency
100
- name: awesome_print
101
- prerelease: false
102
- requirement: &id006 !ruby/object:Gem::Requirement
100
+ version_requirements: &id006 !ruby/object:Gem::Requirement
103
101
  none: false
104
102
  requirements:
105
103
  - - "="
@@ -110,12 +108,12 @@ dependencies:
110
108
  - 0
111
109
  - 2
112
110
  version: 1.0.2
111
+ prerelease: false
113
112
  type: :development
114
- version_requirements: *id006
113
+ name: awesome_print
114
+ requirement: *id006
115
115
  - !ruby/object:Gem::Dependency
116
- name: rake
117
- prerelease: false
118
- requirement: &id007 !ruby/object:Gem::Requirement
116
+ version_requirements: &id007 !ruby/object:Gem::Requirement
119
117
  none: false
120
118
  requirements:
121
119
  - - "="
@@ -127,8 +125,10 @@ dependencies:
127
125
  - 2
128
126
  - 2
129
127
  version: 0.9.2.2
128
+ prerelease: false
130
129
  type: :development
131
- version_requirements: *id007
130
+ name: rake
131
+ requirement: *id007
132
132
  description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
133
133
  email:
134
134
  - jaimeiniesta@gmail.com
@@ -154,6 +154,7 @@ files:
154
154
  - samples/basic_scraping.rb
155
155
  - samples/spider.rb
156
156
  - spec/fixtures/alazan.com.response
157
+ - spec/fixtures/empty_page.response
157
158
  - spec/fixtures/guardian.co.uk.response
158
159
  - spec/fixtures/iteh.at.response
159
160
  - spec/fixtures/nonhttp.response
@@ -196,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
197
  requirements: []
197
198
 
198
199
  rubyforge_project:
199
- rubygems_version: 1.8.15
200
+ rubygems_version: 1.8.24
200
201
  signing_key:
201
202
  specification_version: 3
202
203
  summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL