metainspector 1.9.6 → 1.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,4 +1,8 @@
1
- MetaInspector is a gem for web scraping purposes. You give it an URL, and it lets you easily get its title, links, and meta tags.
1
+ MetaInspector is a gem for web scraping purposes. You give it a URL, and it lets you easily get its title, links, images, charset, description, keywords, meta tags...
2
+
3
+ = See it in action!
4
+
5
+ You can try MetaInspector live at this little demo: https://metainspectordemo.herokuapp.com
2
6
 
3
7
  = Installation
4
8
 
@@ -149,7 +149,7 @@ module MetaInspector
149
149
  def parsed_links
150
150
  @parsed_links ||= parsed_document.search("//a") \
151
151
  .map {|link| link.attributes["href"] \
152
- .to_s.strip}.uniq rescue nil
152
+ .to_s.strip}.uniq rescue []
153
153
  end
154
154
 
155
155
  def parsed_images
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.9.6"
4
+ VERSION = "1.9.7"
5
5
  end
@@ -0,0 +1,17 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ </head>
15
+ <body>
16
+ </body>
17
+ </html>
@@ -16,6 +16,7 @@ describe MetaInspector do
16
16
  FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
17
17
  FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => fixture_file("w3clove_faqs.response"))
18
18
  FakeWeb.register_uri(:get, "https://twitter.com/w3clove", :response => fixture_file("twitter_w3clove.response"))
19
+ FakeWeb.register_uri(:get, "https://example.com/empty", :response => fixture_file("empty_page.response"))
19
20
 
20
21
  describe 'Initialization' do
21
22
  it 'should accept an URL with a scheme' do
@@ -164,6 +165,11 @@ describe MetaInspector do
164
165
  "http://twitter.com/W3CLove",
165
166
  "http://us4.campaign-archive1.com/home/?u=6af3ab69c286561d0f0f25671&id=04a0dab609" ]
166
167
  end
168
+
169
+ it "should return empty array if no links found" do
170
+ m = MetaInspector.new('http://example.com/empty')
171
+ m.links.should == []
172
+ end
167
173
  end
168
174
 
169
175
  describe 'Non-HTTP links' do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 63
4
+ hash: 61
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 9
9
- - 6
10
- version: 1.9.6
9
+ - 7
10
+ version: 1.9.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,12 +15,10 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-07-24 00:00:00 Z
18
+ date: 2012-08-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
- name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
21
+ version_requirements: &id001 !ruby/object:Gem::Requirement
24
22
  none: false
25
23
  requirements:
26
24
  - - ~>
@@ -30,12 +28,12 @@ dependencies:
30
28
  - 1
31
29
  - 5
32
30
  version: "1.5"
31
+ prerelease: false
33
32
  type: :runtime
34
- version_requirements: *id001
33
+ name: nokogiri
34
+ requirement: *id001
35
35
  - !ruby/object:Gem::Dependency
36
- name: charguess
37
- prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
36
+ version_requirements: &id002 !ruby/object:Gem::Requirement
39
37
  none: false
40
38
  requirements:
41
39
  - - "="
@@ -46,12 +44,12 @@ dependencies:
46
44
  - 3
47
45
  - 20111021164500
48
46
  version: 1.3.20111021164500
47
+ prerelease: false
49
48
  type: :runtime
50
- version_requirements: *id002
49
+ name: charguess
50
+ requirement: *id002
51
51
  - !ruby/object:Gem::Dependency
52
- name: rash
53
- prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
52
+ version_requirements: &id003 !ruby/object:Gem::Requirement
55
53
  none: false
56
54
  requirements:
57
55
  - - "="
@@ -62,12 +60,12 @@ dependencies:
62
60
  - 3
63
61
  - 2
64
62
  version: 0.3.2
63
+ prerelease: false
65
64
  type: :runtime
66
- version_requirements: *id003
65
+ name: rash
66
+ requirement: *id003
67
67
  - !ruby/object:Gem::Dependency
68
- name: rspec
69
- prerelease: false
70
- requirement: &id004 !ruby/object:Gem::Requirement
68
+ version_requirements: &id004 !ruby/object:Gem::Requirement
71
69
  none: false
72
70
  requirements:
73
71
  - - "="
@@ -78,12 +76,12 @@ dependencies:
78
76
  - 11
79
77
  - 0
80
78
  version: 2.11.0
79
+ prerelease: false
81
80
  type: :development
82
- version_requirements: *id004
81
+ name: rspec
82
+ requirement: *id004
83
83
  - !ruby/object:Gem::Dependency
84
- name: fakeweb
85
- prerelease: false
86
- requirement: &id005 !ruby/object:Gem::Requirement
84
+ version_requirements: &id005 !ruby/object:Gem::Requirement
87
85
  none: false
88
86
  requirements:
89
87
  - - "="
@@ -94,12 +92,12 @@ dependencies:
94
92
  - 3
95
93
  - 0
96
94
  version: 1.3.0
95
+ prerelease: false
97
96
  type: :development
98
- version_requirements: *id005
97
+ name: fakeweb
98
+ requirement: *id005
99
99
  - !ruby/object:Gem::Dependency
100
- name: awesome_print
101
- prerelease: false
102
- requirement: &id006 !ruby/object:Gem::Requirement
100
+ version_requirements: &id006 !ruby/object:Gem::Requirement
103
101
  none: false
104
102
  requirements:
105
103
  - - "="
@@ -110,12 +108,12 @@ dependencies:
110
108
  - 0
111
109
  - 2
112
110
  version: 1.0.2
111
+ prerelease: false
113
112
  type: :development
114
- version_requirements: *id006
113
+ name: awesome_print
114
+ requirement: *id006
115
115
  - !ruby/object:Gem::Dependency
116
- name: rake
117
- prerelease: false
118
- requirement: &id007 !ruby/object:Gem::Requirement
116
+ version_requirements: &id007 !ruby/object:Gem::Requirement
119
117
  none: false
120
118
  requirements:
121
119
  - - "="
@@ -127,8 +125,10 @@ dependencies:
127
125
  - 2
128
126
  - 2
129
127
  version: 0.9.2.2
128
+ prerelease: false
130
129
  type: :development
131
- version_requirements: *id007
130
+ name: rake
131
+ requirement: *id007
132
132
  description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
133
133
  email:
134
134
  - jaimeiniesta@gmail.com
@@ -154,6 +154,7 @@ files:
154
154
  - samples/basic_scraping.rb
155
155
  - samples/spider.rb
156
156
  - spec/fixtures/alazan.com.response
157
+ - spec/fixtures/empty_page.response
157
158
  - spec/fixtures/guardian.co.uk.response
158
159
  - spec/fixtures/iteh.at.response
159
160
  - spec/fixtures/nonhttp.response
@@ -196,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
197
  requirements: []
197
198
 
198
199
  rubyforge_project:
199
- rubygems_version: 1.8.15
200
+ rubygems_version: 1.8.24
200
201
  signing_key:
201
202
  specification_version: 3
202
203
  summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL