link_oracle 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +35 -1
- data/lib/link_oracle/request.rb +14 -6
- data/lib/link_oracle/version.rb +1 -1
- data/lib/link_oracle.rb +2 -2
- data/link_preview.gemspec +2 -1
- data/spec/link_oracle/request_spec.rb +43 -43
- data/spec/link_preview_spec.rb +7 -6
- data/spec/spec_helper.rb +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 110e466f12d3efec41aec60490aca1c9377c04a0
|
4
|
+
data.tar.gz: f004c197875373848602feb923a0e663e081257f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fdc3d962d3d294a41ff82623a964eaf8d1e9ce0990c9ec88c0bd5178a1c17e7f1a11449f266de2d57635bad5094a914e973c87f719be006de77fe28cad6d45e
|
7
|
+
data.tar.gz: 8a3f2936855992f32ad6d2140164361e0763d7dc3d3182b1a8e8c801080acd1bd8d44c2dc1ef3df5d0feb9cbe94cd64b3a0db291cce020c8518dcdf14282cebb
|
data/README.md
CHANGED
@@ -18,7 +18,41 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
To parse a link and extract information:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
link_data = LinkOracle.extract_from('http://example.com')
|
25
|
+
```
|
26
|
+
|
27
|
+
This will return a LinkData object. This object makes a semi-intelligent guess about which image, title, and description to use. To access these defaults:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
title = link_data.title
|
31
|
+
image_url = link_data.image_url
|
32
|
+
description = link_data.description
|
33
|
+
```
|
34
|
+
|
35
|
+
The LinkData object also contains the parsed data:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
parsed_data = link_data.parsed_data
|
39
|
+
```
|
40
|
+
|
41
|
+
Finally, the LinkData object contains the results from individual sections broken into OpenGraph, Meta, and Body. If you are only interested in OpenGraph results:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
found_og_data = link_data.og
|
45
|
+
title = found_og_data.title
|
46
|
+
image_url = found_og_data.image
|
47
|
+
description = found_og_data.description
|
48
|
+
```
|
49
|
+
|
50
|
+
For Meta or Body:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
found_meta_data = link_data.meta
|
54
|
+
found_body_data = link_data.body
|
55
|
+
```
|
22
56
|
|
23
57
|
## Contributing
|
24
58
|
|
data/lib/link_oracle/request.rb
CHANGED
@@ -26,7 +26,7 @@ class LinkOracle
|
|
26
26
|
def validate_url
|
27
27
|
!!URI.parse(url)
|
28
28
|
rescue URI::InvalidURIError
|
29
|
-
raise InvalidUrl
|
29
|
+
raise InvalidUrl, url
|
30
30
|
end
|
31
31
|
|
32
32
|
def response
|
@@ -34,19 +34,27 @@ class LinkOracle
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def request
|
37
|
-
::
|
37
|
+
c = ::Curl::Easy.new(url)
|
38
|
+
c.follow_location = true
|
39
|
+
begin
|
40
|
+
c.perform
|
41
|
+
rescue ::Curl::Err::SSLConnectError
|
42
|
+
c.ssl_version = 3
|
43
|
+
c.perform
|
44
|
+
end
|
45
|
+
c
|
38
46
|
end
|
39
47
|
|
40
48
|
def error_class
|
41
|
-
return if response.
|
49
|
+
return if response.response_code == 200
|
42
50
|
{
|
43
51
|
404 => PageNotFound,
|
44
52
|
403 => PermissionDenied
|
45
|
-
}[response.
|
53
|
+
}[response.response_code] || BadThingsHappened
|
46
54
|
end
|
47
55
|
|
48
56
|
def parsed_data
|
49
|
-
::Nokogiri::HTML.parse(response.
|
57
|
+
::Nokogiri::HTML.parse(response.body_str)
|
50
58
|
rescue
|
51
59
|
raise ParsingError
|
52
60
|
end
|
@@ -57,4 +65,4 @@ class LinkOracle
|
|
57
65
|
class BadThingsHappened < StandardError; end
|
58
66
|
class InvalidUrl < StandardError; end
|
59
67
|
class ParsingError < StandardError; end
|
60
|
-
end
|
68
|
+
end
|
data/lib/link_oracle/version.rb
CHANGED
data/lib/link_oracle.rb
CHANGED
data/link_preview.gemspec
CHANGED
@@ -20,9 +20,10 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
22
|
spec.add_dependency 'nokogiri'
|
23
|
-
spec.add_dependency '
|
23
|
+
spec.add_dependency 'curb'
|
24
24
|
|
25
25
|
spec.add_development_dependency "bundler", "~> 1.3"
|
26
26
|
spec.add_development_dependency "rake"
|
27
27
|
spec.add_development_dependency "rspec"
|
28
|
+
spec.add_development_dependency "webmock"
|
28
29
|
end
|
@@ -6,7 +6,7 @@ describe LinkOracle::Request do
|
|
6
6
|
let(:code) { 200 }
|
7
7
|
let(:response_hash) {
|
8
8
|
{
|
9
|
-
|
9
|
+
status: code,
|
10
10
|
body: body,
|
11
11
|
headers: {}
|
12
12
|
}
|
@@ -26,47 +26,59 @@ describe LinkOracle::Request do
|
|
26
26
|
</html>"
|
27
27
|
}
|
28
28
|
|
29
|
+
|
29
30
|
describe 'perform' do
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
response_hash
|
35
|
-
)
|
36
|
-
)
|
37
|
-
end
|
31
|
+
context 'request failures' do
|
32
|
+
before do
|
33
|
+
stub_request(:any, url).to_return(response_hash)
|
34
|
+
end
|
38
35
|
|
39
|
-
|
40
|
-
|
41
|
-
|
36
|
+
context 'invalid url' do
|
37
|
+
context 'response code is 404' do
|
38
|
+
let(:code) { 404 }
|
42
39
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
it 'should raise PageNotFound' do
|
41
|
+
expect {
|
42
|
+
requester.parsed_url
|
43
|
+
}.to raise_error(LinkOracle::PageNotFound)
|
44
|
+
end
|
47
45
|
end
|
48
|
-
end
|
49
46
|
|
50
|
-
|
51
|
-
|
47
|
+
context 'response code is 403' do
|
48
|
+
let(:code) { 403 }
|
52
49
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
50
|
+
it 'should raise PermissionDenied' do
|
51
|
+
expect {
|
52
|
+
requester.parsed_url
|
53
|
+
}.to raise_error(LinkOracle::PermissionDenied)
|
54
|
+
end
|
57
55
|
end
|
58
|
-
end
|
59
56
|
|
60
|
-
|
61
|
-
|
57
|
+
context 'response code is weird' do
|
58
|
+
let(:code) { 42 }
|
62
59
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
60
|
+
it 'should raise BadThingsHappened' do
|
61
|
+
expect {
|
62
|
+
requester.parsed_url
|
63
|
+
}.to raise_error(LinkOracle::BadThingsHappened)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
context 'parsing goes awry' do
|
68
|
+
before do
|
69
|
+
::Nokogiri::HTML.should_receive(:parse).and_raise(ArgumentError)
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'should raise ParsingError' do
|
73
|
+
expect {
|
74
|
+
requester.parsed_url
|
75
|
+
}.to raise_error(LinkOracle::ParsingError)
|
76
|
+
end
|
67
77
|
end
|
68
78
|
end
|
79
|
+
end
|
69
80
|
|
81
|
+
context 'malformed url' do
|
70
82
|
context 'url is invalid' do
|
71
83
|
let(:url) { nil }
|
72
84
|
|
@@ -76,18 +88,6 @@ describe LinkOracle::Request do
|
|
76
88
|
}.to raise_error(LinkOracle::InvalidUrl)
|
77
89
|
end
|
78
90
|
end
|
79
|
-
|
80
|
-
context 'parsing goes awry' do
|
81
|
-
before do
|
82
|
-
::Nokogiri::HTML.should_receive(:parse).and_raise(ArgumentError)
|
83
|
-
end
|
84
|
-
|
85
|
-
it 'should raise ParsingError' do
|
86
|
-
expect {
|
87
|
-
requester.parsed_url
|
88
|
-
}.to raise_error(LinkOracle::ParsingError)
|
89
|
-
end
|
90
|
-
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
end
|
93
|
+
end
|
data/spec/link_preview_spec.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe LinkOracle do
|
4
|
-
let(:link_data) { LinkOracle.extract_from(
|
4
|
+
let(:link_data) { LinkOracle.extract_from(url) }
|
5
|
+
let(:url) { 'http://someurl.com' }
|
5
6
|
let(:response) {
|
6
|
-
|
7
|
-
|
7
|
+
{
|
8
|
+
status: 200,
|
8
9
|
body: body,
|
9
10
|
headers: {}
|
10
|
-
|
11
|
+
}
|
11
12
|
}
|
12
13
|
|
13
14
|
let(:body) {
|
@@ -39,7 +40,7 @@ describe LinkOracle do
|
|
39
40
|
HTML
|
40
41
|
}
|
41
42
|
before do
|
42
|
-
|
43
|
+
stub_request(:any, url).to_return(response)
|
43
44
|
end
|
44
45
|
|
45
46
|
describe '.extract_from' do
|
@@ -59,4 +60,4 @@ describe LinkOracle do
|
|
59
60
|
link_data.description.should == 'A description for your face'
|
60
61
|
end
|
61
62
|
end
|
62
|
-
end
|
63
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,6 +2,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
2
2
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
3
|
|
4
4
|
require 'rspec'
|
5
|
+
require 'webmock/rspec'
|
5
6
|
# Requires supporting files with custom matchers and macros, etc,
|
6
7
|
# in ./support/ and its subdirectories.
|
7
8
|
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: link_oracle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ian Cooper
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2014-04-23 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: nokogiri
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name:
|
31
|
+
name: curb
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
requirements:
|
34
34
|
- - '>='
|
@@ -83,6 +83,20 @@ dependencies:
|
|
83
83
|
- - '>='
|
84
84
|
- !ruby/object:Gem::Version
|
85
85
|
version: '0'
|
86
|
+
- !ruby/object:Gem::Dependency
|
87
|
+
name: webmock
|
88
|
+
requirement: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
type: :development
|
94
|
+
prerelease: false
|
95
|
+
version_requirements: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
86
100
|
description: Scrapes pages for open graph, meta, and lastly, body preview data
|
87
101
|
email:
|
88
102
|
- developers@socialchorus.com
|