link_oracle 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +7 -0
- data/lib/link_oracle/extractor/base.rb +29 -0
- data/lib/link_oracle/extractor/body.rb +37 -0
- data/lib/link_oracle/extractor/meta.rb +26 -0
- data/lib/link_oracle/extractor/og.rb +21 -0
- data/lib/link_oracle/link_data/data.rb +24 -0
- data/lib/link_oracle/link_data.rb +34 -0
- data/lib/link_oracle/request.rb +53 -0
- data/lib/link_oracle/version.rb +3 -0
- data/lib/link_oracle.rb +11 -0
- data/link_preview.gemspec +28 -0
- data/spec/link_oracle/extractor/body_spec.rb +78 -0
- data/spec/link_oracle/extractor/meta_spec.rb +49 -0
- data/spec/link_oracle/extractor/og_spec.rb +52 -0
- data/spec/link_oracle/request_spec.rb +93 -0
- data/spec/link_preview_spec.rb +63 -0
- data/spec/spec_helper.rb +13 -0
- metadata +143 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OWViOGJkNzViMDA0NWFjYTE1YmE5MmY1ZjIwYTdhMWEyMTdmMWY4Yw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ODFiZjNiYTNmNzQ5NDM3MzE4ZmYyNGE1NDBkZTBmNWFlMTE4NDJlNQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
OWY2M2E0MjZhN2IxNjEwNzY5YmQ1ZTBhODFjM2QyNzBjNTJkMjEzODhhN2Q2
|
10
|
+
ZWUyNGY5NTIxOGMzYTllNDdlYmI2Y2NhOTdkZjY3Njk3MjIzNWM5MzMyMzky
|
11
|
+
NzczM2Y5ZmQ5OTU3MGJiOGJiYmFjMzA0ZGU3MGY4NGY4MTkxNTc=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZDk2NmM2MDRkYjYxMmE2OGQ5ZjU0NWVkYzdhMGRjMTk2ZDBlNGE1ZWM4Mjhl
|
14
|
+
MTBjMDY2MjE5ODk4ZGRmNTRmNmM2YjczNTRkNzlkOWFjNzM2NjJmMjdiZTAw
|
15
|
+
NTU5ODdlYThmMDczYzkwNWEwOWQ2NDhiN2I3YjIxYThmYWE1MjE=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Ian Cooper
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# LinkOracle
|
2
|
+
|
3
|
+
LinkOracle scrapes pages for Open Graph, meta, and lastly, body preview data.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'link_oracle'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install link_oracle
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
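Call `LinkOracle.extract_from('http://someurl.com')`; the returned `LinkOracle::LinkData` object responds to `title`, `description`, and `image_url`, preferring Open Graph data, then meta tags, then body content.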
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
class LinkOracle
  module Extractor
    # Shared behaviour for extractors that pull one value per field out of a
    # parsed document. Subclasses are expected to provide #type, #title,
    # #image and #description; #perform passes the latter three straight to
    # LinkData::Data#assign.
    class Base
      attr_reader :parsed_body, :link_data

      # parsed_body - the parsed document; this class only calls #xpath on it.
      def initialize(parsed_body)
        @parsed_body = parsed_body
        @link_data = LinkData::Data.new
      end

      # Identifies the extractor. Abstract: subclasses must override.
      #
      # Raises RuntimeError when not overridden.
      def type
        raise "implement me"
      end

      # Collects title, image and description from the subclass and stores
      # them on link_data.
      #
      # Returns whatever link_data.assign returns.
      def perform
        link_data.assign({
          titles: title,
          image_urls: image,
          descriptions: description
        })
      end

      # Reads the `content` attribute of the first node matching selector.
      #
      # selector - XPath expression handed to parsed_body.xpath.
      #
      # Returns a one-element Array holding the content, or an empty Array
      # when nothing matches or the content is missing or blank. Blank values
      # are dropped because an empty String is truthy and would otherwise
      # stop callers from falling back to another source.
      def get_content(selector)
        found = parsed_body.xpath(selector).first
        content = found && found[:content]
        return [] if content.nil? || content.to_s.strip.empty?

        [content]
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class LinkOracle
  module Extractor
    # Fallback extractor that reads headings, images and paragraphs straight
    # from the document body. Each field keeps at most MAX_RESULTS entries.
    class Body
      MAX_RESULTS = 3

      attr_reader :parsed_body, :link_data

      # parsed_body - the parsed document; this class only calls #xpath on it.
      def initialize(parsed_body)
        @parsed_body = parsed_body
        @link_data = LinkData::Data.new
      end

      # Stores the extracted titles, image URLs and descriptions on link_data.
      #
      # Returns whatever link_data.assign returns.
      def perform
        link_data.assign({
          titles: titles,
          image_urls: images,
          descriptions: descriptions
        })
      end

      # Text of the first non-blank h1/h2/h3 text nodes, in document order.
      def titles
        @titles ||= text_contents("//h1/text() | //h2/text() | //h3/text()")
      end

      # `src` of the first images whose URL contains '://' and none of the
      # substrings 'ads.', 'ad.' or '?'. This is a plain substring filter,
      # so hosts such as "upload.example.com" are excluded as well.
      def images
        @images ||= parsed_body.xpath(
          "//img[@src[contains(.,'://') and not(contains(.,'ads.') or contains(.,'ad.') or contains(.,'?'))]]"
        ).first(MAX_RESULTS).compact.map { |node| node['src'] }
      end

      # Text of the first non-blank paragraph text nodes.
      def descriptions
        @descriptions ||= text_contents("//p/text()")
      end

      private

      # Collects the content of every text node matching selector, drops
      # whitespace-only entries, then keeps the first MAX_RESULTS. Filtering
      # happens before truncation so blank nodes cannot crowd out real text.
      def text_contents(selector)
        parsed_body.xpath(selector)
          .map { |text| text.content }
          .reject { |content| content.to_s.strip.empty? }
          .first(MAX_RESULTS)
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class LinkOracle
  module Extractor
    # Extractor for the <title> element and the name-based <meta> tags in
    # the document head.
    class Meta < Base
      # XPath 1.0 has no lower-case(); translate() is the usual substitute.
      LOWERCASED_NAME =
        "translate(@name, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')".freeze

      def type
        :meta
      end

      # Returns a one-element Array with the text of /html/head/title, or an
      # empty Array when the document has no title text.
      def title
        found = parsed_body.at_xpath("/html/head/title/text()")
        found ? [found.content] : []
      end

      # Content of the first head <meta> whose name contains "thumbnail".
      # The name is matched case-insensitively, like #description.
      def image
        get_content("/html/head/meta[contains(#{LOWERCASED_NAME}, 'thumbnail')]")
      end

      # Content of the first head <meta> whose name is "description" in any
      # letter case.
      def description
        get_content("/html/head/meta[#{LOWERCASED_NAME} = 'description']")
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class LinkOracle
  module Extractor
    # Extractor for Open Graph <meta property="og:..."> tags in the head.
    class OG < Base
      # XPath selector for each Open Graph field this extractor reads.
      SELECTORS = {
        title: "/html/head/meta[@property='og:title']",
        image: "/html/head/meta[@property='og:image']",
        description: "/html/head/meta[@property='og:description']"
      }.freeze

      def type
        :og
      end

      # Content of the og:title tag, wrapped as get_content returns it.
      def title
        get_content(SELECTORS[:title])
      end

      # Content of the og:image tag, wrapped as get_content returns it.
      def image
        get_content(SELECTORS[:image])
      end

      # Content of the og:description tag, wrapped as get_content returns it.
      def description
        get_content(SELECTORS[:description])
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class LinkOracle
  class LinkData
    # Value holder for the candidates one extractor found. Each plural
    # attribute is expected to be an Array; the singular readers return its
    # first element.
    class Data
      attr_accessor :titles, :image_urls, :descriptions

      # Sets every attribute named in hash through its public writer.
      #
      # hash - attribute name => value, e.g. { titles: ['A'] }.
      #
      # Returns self so calls can be chained.
      # Raises NoMethodError for a key without a public writer.
      def assign(hash)
        hash.each { |key, value| public_send("#{key}=", value) }
        self
      end

      # First image URL, or nil when none were assigned.
      def image_url
        Array(image_urls).first
      end

      # First title, or nil when none were assigned.
      def title
        Array(titles).first
      end

      # First description, or nil when none were assigned.
      def description
        Array(descriptions).first
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class LinkOracle
  # Combines the Open Graph, meta and body extractors for one parsed
  # document. Each reader returns the first non-blank value, trying the
  # sources in SOURCES order. Extractors run lazily and are memoized.
  class LinkData
    # Lookup order, most to least preferred.
    SOURCES = [:og, :meta, :body].freeze

    attr_reader :parsed_data

    # parsed_data - the parsed document handed to every extractor.
    def initialize(parsed_data)
      @parsed_data = parsed_data
    end

    #TODO: Need to write tests for these
    def title
      first_present(:title)
    end

    def description
      first_present(:description)
    end

    def image_url
      first_present(:image_url)
    end

    # Result of the Open Graph extractor (memoized).
    def og
      @og ||= Extractor::OG.new(parsed_data).perform
    end

    # Result of the meta-tag extractor (memoized).
    def meta
      @meta ||= Extractor::Meta.new(parsed_data).perform
    end

    # Result of the body extractor (memoized).
    def body
      @body ||= Extractor::Body.new(parsed_data).perform
    end

    private

    # Walks SOURCES and returns the first value of attribute that is
    # neither nil nor whitespace-only. A plain `||` chain is not enough
    # because an empty String is truthy and would stop the fallback.
    #
    # Returns the value, or nil when every source is blank.
    def first_present(attribute)
      SOURCES.each do |source|
        value = public_send(source).public_send(attribute)
        return value unless value.nil? || value.to_s.strip.empty?
      end
      nil
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
class LinkOracle
  # Fetches a URL with RestClient and parses the response body with
  # Nokogiri, translating failures into the LinkOracle error classes
  # defined below.
  class Request
    attr_reader :url

    def initialize(url)
      @url = url
    end

    # Validates the URL, performs the request and parses the body.
    #
    # Returns the document produced by Nokogiri::HTML.parse.
    # Raises InvalidUrl, PageNotFound, PermissionDenied, BadThingsHappened
    # or ParsingError.
    def parsed_data
      validate_url
      validate_request
      parsed_body
    end

    # Raises the error class matching the response code, if any.
    def validate_request
      failure = error_class
      raise failure if failure
    end

    # Returns true when URI.parse accepts the url.
    # Raises InvalidUrl otherwise.
    def validate_url
      !!URI.parse(url)
    rescue URI::InvalidURIError
      raise InvalidUrl
    end

    # The HTTP response, fetched once and memoized.
    def response
      @response ||= request
    end

    # Performs the GET. RestClient raises for non-success statuses instead
    # of returning the response; the response carried by that exception is
    # returned here so error_class can map its code. Exceptions that carry
    # no response are re-raised unchanged.
    def request
      ::RestClient.get url
    rescue ::RestClient::ExceptionWithResponse => error
      raise unless error.response

      error.response
    end

    # Returns nil for a 200 response, otherwise the error class to raise.
    def error_class
      return if response.code == 200

      {
        404 => PageNotFound,
        403 => PermissionDenied
      }[response.code] || BadThingsHappened
    end

    # Parses the response body.
    #
    # Raises ParsingError when any StandardError occurs while doing so.
    def parsed_body
      ::Nokogiri::HTML.parse(response.body)
    rescue
      raise ParsingError
    end
  end

  class PageNotFound < StandardError; end
  class PermissionDenied < StandardError; end
  class BadThingsHappened < StandardError; end
  class InvalidUrl < StandardError; end
  class ParsingError < StandardError; end
end
|
data/lib/link_oracle.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'link_oracle/extractor/base'
|
5
|
+
require 'link_oracle/version'
|
6
|
+
|
7
|
+
# Gem metadata for link_oracle. Packaged files come from `git ls-files`,
# so building the gem requires a git checkout.
Gem::Specification.new do |gem|
  blurb = %q{Scrapes pages for open graph, meta, and lastly, body preview data}

  gem.name          = "link_oracle"
  gem.version       = LinkOracle::VERSION
  gem.license       = "MIT"
  gem.homepage      = "http://github.com/socialchorus/link_oracle"
  gem.authors       = ["Ian Cooper", 'Fito von Zastrow', 'Kane Baccigalupi']
  gem.email         = ["developers@socialchorus.com"]
  gem.description   = blurb
  gem.summary       = blurb

  # Everything tracked by git is shipped; bin/ entries become executables
  # and test/spec/features files are listed as test files.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies.
  %w[nokogiri rest-client].each { |name| gem.add_dependency name }

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises the body extractor against a small document containing ad and
# relative images that must be skipped.
describe LinkOracle::Extractor::Body do
  let(:parsed_body) { ::Nokogiri::HTML.parse(body) }
  let(:link_data) { LinkOracle::Extractor::Body.new(parsed_body).perform }

  let(:body) {
    <<-HTML
      <html>
        <head>
          <meta property=\"og:title\" content=\"This is a title\">
          <meta property=\"og:description\" content=\"A description for your face\">
          <meta property=\"og:image\" content=\"http://imageurl.com\">
          <meta name=\"Description\" content=\"Here is a description not for facebook\">
          <meta name=\"KEYWORDS\" content=\"Keywords, Keywords everywhere\">
          <title>TITLE!</title>
          <meta itemprop='thumbnailUrl' name='thumbnail' content='http://imageurlfrommeta.com'>
        </head>
        <body>
          <img src='http://ads.berkin.com'>
          <img src='http://berkin.com'>
          <img src='/berkin/cherbin.jpg'>
          <img src='http://cherbin.com'>
          <img src='http://flerbin.com'>
          <h1>Berkin</h1>
          <h2>Derbin</h2>
          <h3>Cherbin</h3>
          <p>paragraph 1</p>
          <p>paragraph 2</p>
          <p>paragraph 3</p>
        </body>
      </html>
    HTML
  }

  describe 'perform' do
    context 'there is no suitable stuff in the body' do
      let(:body) {
        "<html>
          <head>
          </head>
          <body>
          </body>
        </html>"
      }

      it 'should fail quietly' do
        expect { link_data }.to_not raise_error
      end
    end

    context 'there are body attributes' do
      it 'should populate link_data titles' do
        expect(link_data.titles).to eq([
          'Berkin',
          'Derbin',
          'Cherbin'
        ])
      end

      it 'should populate link_data image_urls' do
        expect(link_data.image_urls).to eq([
          "http://berkin.com",
          "http://cherbin.com",
          "http://flerbin.com"
        ])
      end

      it 'should populate link_data descriptions' do
        expect(link_data.descriptions).to eq([
          "paragraph 1",
          "paragraph 2",
          "paragraph 3"
        ])
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises the meta extractor: <title>, the thumbnail meta tag and the
# case-insensitively named description meta tag.
describe LinkOracle::Extractor::Meta do
  let(:parsed_body) { ::Nokogiri::HTML.parse(body) }
  let(:link_data) { LinkOracle::Extractor::Meta.new(parsed_body).perform }

  let(:body) {
    "<html>
      <head>
        <meta property=\"og:title\" content=\"This is a title\">
        <meta property=\"og:description\" content=\"A description for your face\">
        <meta property=\"og:image\" content=\"http://imageurl.com\">
        <meta name=\"Description\" content=\"Here is a description not for facebook\">
        <meta name=\"KEYWORDS\" content=\"Keywords, Keywords everywhere\">
        <title>TITLE!</title>
        <meta itemprop='thumbnailUrl' name='thumbnail' content='http://imageurlfrommeta.com'>
      </head>
    </html>"
  }

  describe 'perform' do
    context 'there is no suitable meta data' do
      let(:body) {
        "<html>
          <head>
          </head>
        </html>"
      }

      it 'should fail quietly' do
        expect { link_data }.to_not raise_error
      end
    end

    context 'there is meta data' do
      it 'should populate link_data title' do
        expect(link_data.title).to eq('TITLE!')
      end

      it 'should populate link_data image_url' do
        expect(link_data.image_url).to eq("http://imageurlfrommeta.com")
      end

      it 'should populate link_data description' do
        expect(link_data.description).to eq('Here is a description not for facebook')
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises the Open Graph extractor with and without og:* meta tags.
describe LinkOracle::Extractor::OG do
  let(:parsed_body) { ::Nokogiri::HTML.parse(body) }
  let(:link_data) { LinkOracle::Extractor::OG.new(parsed_body).perform }

  let(:body) {
    "<html>
      <head>
        <meta property=\"og:title\" content=\"This is a title\">
        <meta property=\"og:description\" content=\"A description for your face\">
        <meta property=\"og:image\" content=\"http://imageurl.com\">
        <meta name=\"Description\" content=\" \tHere is a description not for facebook\t\">
        <meta name=\"KEYWORDS\" content=\" \tKeywords, Keywords everywhere \t\">
        <title>TITLE!</title>
      </head>
    </html>"
  }

  describe 'perform' do
    context 'there is no og_data' do
      let(:body) {
        "<html>
          <head>
            <meta name=\"Description\" content=\" \tHere is a description not for facebook\t\">
            <meta name=\"KEYWORDS\" content=\" \tKeywords, Keywords everywhere \t\">
            <title>TITLE!</title>
          </head>
        </html>"
      }

      it 'should fail quietly' do
        expect { link_data }.to_not raise_error
      end
    end

    context 'there is og_data' do
      it 'should populate link_data title' do
        expect(link_data.title).to eq('This is a title')
      end

      it 'should populate link_data image_url' do
        expect(link_data.image_url).to eq("http://imageurl.com")
      end

      it 'should populate link_data description' do
        expect(link_data.description).to eq('A description for your face')
      end
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises LinkOracle::Request with RestClient.get stubbed to return a
# double built from response_hash, so no network access happens.
describe LinkOracle::Request do
  let(:requester) { LinkOracle::Request.new(url) }
  let(:url) { 'http://someurl.com' }
  let(:code) { 200 }
  let(:response_hash) {
    {
      code: code,
      body: body,
      headers: {}
    }
  }

  let(:body) {
    "<html>
      <head>
        <meta property=\"og:title\" content=\"This is a title\">
        <meta property=\"og:description\" content=\"A description for your face\">
        <meta property=\"og:image\" content=\"http://imageurl.com\">
        <meta name=\"Description\" content=\"Here is a description not for facebook\">
        <meta name=\"KEYWORDS\" content=\"Keywords, Keywords everywhere\">
        <title>TITLE!</title>
        <meta itemprop='thumbnailUrl' name='thumbnail' content='http://imageurlfrommeta.com'>
      </head>
    </html>"
  }

  describe 'perform' do
    before do
      allow(RestClient).to receive(:get).and_return(
        double(
          'response',
          response_hash
        )
      )
    end

    context 'invalid url' do
      context 'response code is 404' do
        let(:code) { 404 }

        it 'should raise PageNotFound' do
          expect {
            requester.parsed_data
          }.to raise_error(LinkOracle::PageNotFound)
        end
      end

      context 'response code is 403' do
        let(:code) { 403 }

        it 'should raise PermissionDenied' do
          expect {
            requester.parsed_data
          }.to raise_error(LinkOracle::PermissionDenied)
        end
      end

      context 'response code is weird' do
        let(:code) { 42 }

        it 'should raise BadThingsHappened' do
          expect {
            requester.parsed_data
          }.to raise_error(LinkOracle::BadThingsHappened)
        end
      end

      context 'url is invalid' do
        let(:url) { nil }

        it 'should raise InvalidUrl' do
          expect {
            requester.parsed_data
          }.to raise_error(LinkOracle::InvalidUrl)
        end
      end

      context 'parsing goes awry' do
        before do
          expect(::Nokogiri::HTML).to receive(:parse).and_raise(ArgumentError)
        end

        it 'should raise ParsingError' do
          expect {
            requester.parsed_data
          }.to raise_error(LinkOracle::ParsingError)
        end
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# End-to-end check of LinkOracle.extract_from with RestClient.get stubbed:
# when Open Graph tags are present they win over meta and body data.
describe LinkOracle do
  let(:link_data) { LinkOracle.extract_from('http://someurl.com') }
  let(:response) {
    double(
      code: 200,
      body: body,
      headers: {}
    )
  }

  let(:body) {
    <<-HTML
      <html>
        <head>
          <meta property="og:title" content="This is a title">
          <meta property="og:description" content="A description for your face">
          <meta property="og:image" content="http://imageurl.com">
          <meta name="Description" content="Here is a description not for facebook">
          <meta name="KEYWORDS" content="Keywords, Keywords everywhere">
          <title>TITLE!</title>
          <meta itemprop='thumbnailUrl' name='thumbnail' content='http://imageurlfrommeta.com'>
        </head>
        <body>
          <img src='http://ads.berkin.com'>
          <img src='http://berkin.com'>
          <img src='/berkin/cherbin.jpg'>
          <img src='http://cherbin.com'>
          <img src='http://flerbin.com'>
          <h1>Berkin</h1>
          <h2>Derbin</h2>
          <h3>Cherbin</h3>
          <p>paragraph 1</p>
          <p>paragraph 2</p>
          <p>paragraph 3</p>
        </body>
      </html>
    HTML
  }

  before do
    allow(RestClient).to receive(:get).and_return(response)
  end

  describe '.extract_from' do
    it "returns a link data object" do
      expect(link_data).to be_a(LinkOracle::LinkData)
    end

    it "defaults to the og title" do
      expect(link_data.title).to eq("This is a title")
    end

    it 'defaults to the og image' do
      expect(link_data.image_url).to eq('http://imageurl.com')
    end

    it "defaults to the og description" do
      expect(link_data.description).to eq('A description for your face')
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Test bootstrap: puts lib/ and spec/ on the load path, then requires the
# support files and every library file.
spec_dir = File.expand_path(File.dirname(__FILE__))
lib_dir = File.expand_path('../lib', spec_dir)

$LOAD_PATH.unshift(lib_dir)
$LOAD_PATH.unshift(spec_dir)

require 'rspec'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
# Glob results are sorted so the load order does not depend on the
# filesystem.
Dir[File.join(spec_dir, 'support', '**', '*.rb')].sort.each { |file| require file }

# Sorted order loads lib/link_oracle.rb first and extractor/base.rb before
# the extractors that subclass it.
Dir[File.join(lib_dir, '**', '*.rb')].sort.each { |file| require file }

RSpec.configure do |config|
end
|
metadata
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: link_oracle
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ian Cooper
|
8
|
+
- Fito von Zastrow
|
9
|
+
- Kane Baccigalupi
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2013-08-26 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
requirements:
|
26
|
+
- - ! '>='
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '0'
|
29
|
+
- !ruby/object:Gem::Dependency
|
30
|
+
name: rest-client
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
type: :runtime
|
37
|
+
prerelease: false
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: bundler
|
45
|
+
requirement: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ~>
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.3'
|
50
|
+
type: :development
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ~>
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '1.3'
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: rake
|
59
|
+
requirement: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
type: :development
|
65
|
+
prerelease: false
|
66
|
+
version_requirements: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
- !ruby/object:Gem::Dependency
|
72
|
+
name: rspec
|
73
|
+
requirement: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
description: Scrapes pages for open graph, meta, and lastly, body preview data
|
86
|
+
email:
|
87
|
+
- developers@socialchorus.com
|
88
|
+
executables: []
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files: []
|
91
|
+
files:
|
92
|
+
- .gitignore
|
93
|
+
- Gemfile
|
94
|
+
- LICENSE.txt
|
95
|
+
- README.md
|
96
|
+
- Rakefile
|
97
|
+
- lib/link_oracle.rb
|
98
|
+
- lib/link_oracle/extractor/base.rb
|
99
|
+
- lib/link_oracle/extractor/body.rb
|
100
|
+
- lib/link_oracle/extractor/meta.rb
|
101
|
+
- lib/link_oracle/extractor/og.rb
|
102
|
+
- lib/link_oracle/link_data.rb
|
103
|
+
- lib/link_oracle/link_data/data.rb
|
104
|
+
- lib/link_oracle/request.rb
|
105
|
+
- lib/link_oracle/version.rb
|
106
|
+
- link_preview.gemspec
|
107
|
+
- spec/link_oracle/extractor/body_spec.rb
|
108
|
+
- spec/link_oracle/extractor/meta_spec.rb
|
109
|
+
- spec/link_oracle/extractor/og_spec.rb
|
110
|
+
- spec/link_oracle/request_spec.rb
|
111
|
+
- spec/link_preview_spec.rb
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
homepage: http://github.com/socialchorus/link_oracle
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ! '>='
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.0.7
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Scrapes pages for open graph, meta, and lastly, body preview data
|
137
|
+
test_files:
|
138
|
+
- spec/link_oracle/extractor/body_spec.rb
|
139
|
+
- spec/link_oracle/extractor/meta_spec.rb
|
140
|
+
- spec/link_oracle/extractor/og_spec.rb
|
141
|
+
- spec/link_oracle/request_spec.rb
|
142
|
+
- spec/link_preview_spec.rb
|
143
|
+
- spec/spec_helper.rb
|