linkser 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/linkser/object.rb +12 -0
- data/lib/linkser/objects/html.rb +38 -61
- data/lib/linkser/parser.rb +13 -25
- data/lib/linkser/version.rb +1 -1
- data/lib/linkser.rb +7 -4
- data/linkser.gemspec +1 -0
- metadata +27 -9
data/lib/linkser/object.rb
CHANGED
@@ -1,10 +1,22 @@
|
|
1
1
|
module Linkser
|
2
2
|
class Object
|
3
|
+
extend ActiveSupport::Memoizable
|
4
|
+
|
3
5
|
attr_reader :url, :head
|
4
6
|
|
5
7
|
def initialize url, head, options={}
|
6
8
|
@url = url
|
7
9
|
@heade = head
|
8
10
|
end
|
11
|
+
|
12
|
+
def body
|
13
|
+
open(url)
|
14
|
+
end
|
15
|
+
|
16
|
+
memoize :body
|
17
|
+
|
18
|
+
|
9
19
|
end
|
10
20
|
end
|
21
|
+
|
22
|
+
require 'linkser/objects/html'
|
data/lib/linkser/objects/html.rb
CHANGED
@@ -7,92 +7,69 @@ require 'opengraph'
|
|
7
7
|
module Linkser
|
8
8
|
module Objects
|
9
9
|
class HTML < Linkser::Object
|
10
|
-
attr_reader :body, :nokogiri
|
11
|
-
attr_reader :title, :description, :images, :ogp
|
12
|
-
def initialize url, head, options={}
|
13
|
-
super url, head, options
|
14
|
-
end
|
15
10
|
|
16
11
|
def title
|
17
|
-
|
18
|
-
|
19
|
-
@title = ogp.title
|
20
|
-
else
|
21
|
-
nokogiri.css('title').each do |title|
|
22
|
-
@title = title.text
|
23
|
-
end
|
24
|
-
end
|
25
|
-
@title
|
26
|
-
end
|
27
|
-
|
28
|
-
def body
|
29
|
-
return @body unless @body.nil?
|
30
|
-
@body = open(url)
|
12
|
+
ogp && ogp.title ||
|
13
|
+
(e = nokogiri.css('title')).first && e.text
|
31
14
|
end
|
32
15
|
|
33
16
|
def description
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
nokogiri.css('meta').each do |meta|
|
39
|
-
if meta.get_attribute("name").eql? "description"
|
40
|
-
@description = meta.get_attribute("content")
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
@description
|
17
|
+
ogp && ogp.description ||
|
18
|
+
(e = nokogiri.css('meta').find { |meta|
|
19
|
+
meta.get_attribute("name").eql? "description"
|
20
|
+
}) && e.get_attribute("content")
|
45
21
|
end
|
46
22
|
|
47
23
|
def images
|
48
|
-
|
49
|
-
|
50
|
-
if ogp and ogp.image
|
51
|
-
begin
|
52
|
-
img_spec = ImageSpec.new(ogp.image)
|
53
|
-
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
54
|
-
@images << {:img => ogp.image, :width => img_spec.width, :height => img_spec.height}
|
55
|
-
end
|
56
|
-
rescue
|
57
|
-
end
|
58
|
-
end
|
59
|
-
nokogiri.css('img').each do |img|
|
60
|
-
img_src = img.get_attribute("src")
|
61
|
-
img_src = complete_url img_src, url
|
62
|
-
img_uri = URI.parse(img_src)
|
63
|
-
img_ext = File.extname(img_uri.path)
|
64
|
-
img_name = File.basename(img_uri.path,img_ext)
|
65
|
-
if [".jpg", ".jpeg", ".png"].include? img_ext
|
24
|
+
Array.new.tap do |images|
|
25
|
+
if ogp and ogp.image
|
66
26
|
begin
|
67
|
-
img_spec = ImageSpec.new(
|
27
|
+
img_spec = ImageSpec.new(ogp.image)
|
68
28
|
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
69
|
-
|
29
|
+
images << {:img => ogp.image, :width => img_spec.width, :height => img_spec.height}
|
70
30
|
end
|
71
31
|
rescue
|
72
32
|
end
|
33
|
+
end
|
34
|
+
|
35
|
+
nokogiri.css('img').each do |img|
|
36
|
+
img_src = img.get_attribute("src")
|
37
|
+
img_src = complete_url img_src, url
|
38
|
+
img_uri = URI.parse(img_src)
|
39
|
+
img_ext = File.extname(img_uri.path)
|
40
|
+
img_name = File.basename(img_uri.path,img_ext)
|
41
|
+
|
42
|
+
if [".jpg", ".jpeg", ".png"].include? img_ext
|
43
|
+
begin
|
44
|
+
img_spec = ImageSpec.new(img_src)
|
45
|
+
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
46
|
+
images << {:img => img_src, :width => img_spec.width, :height => img_spec.height}
|
47
|
+
end
|
48
|
+
rescue
|
49
|
+
end
|
50
|
+
end
|
73
51
|
end
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
52
|
+
end
|
53
|
+
end
|
77
54
|
|
78
55
|
def nokogiri
|
79
|
-
|
80
|
-
@nokogiri = Nokogiri::HTML(body)
|
56
|
+
Nokogiri::HTML(body)
|
81
57
|
end
|
82
58
|
|
83
59
|
def ogp
|
84
|
-
|
85
|
-
@ogp = OpenGraph::Object.new
|
60
|
+
ogp = OpenGraph::Object.new
|
86
61
|
nokogiri.css('meta').each do |m|
|
87
62
|
if m.attribute('property') && m.attribute('property').to_s.match(/^og:(.+)$/i)
|
88
|
-
|
63
|
+
ogp[$1.gsub('-','_')] = m.attribute('content').to_s
|
89
64
|
end
|
90
65
|
end
|
91
|
-
|
92
|
-
|
93
|
-
@ogp
|
66
|
+
ogp = false if ogp.keys.empty? || !ogp.valid?
|
67
|
+
ogp
|
94
68
|
end
|
95
69
|
|
70
|
+
memoize :nokogiri, :ogp,
|
71
|
+
:title, :description, :images
|
72
|
+
|
96
73
|
private
|
97
74
|
|
98
75
|
def complete_url src, url
|
data/lib/linkser/parser.rb
CHANGED
@@ -2,23 +2,24 @@ require 'open-uri'
|
|
2
2
|
require 'net/http'
|
3
3
|
|
4
4
|
module Linkser
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
5
|
+
class Parser
|
6
|
+
attr_reader :object
|
7
|
+
|
8
|
+
def initialize url, options={}
|
10
9
|
head = get_head url
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
10
|
+
|
11
|
+
@object =
|
12
|
+
case head.content_type
|
13
|
+
when "text/html"
|
14
|
+
Linkser::Objects::HTML.new url, head
|
15
|
+
else
|
16
|
+
Linkser::Object.new url, head
|
17
|
+
end
|
17
18
|
end
|
18
19
|
|
19
20
|
private
|
20
21
|
|
21
|
-
def
|
22
|
+
def get_head url, limit = 10
|
22
23
|
raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit' if (limit==0)
|
23
24
|
uri = URI.parse url
|
24
25
|
http = Net::HTTP.start uri.host, uri.port
|
@@ -34,19 +35,6 @@ module Linkser
|
|
34
35
|
raise 'The HTTP request has a ' + response.code + ' code'
|
35
36
|
end
|
36
37
|
end
|
37
|
-
|
38
|
-
def self.is_valid_url? url
|
39
|
-
begin
|
40
|
-
uri = URI.parse(url)
|
41
|
-
if [:scheme, :host].any? { |i| uri.send(i).blank? }
|
42
|
-
raise URI::InvalidURIError
|
43
|
-
end
|
44
|
-
return true
|
45
|
-
rescue URI::InvalidURIError => e
|
46
|
-
warn e.to_s
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
end
|
50
38
|
end
|
51
39
|
end
|
52
40
|
|
data/lib/linkser/version.rb
CHANGED
data/lib/linkser.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
require 'active_support'
|
1
2
|
require 'linkser/version'
|
2
3
|
|
3
4
|
module Linkser
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
class << self
|
6
|
+
def parse(url, options = {})
|
7
|
+
Linkser::Parser.new(url, options).object
|
8
|
+
end
|
8
9
|
end
|
9
10
|
end
|
10
11
|
|
12
|
+
require 'linkser/parser'
|
13
|
+
require 'linkser/object'
|
data/linkser.gemspec
CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.add_runtime_dependency('rmagick', '~> 2.13.1')
|
26
26
|
s.add_runtime_dependency('ruby-imagespec', "~> 0.2.0")
|
27
27
|
s.add_runtime_dependency('opengraph', "~> 0.0.4")
|
28
|
+
s.add_runtime_dependency('activesupport', "> 2.2.1")
|
28
29
|
|
29
30
|
# Development Gem dependencies
|
30
31
|
#
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Eduardo Casanova
|
@@ -15,7 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-11-21 00:00:00
|
18
|
+
date: 2011-11-21 00:00:00 +01:00
|
19
|
+
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
21
22
|
name: rake
|
@@ -96,9 +97,25 @@ dependencies:
|
|
96
97
|
type: :runtime
|
97
98
|
version_requirements: *id005
|
98
99
|
- !ruby/object:Gem::Dependency
|
99
|
-
name:
|
100
|
+
name: activesupport
|
100
101
|
prerelease: false
|
101
102
|
requirement: &id006 !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 5
|
108
|
+
segments:
|
109
|
+
- 2
|
110
|
+
- 2
|
111
|
+
- 1
|
112
|
+
version: 2.2.1
|
113
|
+
type: :runtime
|
114
|
+
version_requirements: *id006
|
115
|
+
- !ruby/object:Gem::Dependency
|
116
|
+
name: ruby-debug
|
117
|
+
prerelease: false
|
118
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
102
119
|
none: false
|
103
120
|
requirements:
|
104
121
|
- - ">="
|
@@ -110,11 +127,11 @@ dependencies:
|
|
110
127
|
- 3
|
111
128
|
version: 0.10.3
|
112
129
|
type: :development
|
113
|
-
version_requirements: *
|
130
|
+
version_requirements: *id007
|
114
131
|
- !ruby/object:Gem::Dependency
|
115
132
|
name: rspec
|
116
133
|
prerelease: false
|
117
|
-
requirement: &
|
134
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
118
135
|
none: false
|
119
136
|
requirements:
|
120
137
|
- - ">="
|
@@ -126,7 +143,7 @@ dependencies:
|
|
126
143
|
- 0
|
127
144
|
version: 2.7.0
|
128
145
|
type: :development
|
129
|
-
version_requirements: *
|
146
|
+
version_requirements: *id008
|
130
147
|
description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference it and returns the relevant information about the resource.
|
131
148
|
email:
|
132
149
|
- ecasanovac@gmail.com
|
@@ -152,6 +169,7 @@ files:
|
|
152
169
|
- linkser.gemspec
|
153
170
|
- spec/linkser_spec.rb
|
154
171
|
- spec/spec_helper.rb
|
172
|
+
has_rdoc: true
|
155
173
|
homepage: https://github.com/ging/linkser
|
156
174
|
licenses: []
|
157
175
|
|
@@ -181,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
199
|
requirements: []
|
182
200
|
|
183
201
|
rubyforge_project:
|
184
|
-
rubygems_version: 1.
|
202
|
+
rubygems_version: 1.6.2
|
185
203
|
signing_key:
|
186
204
|
specification_version: 3
|
187
205
|
summary: A link parser for Ruby
|