linkser 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/linkser/object.rb +12 -0
- data/lib/linkser/objects/html.rb +38 -61
- data/lib/linkser/parser.rb +13 -25
- data/lib/linkser/version.rb +1 -1
- data/lib/linkser.rb +7 -4
- data/linkser.gemspec +1 -0
- metadata +27 -9
data/lib/linkser/object.rb
CHANGED
@@ -1,10 +1,22 @@
|
|
1
1
|
module Linkser
|
2
2
|
class Object
|
3
|
+
extend ActiveSupport::Memoizable
|
4
|
+
|
3
5
|
attr_reader :url, :head
|
4
6
|
|
5
7
|
def initialize url, head, options={}
|
6
8
|
@url = url
|
7
9
|
@head = head
|
8
10
|
end
|
11
|
+
|
12
|
+
def body
|
13
|
+
open(url)
|
14
|
+
end
|
15
|
+
|
16
|
+
memoize :body
|
17
|
+
|
18
|
+
|
9
19
|
end
|
10
20
|
end
|
21
|
+
|
22
|
+
require 'linkser/objects/html'
|
data/lib/linkser/objects/html.rb
CHANGED
@@ -7,92 +7,69 @@ require 'opengraph'
|
|
7
7
|
module Linkser
|
8
8
|
module Objects
|
9
9
|
class HTML < Linkser::Object
|
10
|
-
attr_reader :body, :nokogiri
|
11
|
-
attr_reader :title, :description, :images, :ogp
|
12
|
-
def initialize url, head, options={}
|
13
|
-
super url, head, options
|
14
|
-
end
|
15
10
|
|
16
11
|
def title
|
17
|
-
|
18
|
-
|
19
|
-
@title = ogp.title
|
20
|
-
else
|
21
|
-
nokogiri.css('title').each do |title|
|
22
|
-
@title = title.text
|
23
|
-
end
|
24
|
-
end
|
25
|
-
@title
|
26
|
-
end
|
27
|
-
|
28
|
-
def body
|
29
|
-
return @body unless @body.nil?
|
30
|
-
@body = open(url)
|
12
|
+
ogp && ogp.title ||
|
13
|
+
(e = nokogiri.css('title')).first && e.text
|
31
14
|
end
|
32
15
|
|
33
16
|
def description
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
nokogiri.css('meta').each do |meta|
|
39
|
-
if meta.get_attribute("name").eql? "description"
|
40
|
-
@description = meta.get_attribute("content")
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
@description
|
17
|
+
ogp && ogp.description ||
|
18
|
+
(e = nokogiri.css('meta').find { |meta|
|
19
|
+
meta.get_attribute("name").eql? "description"
|
20
|
+
}) && e.get_attribute("content")
|
45
21
|
end
|
46
22
|
|
47
23
|
def images
|
48
|
-
|
49
|
-
|
50
|
-
if ogp and ogp.image
|
51
|
-
begin
|
52
|
-
img_spec = ImageSpec.new(ogp.image)
|
53
|
-
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
54
|
-
@images << {:img => ogp.image, :width => img_spec.width, :height => img_spec.height}
|
55
|
-
end
|
56
|
-
rescue
|
57
|
-
end
|
58
|
-
end
|
59
|
-
nokogiri.css('img').each do |img|
|
60
|
-
img_src = img.get_attribute("src")
|
61
|
-
img_src = complete_url img_src, url
|
62
|
-
img_uri = URI.parse(img_src)
|
63
|
-
img_ext = File.extname(img_uri.path)
|
64
|
-
img_name = File.basename(img_uri.path,img_ext)
|
65
|
-
if [".jpg", ".jpeg", ".png"].include? img_ext
|
24
|
+
Array.new.tap do |images|
|
25
|
+
if ogp and ogp.image
|
66
26
|
begin
|
67
|
-
img_spec = ImageSpec.new(
|
27
|
+
img_spec = ImageSpec.new(ogp.image)
|
68
28
|
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
69
|
-
|
29
|
+
images << {:img => ogp.image, :width => img_spec.width, :height => img_spec.height}
|
70
30
|
end
|
71
31
|
rescue
|
72
32
|
end
|
33
|
+
end
|
34
|
+
|
35
|
+
nokogiri.css('img').each do |img|
|
36
|
+
img_src = img.get_attribute("src")
|
37
|
+
img_src = complete_url img_src, url
|
38
|
+
img_uri = URI.parse(img_src)
|
39
|
+
img_ext = File.extname(img_uri.path)
|
40
|
+
img_name = File.basename(img_uri.path,img_ext)
|
41
|
+
|
42
|
+
if [".jpg", ".jpeg", ".png"].include? img_ext
|
43
|
+
begin
|
44
|
+
img_spec = ImageSpec.new(img_src)
|
45
|
+
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
46
|
+
images << {:img => img_src, :width => img_spec.width, :height => img_spec.height}
|
47
|
+
end
|
48
|
+
rescue
|
49
|
+
end
|
50
|
+
end
|
73
51
|
end
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
52
|
+
end
|
53
|
+
end
|
77
54
|
|
78
55
|
def nokogiri
|
79
|
-
|
80
|
-
@nokogiri = Nokogiri::HTML(body)
|
56
|
+
Nokogiri::HTML(body)
|
81
57
|
end
|
82
58
|
|
83
59
|
def ogp
|
84
|
-
|
85
|
-
@ogp = OpenGraph::Object.new
|
60
|
+
ogp = OpenGraph::Object.new
|
86
61
|
nokogiri.css('meta').each do |m|
|
87
62
|
if m.attribute('property') && m.attribute('property').to_s.match(/^og:(.+)$/i)
|
88
|
-
|
63
|
+
ogp[$1.gsub('-','_')] = m.attribute('content').to_s
|
89
64
|
end
|
90
65
|
end
|
91
|
-
|
92
|
-
|
93
|
-
@ogp
|
66
|
+
ogp = false if ogp.keys.empty? || !ogp.valid?
|
67
|
+
ogp
|
94
68
|
end
|
95
69
|
|
70
|
+
memoize :nokogiri, :ogp,
|
71
|
+
:title, :description, :images
|
72
|
+
|
96
73
|
private
|
97
74
|
|
98
75
|
def complete_url src, url
|
data/lib/linkser/parser.rb
CHANGED
@@ -2,23 +2,24 @@ require 'open-uri'
|
|
2
2
|
require 'net/http'
|
3
3
|
|
4
4
|
module Linkser
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
5
|
+
class Parser
|
6
|
+
attr_reader :object
|
7
|
+
|
8
|
+
def initialize url, options={}
|
10
9
|
head = get_head url
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
10
|
+
|
11
|
+
@object =
|
12
|
+
case head.content_type
|
13
|
+
when "text/html"
|
14
|
+
Linkser::Objects::HTML.new url, head
|
15
|
+
else
|
16
|
+
Linkser::Object.new url, head
|
17
|
+
end
|
17
18
|
end
|
18
19
|
|
19
20
|
private
|
20
21
|
|
21
|
-
def
|
22
|
+
def get_head url, limit = 10
|
22
23
|
raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit' if (limit==0)
|
23
24
|
uri = URI.parse url
|
24
25
|
http = Net::HTTP.start uri.host, uri.port
|
@@ -34,19 +35,6 @@ module Linkser
|
|
34
35
|
raise 'The HTTP request has a ' + response.code + ' code'
|
35
36
|
end
|
36
37
|
end
|
37
|
-
|
38
|
-
def self.is_valid_url? url
|
39
|
-
begin
|
40
|
-
uri = URI.parse(url)
|
41
|
-
if [:scheme, :host].any? { |i| uri.send(i).blank? }
|
42
|
-
raise URI::InvalidURIError
|
43
|
-
end
|
44
|
-
return true
|
45
|
-
rescue URI::InvalidURIError => e
|
46
|
-
warn e.to_s
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
end
|
50
38
|
end
|
51
39
|
end
|
52
40
|
|
data/lib/linkser/version.rb
CHANGED
data/lib/linkser.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
require 'active_support'
|
1
2
|
require 'linkser/version'
|
2
3
|
|
3
4
|
module Linkser
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
class << self
|
6
|
+
def parse(url, options = {})
|
7
|
+
Linkser::Parser.new(url, options).object
|
8
|
+
end
|
8
9
|
end
|
9
10
|
end
|
10
11
|
|
12
|
+
require 'linkser/parser'
|
13
|
+
require 'linkser/object'
|
data/linkser.gemspec
CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.add_runtime_dependency('rmagick', '~> 2.13.1')
|
26
26
|
s.add_runtime_dependency('ruby-imagespec', "~> 0.2.0")
|
27
27
|
s.add_runtime_dependency('opengraph', "~> 0.0.4")
|
28
|
+
s.add_runtime_dependency('activesupport', "> 2.2.1")
|
28
29
|
|
29
30
|
# Development Gem dependencies
|
30
31
|
#
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Eduardo Casanova
|
@@ -15,7 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-11-21 00:00:00
|
18
|
+
date: 2011-11-21 00:00:00 +01:00
|
19
|
+
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
21
22
|
name: rake
|
@@ -96,9 +97,25 @@ dependencies:
|
|
96
97
|
type: :runtime
|
97
98
|
version_requirements: *id005
|
98
99
|
- !ruby/object:Gem::Dependency
|
99
|
-
name:
|
100
|
+
name: activesupport
|
100
101
|
prerelease: false
|
101
102
|
requirement: &id006 !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 5
|
108
|
+
segments:
|
109
|
+
- 2
|
110
|
+
- 2
|
111
|
+
- 1
|
112
|
+
version: 2.2.1
|
113
|
+
type: :runtime
|
114
|
+
version_requirements: *id006
|
115
|
+
- !ruby/object:Gem::Dependency
|
116
|
+
name: ruby-debug
|
117
|
+
prerelease: false
|
118
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
102
119
|
none: false
|
103
120
|
requirements:
|
104
121
|
- - ">="
|
@@ -110,11 +127,11 @@ dependencies:
|
|
110
127
|
- 3
|
111
128
|
version: 0.10.3
|
112
129
|
type: :development
|
113
|
-
version_requirements: *
|
130
|
+
version_requirements: *id007
|
114
131
|
- !ruby/object:Gem::Dependency
|
115
132
|
name: rspec
|
116
133
|
prerelease: false
|
117
|
-
requirement: &
|
134
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
118
135
|
none: false
|
119
136
|
requirements:
|
120
137
|
- - ">="
|
@@ -126,7 +143,7 @@ dependencies:
|
|
126
143
|
- 0
|
127
144
|
version: 2.7.0
|
128
145
|
type: :development
|
129
|
-
version_requirements: *
|
146
|
+
version_requirements: *id008
|
130
147
|
description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference it and returns the relevant information about the resource.
|
131
148
|
email:
|
132
149
|
- ecasanovac@gmail.com
|
@@ -152,6 +169,7 @@ files:
|
|
152
169
|
- linkser.gemspec
|
153
170
|
- spec/linkser_spec.rb
|
154
171
|
- spec/spec_helper.rb
|
172
|
+
has_rdoc: true
|
155
173
|
homepage: https://github.com/ging/linkser
|
156
174
|
licenses: []
|
157
175
|
|
@@ -181,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
199
|
requirements: []
|
182
200
|
|
183
201
|
rubyforge_project:
|
184
|
-
rubygems_version: 1.
|
202
|
+
rubygems_version: 1.6.2
|
185
203
|
signing_key:
|
186
204
|
specification_version: 3
|
187
205
|
summary: A link parser for Ruby
|