linkser 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/linkser/object.rb +25 -6
- data/lib/linkser/objects/html.rb +9 -6
- data/lib/linkser/parser.rb +29 -16
- data/lib/linkser/resource.rb +25 -0
- data/lib/linkser/version.rb +1 -1
- data/lib/linkser.rb +1 -0
- metadata +68 -136
data/lib/linkser/object.rb
CHANGED
@@ -1,21 +1,40 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
|
1
4
|
module Linkser
|
2
5
|
class Object
|
3
6
|
extend ActiveSupport::Memoizable
|
4
7
|
|
5
|
-
attr_reader :url, :head
|
6
|
-
|
7
|
-
def initialize url, head, options={}
|
8
|
+
attr_reader :url, :last_url, :head
|
9
|
+
def initialize url, last_url, head, options={}
|
8
10
|
@url = url
|
9
|
-
@
|
11
|
+
@last_url = last_url
|
12
|
+
@head = head
|
13
|
+
@options = options
|
10
14
|
end
|
11
15
|
|
12
16
|
def body
|
13
|
-
|
17
|
+
uri = URI.parse last_url
|
18
|
+
if uri.scheme and (uri.scheme.eql? "http" or uri.scheme.eql? "https")
|
19
|
+
http = Net::HTTP.new uri.host, uri.port
|
20
|
+
if uri.scheme.eql? "https"
|
21
|
+
unless @options[:ssl_verify]==true
|
22
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
23
|
+
end
|
24
|
+
http.use_ssl = true
|
25
|
+
end
|
26
|
+
else
|
27
|
+
raise 'URI ' + uri.to_s + ' is not supported by Linkser'
|
28
|
+
end
|
29
|
+
http.start do |agent|
|
30
|
+
request = Net::HTTP::Get.new uri.request_uri
|
31
|
+
response = http.request request
|
32
|
+
return response.body
|
33
|
+
end
|
14
34
|
end
|
15
35
|
|
16
36
|
memoize :body
|
17
37
|
|
18
|
-
|
19
38
|
end
|
20
39
|
end
|
21
40
|
|
data/lib/linkser/objects/html.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'open-uri'
|
3
|
-
require 'net/http'
|
4
2
|
require 'image_spec'
|
5
3
|
require 'opengraph'
|
6
4
|
|
@@ -26,13 +24,14 @@ module Linkser
|
|
26
24
|
begin
|
27
25
|
img_spec = ImageSpec.new(ogp.image)
|
28
26
|
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
29
|
-
images << {:
|
27
|
+
images << Linkser::Resource.new({:type => "image", :url => ogp.image, :width => img_spec.width, :height => img_spec.height})
|
30
28
|
end
|
31
29
|
rescue
|
32
30
|
end
|
33
31
|
end
|
34
32
|
|
35
33
|
nokogiri.css('img').each do |img|
|
34
|
+
break if images.length >= 5
|
36
35
|
img_src = img.get_attribute("src")
|
37
36
|
img_src = complete_url img_src, url
|
38
37
|
img_uri = URI.parse(img_src)
|
@@ -43,7 +42,7 @@ module Linkser
|
|
43
42
|
begin
|
44
43
|
img_spec = ImageSpec.new(img_src)
|
45
44
|
if valid_img? img_spec.width.to_f, img_spec.height.to_f
|
46
|
-
images << {:
|
45
|
+
images << Linkser::Resource.new({:type => "image", :url => img_src, :width => img_spec.width, :height => img_spec.height})
|
47
46
|
end
|
48
47
|
rescue
|
49
48
|
end
|
@@ -66,9 +65,13 @@ module Linkser
|
|
66
65
|
ogp = false if ogp.keys.empty? || !ogp.valid?
|
67
66
|
ogp
|
68
67
|
end
|
68
|
+
|
69
|
+
def resource
|
70
|
+
Linkser::Resource.new ogp
|
71
|
+
end
|
69
72
|
|
70
73
|
memoize :nokogiri, :ogp,
|
71
|
-
:title, :description, :images
|
74
|
+
:title, :description, :images, :resource
|
72
75
|
|
73
76
|
private
|
74
77
|
|
@@ -86,7 +89,7 @@ module Linkser
|
|
86
89
|
end
|
87
90
|
|
88
91
|
def valid_img? w, h
|
89
|
-
if w > 199 or
|
92
|
+
if w > 199 or h > 199
|
90
93
|
if ((w > 0 and h > 0 and ((w / h) < 3) and ((w / h) > 0.2)) or (w > 0 and h == 0 and w < 700) or (w == 0 and h > 0 and h < 700))
|
91
94
|
return true
|
92
95
|
end
|
data/lib/linkser/parser.rb
CHANGED
@@ -1,38 +1,51 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'net/http'
|
2
|
+
require 'net/https'
|
3
3
|
|
4
4
|
module Linkser
|
5
5
|
class Parser
|
6
|
-
attr_reader :object
|
6
|
+
attr_reader :object, :last_url
|
7
7
|
|
8
8
|
def initialize url, options={}
|
9
|
-
head = get_head url
|
9
|
+
head = get_head url, options
|
10
10
|
|
11
11
|
@object =
|
12
12
|
case head.content_type
|
13
13
|
when "text/html"
|
14
|
-
Linkser::Objects::HTML.new url, head
|
14
|
+
Linkser::Objects::HTML.new url, last_url, head
|
15
15
|
else
|
16
|
-
Linkser::Object.new url, head
|
16
|
+
Linkser::Object.new url, last_url, head
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
-
def get_head url, limit = 10
|
22
|
+
def get_head url, options, limit = 10
|
23
23
|
raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit' if (limit==0)
|
24
|
+
@last_url = url
|
24
25
|
uri = URI.parse url
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
return get_head location, limit - 1
|
26
|
+
if uri.scheme and (uri.scheme.eql? "http" or uri.scheme.eql? "https")
|
27
|
+
http = Net::HTTP.new uri.host, uri.port
|
28
|
+
if uri.scheme.eql? "https"
|
29
|
+
unless options[:ssl_verify]==true
|
30
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
31
|
+
end
|
32
|
+
http.use_ssl = true
|
33
|
+
end
|
34
34
|
else
|
35
|
-
raise '
|
35
|
+
raise 'URI ' + uri.to_s + ' is not supported by Linkser'
|
36
|
+
end
|
37
|
+
http.start do |agent|
|
38
|
+
response = agent.head uri.request_uri
|
39
|
+
case response
|
40
|
+
when Net::HTTPSuccess then
|
41
|
+
return response
|
42
|
+
when Net::HTTPRedirection then
|
43
|
+
location = response['location']
|
44
|
+
warn "Redirecting to #{location}"
|
45
|
+
return get_head location, options, limit - 1
|
46
|
+
else
|
47
|
+
raise 'The HTTP request has a ' + response.code + ' code'
|
48
|
+
end
|
36
49
|
end
|
37
50
|
end
|
38
51
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Linkser
|
2
|
+
class Resource
|
3
|
+
attr_accessor :type, :url, :width, :height
|
4
|
+
|
5
|
+
def initialize obj
|
6
|
+
case obj
|
7
|
+
when OpenGraph::Object
|
8
|
+
@type = obj["type"].split(".")[0] if obj["type"]
|
9
|
+
if @type
|
10
|
+
@url = obj[@type] if obj[@type]
|
11
|
+
@url = obj[@type + ":url"] if @url.nil? and obj[@type + ":url"]
|
12
|
+
@width = obj[@type + ":width"] if obj[@type + ":width"]
|
13
|
+
@height = obj[@type + ":height"] if obj[@type + ":height"]
|
14
|
+
end
|
15
|
+
when Hash
|
16
|
+
@type = obj[:type] if obj[:type]
|
17
|
+
@url = obj[:url] if obj[:url]
|
18
|
+
@width = obj[:width] if obj[:width]
|
19
|
+
@height = obj[:height] if obj[:height]
|
20
|
+
else
|
21
|
+
raise 'Error creating the Linkser::Resource. Expecting Hash or OpenGraph::Object but got ' + obj.class.to_s
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/linkser/version.rb
CHANGED
data/lib/linkser.rb
CHANGED
metadata
CHANGED
@@ -1,159 +1,101 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 4
|
10
|
-
version: 0.0.4
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Eduardo Casanova
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-12-03 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rake
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &81316060 !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
segments:
|
31
|
-
- 0
|
32
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: nokogiri
|
37
23
|
prerelease: false
|
38
|
-
|
24
|
+
version_requirements: *81316060
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: nokogiri
|
27
|
+
requirement: &81315670 !ruby/object:Gem::Requirement
|
39
28
|
none: false
|
40
|
-
requirements:
|
29
|
+
requirements:
|
41
30
|
- - ~>
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
hash: 3
|
44
|
-
segments:
|
45
|
-
- 1
|
46
|
-
- 4
|
47
|
-
- 2
|
31
|
+
- !ruby/object:Gem::Version
|
48
32
|
version: 1.4.2
|
49
33
|
type: :runtime
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: rmagick
|
53
34
|
prerelease: false
|
54
|
-
|
35
|
+
version_requirements: *81315670
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rmagick
|
38
|
+
requirement: &81315340 !ruby/object:Gem::Requirement
|
55
39
|
none: false
|
56
|
-
requirements:
|
40
|
+
requirements:
|
57
41
|
- - ~>
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
hash: 57
|
60
|
-
segments:
|
61
|
-
- 2
|
62
|
-
- 13
|
63
|
-
- 1
|
42
|
+
- !ruby/object:Gem::Version
|
64
43
|
version: 2.13.1
|
65
44
|
type: :runtime
|
66
|
-
version_requirements: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
name: ruby-imagespec
|
69
45
|
prerelease: false
|
70
|
-
|
46
|
+
version_requirements: *81315340
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: ruby-imagespec
|
49
|
+
requirement: &81314800 !ruby/object:Gem::Requirement
|
71
50
|
none: false
|
72
|
-
requirements:
|
51
|
+
requirements:
|
73
52
|
- - ~>
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
hash: 23
|
76
|
-
segments:
|
77
|
-
- 0
|
78
|
-
- 2
|
79
|
-
- 0
|
53
|
+
- !ruby/object:Gem::Version
|
80
54
|
version: 0.2.0
|
81
55
|
type: :runtime
|
82
|
-
version_requirements: *id004
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: opengraph
|
85
56
|
prerelease: false
|
86
|
-
|
57
|
+
version_requirements: *81314800
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: opengraph
|
60
|
+
requirement: &81313800 !ruby/object:Gem::Requirement
|
87
61
|
none: false
|
88
|
-
requirements:
|
62
|
+
requirements:
|
89
63
|
- - ~>
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
hash: 23
|
92
|
-
segments:
|
93
|
-
- 0
|
94
|
-
- 0
|
95
|
-
- 4
|
64
|
+
- !ruby/object:Gem::Version
|
96
65
|
version: 0.0.4
|
97
66
|
type: :runtime
|
98
|
-
version_requirements: *id005
|
99
|
-
- !ruby/object:Gem::Dependency
|
100
|
-
name: activesupport
|
101
67
|
prerelease: false
|
102
|
-
|
68
|
+
version_requirements: *81313800
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: activesupport
|
71
|
+
requirement: &81313180 !ruby/object:Gem::Requirement
|
103
72
|
none: false
|
104
|
-
requirements:
|
105
|
-
- -
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
hash: 5
|
108
|
-
segments:
|
109
|
-
- 2
|
110
|
-
- 2
|
111
|
-
- 1
|
73
|
+
requirements:
|
74
|
+
- - ! '>'
|
75
|
+
- !ruby/object:Gem::Version
|
112
76
|
version: 2.2.1
|
113
77
|
type: :runtime
|
114
|
-
version_requirements: *id006
|
115
|
-
- !ruby/object:Gem::Dependency
|
116
|
-
name: ruby-debug
|
117
78
|
prerelease: false
|
118
|
-
|
119
|
-
|
120
|
-
requirements:
|
121
|
-
- - ">="
|
122
|
-
- !ruby/object:Gem::Version
|
123
|
-
hash: 49
|
124
|
-
segments:
|
125
|
-
- 0
|
126
|
-
- 10
|
127
|
-
- 3
|
128
|
-
version: 0.10.3
|
129
|
-
type: :development
|
130
|
-
version_requirements: *id007
|
131
|
-
- !ruby/object:Gem::Dependency
|
79
|
+
version_requirements: *81313180
|
80
|
+
- !ruby/object:Gem::Dependency
|
132
81
|
name: rspec
|
133
|
-
|
134
|
-
requirement: &id008 !ruby/object:Gem::Requirement
|
82
|
+
requirement: &81312750 !ruby/object:Gem::Requirement
|
135
83
|
none: false
|
136
|
-
requirements:
|
137
|
-
- -
|
138
|
-
- !ruby/object:Gem::Version
|
139
|
-
hash: 19
|
140
|
-
segments:
|
141
|
-
- 2
|
142
|
-
- 7
|
143
|
-
- 0
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
144
87
|
version: 2.7.0
|
145
88
|
type: :development
|
146
|
-
|
147
|
-
|
148
|
-
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *81312750
|
91
|
+
description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference
|
92
|
+
it and returns the relevant information about the resource.
|
93
|
+
email:
|
149
94
|
- ecasanovac@gmail.com
|
150
95
|
executables: []
|
151
|
-
|
152
96
|
extensions: []
|
153
|
-
|
154
97
|
extra_rdoc_files: []
|
155
|
-
|
156
|
-
files:
|
98
|
+
files:
|
157
99
|
- .gitignore
|
158
100
|
- .rspec
|
159
101
|
- .travis.yml
|
@@ -165,43 +107,33 @@ files:
|
|
165
107
|
- lib/linkser/object.rb
|
166
108
|
- lib/linkser/objects/html.rb
|
167
109
|
- lib/linkser/parser.rb
|
110
|
+
- lib/linkser/resource.rb
|
168
111
|
- lib/linkser/version.rb
|
169
112
|
- linkser.gemspec
|
170
113
|
- spec/linkser_spec.rb
|
171
114
|
- spec/spec_helper.rb
|
172
|
-
has_rdoc: true
|
173
115
|
homepage: https://github.com/ging/linkser
|
174
116
|
licenses: []
|
175
|
-
|
176
117
|
post_install_message:
|
177
118
|
rdoc_options: []
|
178
|
-
|
179
|
-
require_paths:
|
119
|
+
require_paths:
|
180
120
|
- lib
|
181
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
182
122
|
none: false
|
183
|
-
requirements:
|
184
|
-
- -
|
185
|
-
- !ruby/object:Gem::Version
|
186
|
-
|
187
|
-
|
188
|
-
- 0
|
189
|
-
version: "0"
|
190
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ! '>='
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
191
128
|
none: false
|
192
|
-
requirements:
|
193
|
-
- -
|
194
|
-
- !ruby/object:Gem::Version
|
195
|
-
|
196
|
-
segments:
|
197
|
-
- 0
|
198
|
-
version: "0"
|
129
|
+
requirements:
|
130
|
+
- - ! '>='
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0'
|
199
133
|
requirements: []
|
200
|
-
|
201
134
|
rubyforge_project:
|
202
|
-
rubygems_version: 1.
|
135
|
+
rubygems_version: 1.8.10
|
203
136
|
signing_key:
|
204
137
|
specification_version: 3
|
205
138
|
summary: A link parser for Ruby
|
206
139
|
test_files: []
|
207
|
-
|