linkser 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
module Linkser
  # Base class for the objects Linkser builds from a dereferenced URL.
  #
  # It only stores the URL and the response head obtained for it; subclasses
  # (for example Linkser::Objects::HTML) add content-specific accessors.
  class Object
    # url  - the URL string this object was built from.
    # head - the head response fetched for that URL.
    attr_reader :url, :head

    # url     - URL of the resource.
    # head    - head response for the resource.
    # options - optional Hash; accepted for subclasses, not used here.
    def initialize(url, head, options = {})
      @url = url
      # The instance variable must be named @head so that attr_reader :head
      # returns it (it was previously stored under a misspelled name).
      @head = head
    end
  end
end
@@ -0,0 +1,121 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'net/http'
4
+ require 'image_spec'
5
+ require 'opengraph'
6
+
7
+ module Linkser
8
+ module Objects
9
+ class HTML < Linkser::Object
10
+ attr_reader :body, :nokogiri
11
+ attr_reader :title, :description, :images, :ogp
12
# Builds an HTML object; all three arguments are handed unchanged to the
# parent class constructor.
def initialize(url, head, options = {})
  # A bare `super` forwards the current values of url, head and options.
  super
end
15
+
16
# Returns the page title, memoized in @title.
#
# The Open Graph title is preferred when `ogp` is available and has one;
# otherwise the text of the first <title> element in the document is used.
# Returns nil when neither source yields a title.
def title
  return @title unless @title.nil?
  if ogp && ogp.title
    @title = ogp.title
  else
    # Take the first <title> only: later <title> elements (for example
    # inside inline SVG) must not override the document title.
    title_node = nokogiri.at_css('title')
    @title = title_node.text if title_node
  end
  @title
end
27
+
28
# Returns the fetched page body, memoized in @body.
#
# The value is whatever open-uri returns for the URL (an IO-like object),
# exactly as before; it is not converted to a String.
def body
  return @body unless @body.nil?
  # Open through the parsed URI instead of Kernel#open: Kernel#open no longer
  # fetches URLs on Ruby 3 and would also open local files or pipes if handed
  # a non-URL string. URI#open is provided by open-uri for http/https/ftp.
  @body = URI.parse(url).open
end
32
+
33
# Returns the page description, memoized in @description.
#
# The Open Graph description is preferred when `ogp` is available and has
# one; otherwise the `content` of the <meta name="description"> element is
# used. When several such elements exist the last one wins, as before.
# Returns nil when no description is found.
def description
  return @description unless @description.nil?
  if ogp && ogp.description
    @description = ogp.description
  else
    nokogiri.css('meta').each do |meta|
      meta_name = meta.get_attribute("name")
      # Metadata names are compared case-insensitively so that variants such
      # as name="Description" are recognised too.
      if meta_name && meta_name.strip.downcase == "description"
        @description = meta.get_attribute("content")
      end
    end
  end
  @description
end
46
+
47
# Returns the list of usable images for the page, memoized in @images.
#
# Each entry is a Hash with :img (absolute URL), :width and :height. The
# Open Graph image, when present, is listed first, followed by every <img>
# whose URL ends in .jpg, .jpeg or .png (any letter case) and whose
# dimensions pass `valid_img?`.
#
# Collection is best effort: an image that cannot be resolved, parsed or
# measured is skipped and never aborts the scan of the remaining images.
def images
  return @images unless @images.nil?
  @images = []
  if ogp && ogp.image
    begin
      spec = ImageSpec.new(ogp.image)
      if valid_img?(spec.width.to_f, spec.height.to_f)
        @images << {:img => ogp.image, :width => spec.width, :height => spec.height}
      end
    rescue StandardError
      # The Open Graph image could not be measured; ignore it.
    end
  end
  nokogiri.css('img').each do |img|
    raw_src = img.get_attribute("src")
    # <img> elements without a usable src attribute have nothing to fetch.
    next if raw_src.nil? || raw_src.strip.empty?
    begin
      img_src = complete_url raw_src, url
      img_ext = File.extname(URI.parse(img_src).path.to_s).downcase
      next unless [".jpg", ".jpeg", ".png"].include? img_ext
      spec = ImageSpec.new(img_src)
      if valid_img?(spec.width.to_f, spec.height.to_f)
        @images << {:img => img_src, :width => spec.width, :height => spec.height}
      end
    rescue StandardError
      # Unparsable URL or unreadable image: skip this one, keep scanning.
    end
  end
  @images
end
77
+
78
# Parsed document for the page body, built on first use and cached in
# @nokogiri for later calls.
def nokogiri
  @nokogiri ||= Nokogiri::HTML(body)
end
82
+
83
# Returns the Open Graph data of the page, memoized in @ogp.
#
# Every <meta> whose `property` attribute starts with "og:" is copied into
# an OpenGraph::Object, keyed by the property name without the prefix and
# with dashes turned into underscores. Returns false (also memoized) when
# the page has no Open Graph properties or when the collected object does
# not report itself as valid.
def ogp
  return @ogp unless @ogp.nil?
  graph = OpenGraph::Object.new
  nokogiri.css('meta').each do |m|
    property = m.attribute('property')
    next unless property
    og_match = property.to_s.match(/^og:(.+)$/i)
    next unless og_match
    graph[og_match[1].gsub('-', '_')] = m.attribute('content').to_s
  end
  # Check both conditions on the collected object itself; calling valid?
  # after @ogp had already been replaced by false raised NoMethodError.
  @ogp = (graph.keys.empty? || !graph.valid?) ? false : graph
end
95
+
96
+ private
97
+
98
# Resolves an image reference found in the page against the page URL and
# returns the resulting absolute URL as a String.
#
# src - reference taken from the document (absolute, scheme-relative,
#       root-relative or path-relative); nil is treated as an empty string.
# url - URL of the page the reference was found in.
#
# Resolution follows standard URI reference rules via URI.join, so https
# and "//host/..." sources are kept intact, the scheme and port of the page
# are preserved, and relative paths are resolved against the page's
# directory rather than appended to its file name. If either string cannot
# be handled by URI, the stripped src is returned unchanged.
def complete_url src, url
  reference = src.to_s.strip
  URI.join(url, reference).to_s
rescue URI::Error
  reference
end
110
+
111
# Tells whether an image of the given dimensions is worth listing.
#
# w - image width, h - image height (numeric; 0 means unknown).
#
# An image qualifies when at least one side is larger than 199 and either
#   * both sides are known and the width/height ratio lies strictly between
#     0.2 and 3, or
#   * only one side is known and it is smaller than 700.
def valid_img? w, h
  # Work on floats so the ratio is not truncated when integers are passed.
  w = w.to_f
  h = h.to_f
  # Either side may satisfy the minimum size (the height used to be ignored).
  return false unless w > 199 || h > 199

  if w > 0 && h > 0
    ratio = w / h
    ratio < 3 && ratio > 0.2
  elsif w > 0 && h == 0
    w < 700
  elsif w == 0 && h > 0
    h < 700
  else
    false
  end
end
119
+ end
120
+ end
121
+ end
@@ -5,23 +5,21 @@ module Linkser
5
5
  module Parser
6
6
  def self.parse url, options={}
7
7
  if !is_valid_url? url
8
- raise "Invalid URL"
8
+ raise "Invalid URL"
9
9
  end
10
10
  head = get_head url
11
11
  case head.content_type
12
12
  when "text/html"
13
- Linkser::Parser::HTML.new.parse url
13
+ Linkser::Objects::HTML.new url, head
14
14
  else
15
15
  raise "I have no idea on how to parse a '" + head.content_type + "'"
16
16
  end
17
17
  end
18
18
 
19
- #private
19
+ private
20
20
 
21
21
  def self.get_head url, limit = 10
22
- if (limit==0)
23
- raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit'
24
- end
22
+ raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit' if (limit==0)
25
23
  uri = URI.parse url
26
24
  http = Net::HTTP.start uri.host, uri.port
27
25
  response = http.head uri.request_uri
@@ -33,7 +31,7 @@ module Linkser
33
31
  warn "Redirecting to #{location}"
34
32
  return get_head location, limit - 1
35
33
  else
36
- raise 'The HTTP responded with an ' + response.code + ' code'
34
+ raise 'The HTTP request has a ' + response.code + ' code'
37
35
  end
38
36
  end
39
37
 
@@ -41,10 +39,11 @@ module Linkser
41
39
  begin
42
40
  uri = URI.parse(url)
43
41
  if [:scheme, :host].any? { |i| uri.send(i).blank? }
44
- raise(URI::InvalidURIError)
42
+ raise URI::InvalidURIError
45
43
  end
46
44
  return true
47
45
  rescue URI::InvalidURIError => e
46
+ warn e.to_s
48
47
  return false
49
48
  end
50
49
  end
@@ -1,3 +1,3 @@
1
1
  module Linkser
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/linkser.rb CHANGED
@@ -2,8 +2,9 @@ require 'linkser/version'
2
2
 
3
3
  module Linkser
4
4
  autoload :Parser, 'linkser/parser'
5
- module Parser
6
- autoload :HTML, 'linkser/parser/html'
5
+ autoload :Object, 'linkser/object'
6
+ module Objects
7
+ autoload :HTML, 'linkser/objects/html'
7
8
  end
8
9
  end
9
10
 
data/linkser.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  s.add_runtime_dependency('nokogiri', '~> 1.4.2')
25
25
  s.add_runtime_dependency('rmagick', '~> 2.13.1')
26
26
  s.add_runtime_dependency('ruby-imagespec', "~> 0.2.0")
27
+ s.add_runtime_dependency('opengraph', "~> 0.0.4")
27
28
 
28
29
  # Development Gem dependencies
29
30
  #
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Eduardo Casanova
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-11-18 00:00:00 +01:00
19
- default_executable:
18
+ date: 2011-11-21 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rake
@@ -81,9 +80,25 @@ dependencies:
81
80
  type: :runtime
82
81
  version_requirements: *id004
83
82
  - !ruby/object:Gem::Dependency
84
- name: ruby-debug
83
+ name: opengraph
85
84
  prerelease: false
86
85
  requirement: &id005 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ~>
89
+ - !ruby/object:Gem::Version
90
+ hash: 23
91
+ segments:
92
+ - 0
93
+ - 0
94
+ - 4
95
+ version: 0.0.4
96
+ type: :runtime
97
+ version_requirements: *id005
98
+ - !ruby/object:Gem::Dependency
99
+ name: ruby-debug
100
+ prerelease: false
101
+ requirement: &id006 !ruby/object:Gem::Requirement
87
102
  none: false
88
103
  requirements:
89
104
  - - ">="
@@ -95,11 +110,11 @@ dependencies:
95
110
  - 3
96
111
  version: 0.10.3
97
112
  type: :development
98
- version_requirements: *id005
113
+ version_requirements: *id006
99
114
  - !ruby/object:Gem::Dependency
100
115
  name: rspec
101
116
  prerelease: false
102
- requirement: &id006 !ruby/object:Gem::Requirement
117
+ requirement: &id007 !ruby/object:Gem::Requirement
103
118
  none: false
104
119
  requirements:
105
120
  - - ">="
@@ -111,7 +126,7 @@ dependencies:
111
126
  - 0
112
127
  version: 2.7.0
113
128
  type: :development
114
- version_requirements: *id006
129
+ version_requirements: *id007
115
130
  description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference it and returns the relevant information about the resource.
116
131
  email:
117
132
  - ecasanovac@gmail.com
@@ -130,13 +145,13 @@ files:
130
145
  - README.textile
131
146
  - Rakefile
132
147
  - lib/linkser.rb
148
+ - lib/linkser/object.rb
149
+ - lib/linkser/objects/html.rb
133
150
  - lib/linkser/parser.rb
134
- - lib/linkser/parser/html.rb
135
151
  - lib/linkser/version.rb
136
152
  - linkser.gemspec
137
153
  - spec/linkser_spec.rb
138
154
  - spec/spec_helper.rb
139
- has_rdoc: true
140
155
  homepage: https://github.com/ging/linkser
141
156
  licenses: []
142
157
 
@@ -166,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
181
  requirements: []
167
182
 
168
183
  rubyforge_project:
169
- rubygems_version: 1.6.2
184
+ rubygems_version: 1.8.10
170
185
  signing_key:
171
186
  specification_version: 3
172
187
  summary: A link parser for Ruby
@@ -1,73 +0,0 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'net/http'
4
- require 'image_spec'
5
-
6
- module Linkser
7
- module Parser
8
- class HTML
9
- def parse url, options={}
10
- parsed_page = Hash.new
11
-
12
- doc = Nokogiri::HTML(open(url))
13
-
14
- doc.css('title').each do |title|
15
- parsed_page.update({:title => title.text})
16
- end
17
-
18
- doc.css('meta').each do |meta|
19
- if meta.get_attribute("name").eql? "description"
20
- parsed_page.update({:description => meta.get_attribute("content")})
21
- end
22
- end
23
-
24
- images = Array.new
25
-
26
- doc.css('img').each do |img|
27
- img_src = img.get_attribute("src")
28
- img_src = get_complete_url img_src, url
29
- img_uri = URI.parse(img_src)
30
- img_ext = File.extname(img_uri.path)
31
- img_name = File.basename(img_uri.path,img_ext)
32
- if [".jpg", ".jpeg", ".png"].include? img_ext
33
- begin
34
- img_spec = ImageSpec.new(img_src)
35
- w = img_spec.width.to_f
36
- h = img_spec.height.to_f
37
- if w > 199 or w > 199
38
- if ((w > 0 and h > 0 and ((w / h) < 3) and ((w / h) > 0.2)) or (w > 0 and h == 0 and w < 700) or (w == 0 and h > 0 and h < 700)) and img_name.index("logo").nil?
39
- image = {:img => img_src, :width => w.to_i, :height => h.to_i}
40
- images << image
41
- end
42
- end
43
- rescue
44
- end
45
- end
46
- end
47
-
48
- if images!=[]
49
- parsed_page.update({:images => images})
50
- end
51
-
52
- return parsed_page
53
- end
54
-
55
- private
56
-
57
- def get_complete_url src, url
58
- uri = URI.parse(url)
59
- base_url = "http://" + uri.host + (uri.port!=80 ? ":" + uri.port.to_s : "")
60
- relative_url = "http://" + uri.host + (uri.port!=80 ? ":" + uri.port.to_s : "") + uri.path
61
- if src.index("http://")==0
62
- src = src
63
- #stays the same
64
- elsif src.index("/")==0
65
- src = base_url + src
66
- else
67
- src = relative_url + src
68
- end
69
- end
70
- end
71
- end
72
- end
73
-