linkser 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
module Linkser
  # Base class for a resource handled by Linkser.
  #
  # It only stores the two values given to the constructor and exposes them
  # through readers; subclasses build their own accessors on top of them.
  #
  # Note: inside the Linkser namespace the bare constant +Object+ resolves to
  # this class, not to Ruby's ::Object.
  class Object
    # url  - the URL given to the constructor.
    # head - the head value given to the constructor.
    attr_reader :url, :head

    # Builds a new object.
    #
    # url     - URL of the resource.
    # head    - head value associated with +url+.
    # options - optional Hash; accepted but not used by this class.
    def initialize url, head, options={}
      @url = url
      # Must be @head (the original wrote @heade), otherwise the +head+
      # reader declared above always returns nil.
      @head = head
    end
  end
end
@@ -0,0 +1,121 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'net/http'
4
+ require 'image_spec'
5
+ require 'opengraph'
6
+
7
module Linkser
  module Objects
    # An HTML page handled by Linkser.
    #
    # The constructor is inherited from Linkser::Object (url, head, options).
    # Every public accessor below is computed on first use and cached in an
    # instance variable of the same name, so the page is only fetched and
    # parsed when one of them is actually called.
    class HTML < Linkser::Object
      # Lower-case file extensions of the <img> sources that get probed.
      IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png"].freeze

      # Returns the page title: the OpenGraph title when the page has a valid
      # OpenGraph object with one, otherwise the text of the <title> element
      # (the last one if the document contains several). nil when none exists.
      def title
        return @title unless @title.nil?
        if ogp && ogp.title
          @title = ogp.title
        else
          node = nokogiri.css('title').last
          @title = node.text if node
        end
        @title
      end

      # Returns the opened remote document for +url+ (fetched via open-uri).
      def body
        return @body unless @body.nil?
        # Kernel#open no longer opens URLs on Ruby >= 3.0; open-uri provides
        # URI.open since Ruby 2.5. On older Rubies URI.open is not public, so
        # fall back to the Kernel#open patched by open-uri.
        @body = URI.respond_to?(:open) ? URI.open(url) : open(url)
      end

      # Returns the page description: the OpenGraph description when present,
      # otherwise the content of the <meta name="description"> element (the
      # last one if there are several). nil when none exists.
      def description
        return @description unless @description.nil?
        if ogp && ogp.description
          @description = ogp.description
        else
          nokogiri.css('meta').each do |meta|
            if meta.get_attribute("name").eql? "description"
              @description = meta.get_attribute("content")
            end
          end
        end
        @description
      end

      # Returns an Array of Hashes {:img, :width, :height}, one per image that
      # could be measured and passes valid_img?. The OpenGraph image (if any)
      # comes first, followed by the page's <img> elements whose path ends in
      # one of IMAGE_EXTENSIONS.
      def images
        return @images unless @images.nil?
        @images = []
        if ogp && ogp.image
          entry = image_entry ogp.image
          @images << entry if entry
        end
        nokogiri.css('img').each do |img|
          img_src = img.get_attribute("src")
          # <img> without a usable src attribute: nothing to resolve.
          next if img_src.nil? || img_src.strip.empty?
          begin
            img_src = complete_url img_src, url
            # path is nil for opaque URIs (e.g. data:), hence to_s.
            img_ext = File.extname(URI.parse(img_src).path.to_s).downcase
          rescue URI::Error
            # A single malformed src must not abort the whole scan.
            next
          end
          next unless IMAGE_EXTENSIONS.include? img_ext
          entry = image_entry img_src
          @images << entry if entry
        end
        @images
      end

      # Returns the document parsed by Nokogiri::HTML from +body+.
      def nokogiri
        return @nokogiri unless @nokogiri.nil?
        @nokogiri = Nokogiri::HTML(body)
      end

      # Returns an OpenGraph::Object filled from the page's
      # <meta property="og:*"> elements, or false when the page has no such
      # elements or the resulting object is not valid?.
      def ogp
        return @ogp unless @ogp.nil?
        graph = OpenGraph::Object.new
        nokogiri.css('meta').each do |m|
          if m.attribute('property') && m.attribute('property').to_s.match(/^og:(.+)$/i)
            graph[$1.gsub('-','_')] = m.attribute('content').to_s
          end
        end
        # Both checks are made on the OpenGraph object itself. The original
        # assigned false first and then called valid? on that false, which
        # raised NoMethodError for every page without og: tags.
        @ogp = (graph.keys.empty? || !graph.valid?) ? false : graph
      end

      private

      # Probes +src+ with ImageSpec and returns the {:img, :width, :height}
      # Hash for it, or nil when the image is rejected by valid_img? or
      # cannot be measured.
      def image_entry src
        spec = ImageSpec.new(src)
        return nil unless valid_img? spec.width.to_f, spec.height.to_f
        {:img => src, :width => spec.width, :height => spec.height}
      rescue
        # Best effort: an image that cannot be fetched or read is skipped.
        nil
      end

      # Resolves +src+ against the page +url+ and returns the absolute URL as
      # a String. URI.join handles absolute (http and https),
      # protocol-relative, root-relative and document-relative references,
      # and keeps the scheme and port of +url+.
      #
      # Raises URI::Error subclasses when either argument is not a valid URI.
      def complete_url src, url
        URI.join(url, src).to_s
      end

      # Tells whether an image of width +w+ and height +h+ (Floats) is kept.
      #
      # At least one side must exceed 199. Then either both sides are known
      # and the width/height ratio lies strictly between 0.2 and 3, or one
      # side is 0 and the other one is below 700.
      def valid_img? w, h
        # The original tested the width twice; the second operand is the height.
        return false unless w > 199 || h > 199
        (w > 0 && h > 0 && (w / h) < 3 && (w / h) > 0.2) ||
          (w > 0 && h == 0 && w < 700) ||
          (w == 0 && h > 0 && h < 700)
      end
    end
  end
end
@@ -5,23 +5,21 @@ module Linkser
5
5
  module Parser
6
6
  def self.parse url, options={}
7
7
  if !is_valid_url? url
8
- raise "Invalid URL"
8
+ raise "Invalid URL"
9
9
  end
10
10
  head = get_head url
11
11
  case head.content_type
12
12
  when "text/html"
13
- Linkser::Parser::HTML.new.parse url
13
+ Linkser::Objects::HTML.new url, head
14
14
  else
15
15
  raise "I have no idea on how to parse a '" + head.content_type + "'"
16
16
  end
17
17
  end
18
18
 
19
- #private
19
+ private
20
20
 
21
21
  def self.get_head url, limit = 10
22
- if (limit==0)
23
- raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit'
24
- end
22
+ raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit' if (limit==0)
25
23
  uri = URI.parse url
26
24
  http = Net::HTTP.start uri.host, uri.port
27
25
  response = http.head uri.request_uri
@@ -33,7 +31,7 @@ module Linkser
33
31
  warn "Redirecting to #{location}"
34
32
  return get_head location, limit - 1
35
33
  else
36
- raise 'The HTTP responded with an ' + response.code + ' code'
34
+ raise 'The HTTP request has a ' + response.code + ' code'
37
35
  end
38
36
  end
39
37
 
@@ -41,10 +39,11 @@ module Linkser
41
39
  begin
42
40
  uri = URI.parse(url)
43
41
  if [:scheme, :host].any? { |i| uri.send(i).blank? }
44
- raise(URI::InvalidURIError)
42
+ raise URI::InvalidURIError
45
43
  end
46
44
  return true
47
45
  rescue URI::InvalidURIError => e
46
+ warn e.to_s
48
47
  return false
49
48
  end
50
49
  end
@@ -1,3 +1,3 @@
1
1
  module Linkser
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/linkser.rb CHANGED
@@ -2,8 +2,9 @@ require 'linkser/version'
2
2
 
3
3
  module Linkser
4
4
  autoload :Parser, 'linkser/parser'
5
- module Parser
6
- autoload :HTML, 'linkser/parser/html'
5
+ autoload :Object, 'linkser/object'
6
+ module Objects
7
+ autoload :HTML, 'linkser/objects/html'
7
8
  end
8
9
  end
9
10
 
data/linkser.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  s.add_runtime_dependency('nokogiri', '~> 1.4.2')
25
25
  s.add_runtime_dependency('rmagick', '~> 2.13.1')
26
26
  s.add_runtime_dependency('ruby-imagespec', "~> 0.2.0")
27
+ s.add_runtime_dependency('opengraph', "~> 0.0.4")
27
28
 
28
29
  # Development Gem dependencies
29
30
  #
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Eduardo Casanova
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-11-18 00:00:00 +01:00
19
- default_executable:
18
+ date: 2011-11-21 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rake
@@ -81,9 +80,25 @@ dependencies:
81
80
  type: :runtime
82
81
  version_requirements: *id004
83
82
  - !ruby/object:Gem::Dependency
84
- name: ruby-debug
83
+ name: opengraph
85
84
  prerelease: false
86
85
  requirement: &id005 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ~>
89
+ - !ruby/object:Gem::Version
90
+ hash: 23
91
+ segments:
92
+ - 0
93
+ - 0
94
+ - 4
95
+ version: 0.0.4
96
+ type: :runtime
97
+ version_requirements: *id005
98
+ - !ruby/object:Gem::Dependency
99
+ name: ruby-debug
100
+ prerelease: false
101
+ requirement: &id006 !ruby/object:Gem::Requirement
87
102
  none: false
88
103
  requirements:
89
104
  - - ">="
@@ -95,11 +110,11 @@ dependencies:
95
110
  - 3
96
111
  version: 0.10.3
97
112
  type: :development
98
- version_requirements: *id005
113
+ version_requirements: *id006
99
114
  - !ruby/object:Gem::Dependency
100
115
  name: rspec
101
116
  prerelease: false
102
- requirement: &id006 !ruby/object:Gem::Requirement
117
+ requirement: &id007 !ruby/object:Gem::Requirement
103
118
  none: false
104
119
  requirements:
105
120
  - - ">="
@@ -111,7 +126,7 @@ dependencies:
111
126
  - 0
112
127
  version: 2.7.0
113
128
  type: :development
114
- version_requirements: *id006
129
+ version_requirements: *id007
115
130
  description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference it and returns the relevant information about the resource.
116
131
  email:
117
132
  - ecasanovac@gmail.com
@@ -130,13 +145,13 @@ files:
130
145
  - README.textile
131
146
  - Rakefile
132
147
  - lib/linkser.rb
148
+ - lib/linkser/object.rb
149
+ - lib/linkser/objects/html.rb
133
150
  - lib/linkser/parser.rb
134
- - lib/linkser/parser/html.rb
135
151
  - lib/linkser/version.rb
136
152
  - linkser.gemspec
137
153
  - spec/linkser_spec.rb
138
154
  - spec/spec_helper.rb
139
- has_rdoc: true
140
155
  homepage: https://github.com/ging/linkser
141
156
  licenses: []
142
157
 
@@ -166,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
181
  requirements: []
167
182
 
168
183
  rubyforge_project:
169
- rubygems_version: 1.6.2
184
+ rubygems_version: 1.8.10
170
185
  signing_key:
171
186
  specification_version: 3
172
187
  summary: A link parser for Ruby
@@ -1,73 +0,0 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'net/http'
4
- require 'image_spec'
5
-
6
- module Linkser
7
- module Parser
8
- class HTML
9
- def parse url, options={}
10
- parsed_page = Hash.new
11
-
12
- doc = Nokogiri::HTML(open(url))
13
-
14
- doc.css('title').each do |title|
15
- parsed_page.update({:title => title.text})
16
- end
17
-
18
- doc.css('meta').each do |meta|
19
- if meta.get_attribute("name").eql? "description"
20
- parsed_page.update({:description => meta.get_attribute("content")})
21
- end
22
- end
23
-
24
- images = Array.new
25
-
26
- doc.css('img').each do |img|
27
- img_src = img.get_attribute("src")
28
- img_src = get_complete_url img_src, url
29
- img_uri = URI.parse(img_src)
30
- img_ext = File.extname(img_uri.path)
31
- img_name = File.basename(img_uri.path,img_ext)
32
- if [".jpg", ".jpeg", ".png"].include? img_ext
33
- begin
34
- img_spec = ImageSpec.new(img_src)
35
- w = img_spec.width.to_f
36
- h = img_spec.height.to_f
37
- if w > 199 or w > 199
38
- if ((w > 0 and h > 0 and ((w / h) < 3) and ((w / h) > 0.2)) or (w > 0 and h == 0 and w < 700) or (w == 0 and h > 0 and h < 700)) and img_name.index("logo").nil?
39
- image = {:img => img_src, :width => w.to_i, :height => h.to_i}
40
- images << image
41
- end
42
- end
43
- rescue
44
- end
45
- end
46
- end
47
-
48
- if images!=[]
49
- parsed_page.update({:images => images})
50
- end
51
-
52
- return parsed_page
53
- end
54
-
55
- private
56
-
57
- def get_complete_url src, url
58
- uri = URI.parse(url)
59
- base_url = "http://" + uri.host + (uri.port!=80 ? ":" + uri.port.to_s : "")
60
- relative_url = "http://" + uri.host + (uri.port!=80 ? ":" + uri.port.to_s : "") + uri.path
61
- if src.index("http://")==0
62
- src = src
63
- #stays the same
64
- elsif src.index("/")==0
65
- src = base_url + src
66
- else
67
- src = relative_url + src
68
- end
69
- end
70
- end
71
- end
72
- end
73
-