linkser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ .bundle/
2
+ log/*.log
3
+ pkg/
4
+ spec/dummy/db/*.sqlite3
5
+ spec/dummy/log/*.log
6
+ spec/dummy/tmp/
7
+ spec/dummy/.project
8
+ **.tmp_*
9
+ Gemfile.lock
10
+ .idea
11
+ .project
12
+ .document
13
+ .settings/
14
+ rdoc/
15
+ doc/
16
+ .yardoc/*
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - rbx
5
+ - ree
6
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in linkser.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Eduardo Casanova Cuesta
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.textile ADDED
@@ -0,0 +1,21 @@
1
+ h1. Linkser "!https://secure.travis-ci.org/ging/linkser.png!":http://travis-ci.org/ging/linkser
2
+
3
+ Linkser is a link parser for Ruby. It gets a URI, tries to dereference it, and returns the relevant information about the resource.
4
+
5
+ h2. Installation
6
+
7
+ #TODO
8
+
9
+ h2. Using Linkser
10
+
11
+ #TODO
12
+
13
+ h2. License
14
+
15
+ Copyright (c) 2011 Universidad Politécnica de Madrid
16
+
17
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core'
4
+ require 'rspec/core/rake_task'
5
# Register the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the specs.
task(:default => [:spec])
@@ -0,0 +1,73 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'net/http'
4
+ require 'image_spec'
5
+
6
module Linkser
  module Parser
    # Parses an HTML page and collects its title, its meta description and
    # the images that look like reasonable previews of the page.
    class HTML
      # Fetches +url+ and extracts the relevant information from the page.
      #
      # url     - String with the absolute URL of the HTML page.
      # options - Hash, currently unused; kept for interface compatibility.
      #
      # Returns a Hash that may contain :title, :description and :images
      # (an Array of {:img, :width, :height} Hashes). Keys with no data in
      # the page are omitted.
      def parse url, options={}
        parsed_page = {}

        doc = Nokogiri::HTML(open_url(url))

        # If the page has several <title> elements the last one wins.
        doc.css('title').each do |title|
          parsed_page[:title] = title.text
        end

        doc.css('meta').each do |meta|
          if meta.get_attribute("name").eql? "description"
            parsed_page[:description] = meta.get_attribute("content")
          end
        end

        images = []
        doc.css('img').each do |img|
          image = image_info img.get_attribute("src"), url
          images << image if image
        end

        parsed_page[:images] = images unless images.empty?

        parsed_page
      end

      private

      # Opens +url+ for reading through open-uri.
      # Kernel#open no longer handles URLs on Ruby 3.0+, while URI.open is
      # only public on newer Rubies, so the available one is picked.
      def open_url url
        URI.respond_to?(:open) ? URI.open(url) : open(url)
      end

      # Builds the {:img, :width, :height} Hash for one <img> source, or
      # returns nil when the image has no source, is not a JPEG/PNG, cannot
      # be inspected or does not pass the size filter.
      def image_info src, page_url
        return nil if src.nil? || src.strip.empty?

        img_src = get_complete_url src, page_url
        img_uri = URI.parse(img_src)
        img_ext = File.extname(img_uri.path)
        img_name = File.basename(img_uri.path, img_ext)
        return nil unless [".jpg", ".jpeg", ".png"].include? img_ext

        img_spec = ImageSpec.new(img_src)
        w = img_spec.width.to_f
        h = img_spec.height.to_f
        return nil unless acceptable_image? w, h, img_name

        {:img => img_src, :width => w.to_i, :height => h.to_i}
      rescue StandardError
        # Best effort: a broken or unreachable image must not abort the
        # parsing of the whole page, so it is simply skipped.
        nil
      end

      # Decides whether an image of +w+ x +h+ pixels named +name+ is kept.
      # At least one side must exceed 199 pixels, the name must not contain
      # "logo", and either the aspect ratio is between 0.2 and 3 or, when
      # one dimension is reported as 0, the other one is below 700.
      def acceptable_image? w, h, name
        return false unless w > 199 || h > 199
        return false unless name.index("logo").nil?

        if w > 0 && h > 0
          ratio = w / h
          ratio < 3 && ratio > 0.2
        elsif w > 0 && h == 0
          w < 700
        elsif w == 0 && h > 0
          h < 700
        else
          false
        end
      end

      # Resolves +src+ (absolute, protocol-relative, root-relative or
      # relative) against the page +url+ and returns the absolute URL as a
      # String. The scheme and port of the page are preserved.
      def get_complete_url src, url
        URI.join(url, src).to_s
      end
    end
  end
end
73
+
@@ -0,0 +1,53 @@
1
+ require 'open-uri'
2
+ require 'net/http'
3
+
4
module Linkser
  # Entry point of the library: validates a URL, checks what kind of
  # resource it points to and hands it to the matching parser.
  module Parser
    # Parses the resource found at +url+.
    #
    # url     - String with an absolute URL (scheme and host are required).
    # options - Hash forwarded to the concrete parser.
    #
    # Returns whatever the concrete parser returns (only "text/html" is
    # handled, through Linkser::Parser::HTML).
    # Raises RuntimeError when the URL is invalid, cannot be reached or has
    # a content type with no parser.
    def self.parse url, options={}
      raise "Invalid URL" unless is_valid_url? url

      head = get_head url
      case head.content_type
      when "text/html"
        Linkser::Parser::HTML.new.parse url, options
      else
        # Interpolation keeps the message usable when no content type was sent.
        raise "I have no idea on how to parse a '#{head.content_type}'"
      end
    end

    # Issues a HEAD request to +url+, following up to +limit+ redirects.
    #
    # Returns the Net::HTTPResponse of the first successful request.
    # Raises RuntimeError when the redirect limit is exhausted or the server
    # answers with anything other than a success or a usable redirect.
    def self.get_head url, limit = 10
      if limit <= 0
        raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit'
      end

      uri = URI.parse url
      http = Net::HTTP.new uri.host, uri.port
      http.use_ssl = true if uri.scheme.to_s.downcase == 'https'
      # The block form closes the connection once the request is done.
      response = http.start { |connection| connection.head uri.request_uri }

      case response
      when Net::HTTPSuccess
        response
      when Net::HTTPRedirection
        target = response['location']
        # Some 3xx answers carry no Location header and cannot be followed.
        raise "The HTTP responded with an #{response.code} code" if target.nil?
        # Location may be relative, so it is resolved against the current URL.
        location = URI.join(url, target).to_s
        warn "Redirecting to #{location}"
        get_head location, limit - 1
      else
        raise "The HTTP responded with an #{response.code} code"
      end
    end

    # Tells whether +url+ parses as a URI that has both a scheme and a host.
    # Plain Ruby is used for the emptiness check because String#blank? comes
    # from ActiveSupport, which this gem does not depend on.
    #
    # Returns true or false.
    def self.is_valid_url? url
      uri = URI.parse(url)
      [:scheme, :host].none? { |part| uri.send(part).to_s.empty? }
    rescue URI::InvalidURIError
      false
    end
  end
end
53
+
@@ -0,0 +1,3 @@
1
module Linkser
  # Version of the gem; frozen so the shared String cannot be mutated.
  VERSION = "0.0.1".freeze
end
data/lib/linkser.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'linkser/version'
2
+
3
# Top-level namespace of the gem. The parsers are registered for autoload
# instead of being required up front.
module Linkser
  autoload :Parser, 'linkser/parser'

  # Referencing Parser here loads 'linkser/parser'; the HTML parser is then
  # registered for autoload on it.
  Parser.autoload :HTML, 'linkser/parser/html'
end
9
+
data/linkser.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "linkser/version"
4
+
5
# Gem specification for linkser.
Gem::Specification.new do |s|
  s.name = "linkser"
  s.version = Linkser::VERSION
  s.authors = ["Eduardo Casanova"]
  s.email = ["ecasanovac@gmail.com"]
  s.homepage = "https://github.com/ging/linkser"
  s.summary = "A link parser for Ruby"
  s.description = "Linkser is a link parser for Ruby. It gets an URI, tries to dereference it and returns the relevant information about the resource."
  # LICENSE.txt ships the MIT license text.
  s.licenses = ["MIT"]

  # s.rubyforge_project = "linkser"

  # The packaged files are whatever git tracks.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Gem dependencies
  #
  s.add_runtime_dependency('nokogiri', '~> 1.4.2')
  # NOTE(review): nothing under lib/ references RMagick directly; confirm
  # whether ruby-imagespec needs it before dropping this dependency.
  s.add_runtime_dependency('rmagick', '~> 2.13.1')
  s.add_runtime_dependency('ruby-imagespec', "~> 0.2.0")

  # Development Gem dependencies
  #
  # Rake is only used by the Rakefile, so users of the gem do not need it.
  s.add_development_dependency('rake')
  # Debugging
  if RUBY_VERSION < '1.9'
    s.add_development_dependency('ruby-debug', '>= 0.10.3')
  end
  # Specs
  s.add_development_dependency('rspec', '>= 2.7.0')
end
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+ require 'linkser'
3
+
4
# Smoke test: the gem loads and exposes its top-level namespace.
describe Linkser do
  it("should be valid") { Linkser.should be_a(Module) }
end
@@ -0,0 +1,12 @@
1
# Require every Ruby file found under the support directory next to this file.
support_glob = File.join(File.dirname(__FILE__), 'support', '**', '*.rb')
Dir.glob(support_glob).each do |support_file|
  require support_file
end

RSpec.configure do |rspec|
  # Drop the next two lines if RSpec's should / should_not methods and
  # matchers are not wanted.
  require 'rspec/expectations'
  rspec.include RSpec::Matchers

  # == Mock Framework
  rspec.mock_with :rspec
end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linkser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Eduardo Casanova
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-18 00:00:00.000000000 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ requirement: &86390860 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *86390860
26
+ - !ruby/object:Gem::Dependency
27
+ name: nokogiri
28
+ requirement: &86390610 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.4.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: *86390610
37
+ - !ruby/object:Gem::Dependency
38
+ name: rmagick
39
+ requirement: &86390360 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ version: 2.13.1
45
+ type: :runtime
46
+ prerelease: false
47
+ version_requirements: *86390360
48
+ - !ruby/object:Gem::Dependency
49
+ name: ruby-imagespec
50
+ requirement: &86390130 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: 0.2.0
56
+ type: :runtime
57
+ prerelease: false
58
+ version_requirements: *86390130
59
+ - !ruby/object:Gem::Dependency
60
+ name: rspec
61
+ requirement: &86389890 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: 2.7.0
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *86389890
70
+ description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference
71
+ it and returns the relevant information about the resource.
72
+ email:
73
+ - ecasanovac@gmail.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - .gitignore
79
+ - .rspec
80
+ - .travis.yml
81
+ - Gemfile
82
+ - LICENSE.txt
83
+ - README.textile
84
+ - Rakefile
85
+ - lib/linkser.rb
86
+ - lib/linkser/parser.rb
87
+ - lib/linkser/parser/html.rb
88
+ - lib/linkser/version.rb
89
+ - linkser.gemspec
90
+ - spec/linkser_spec.rb
91
+ - spec/spec_helper.rb
92
+ has_rdoc: true
93
+ homepage: https://github.com/ging/linkser
94
+ licenses: []
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ none: false
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 1.6.2
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: A link parser for Ruby
117
+ test_files: []