linkser 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/linkser/object.rb +10 -0
 - data/lib/linkser/objects/html.rb +121 -0
 - data/lib/linkser/parser.rb +7 -8
 - data/lib/linkser/version.rb +1 -1
 - data/lib/linkser.rb +3 -2
 - data/linkser.gemspec +1 -0
 - metadata +27 -12
 - data/lib/linkser/parser/html.rb +0 -73
 
| 
         @@ -0,0 +1,121 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'nokogiri'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'net/http'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'image_spec'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'opengraph'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            module Linkser
         
     | 
| 
      
 8 
     | 
    
         
            +
              module Objects
         
     | 
| 
      
 9 
     | 
    
         
            +
                class HTML < Linkser::Object
         
     | 
| 
      
 10 
     | 
    
         
            +
                  attr_reader :body, :nokogiri
         
     | 
| 
      
 11 
     | 
    
         
            +
                  attr_reader :title, :description, :images, :ogp
         
     | 
| 
      
 12 
     | 
    
         
            +
                  # Builds an HTML link object.
                  #
                  # url, head and options are forwarded untouched to the parent
                  # class constructor (Linkser::Object); no extra state is set here.
                  def initialize(url, head, options = {})
                    # Bare `super` re-sends this method's own arguments as-is.
                    super
                  end
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                  # Returns the page title, memoized once a title has been found.
                  #
                  # The Open Graph title is preferred when `ogp` yields one. Otherwise
                  # the text of the FIRST <title> element in document order is used, so
                  # a later <title> (for example one nested in inline SVG) cannot
                  # override the document's own title.
                  #
                  # Returns nil when neither source provides a title.
                  def title
                    return @title unless @title.nil?
                    if ogp && ogp.title
                      @title = ogp.title
                    else
                      title_node = nokogiri.css('title').first
                      @title = title_node.text if title_node
                    end
                    @title
                  end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                  # Fetches the resource at `url` and memoizes the result.
                  #
                  # Returns the IO-like object that open-uri produces for the URL.
                  #
                  # The URL is opened through URI#open (mixed in by open-uri) rather
                  # than Kernel#open: Kernel#open stopped fetching URLs in Ruby 3.0 and
                  # treats a string starting with "|" as a command to run, which is
                  # unsafe for caller-supplied input.
                  #
                  # Raises URI::InvalidURIError if `url` cannot be parsed, and whatever
                  # open-uri raises on network or HTTP failure.
                  def body
                    return @body unless @body.nil?
                    @body = URI.parse(url).open
                  end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                  # Returns the page description, memoized once one has been found.
                  #
                  # The Open Graph description is preferred when `ogp` yields one.
                  # Otherwise the `content` of the <meta name="description"> element is
                  # used; if several are present the last one wins. The `name` match is
                  # case-insensitive, so <meta name="Description"> is honoured too.
                  #
                  # Returns nil when neither source provides a description.
                  def description
                    return @description unless @description.nil?
                    if ogp && ogp.description
                      @description = ogp.description
                    else
                      nokogiri.css('meta').each do |meta|
                        meta_name = meta.get_attribute("name")
                        # <meta> elements without a name (charset, http-equiv, property) are skipped.
                        next unless meta_name && meta_name.casecmp("description") == 0
                        @description = meta.get_attribute("content")
                      end
                    end
                    @description
                  end
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
                  # Returns the images considered representative of the page, memoized.
                  #
                  # Each entry is a Hash with :img (absolute source), :width and :height
                  # (as reported by ImageSpec). The Open Graph image, when `ogp` yields
                  # one, comes first, followed by the page's <img> elements in document
                  # order. Only <img> sources whose path ends in .jpg, .jpeg or .png are
                  # probed, and only images accepted by valid_img? are kept.
                  #
                  # The scan is best-effort: an <img> with no src, a src that cannot be
                  # parsed as a URI, or an image ImageSpec cannot read is skipped rather
                  # than aborting the whole scan. The result is stored only once the
                  # scan completes, so a failure while loading the page is not cached
                  # as a partial list.
                  def images
                    return @images unless @images.nil?

                    candidates = []
                    candidates << ogp.image if ogp && ogp.image

                    nokogiri.css('img').each do |img|
                      img_src = img.get_attribute("src")
                      # <img> without a usable src (lazy-loading placeholders etc.).
                      next if img_src.nil? || img_src.strip.empty?
                      begin
                        img_src = complete_url img_src, url
                        # path is nil for opaque URIs, hence to_s.
                        img_ext = File.extname(URI.parse(img_src).path.to_s)
                      rescue URI::InvalidURIError
                        next
                      end
                      candidates << img_src if [".jpg", ".jpeg", ".png"].include? img_ext
                    end

                    found = []
                    candidates.each do |src|
                      begin
                        img_spec = ImageSpec.new(src)
                        if valid_img? img_spec.width.to_f, img_spec.height.to_f
                          found << {:img => src, :width => img_spec.width, :height => img_spec.height}
                        end
                      rescue StandardError
                        # Unreadable or unreachable image: deliberately ignored.
                      end
                    end

                    @images = found
                  end
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
                  # Parsed HTML document for this page.
                  #
                  # Built on first use by handing `body` to Nokogiri::HTML, then cached
                  # in @nokogiri for later calls.
                  def nokogiri
                    @nokogiri ||= Nokogiri::HTML(body)
                  end
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
                  # Returns the page's Open Graph data, memoized.
                  #
                  # Every <meta property="og:..."> element contributes one entry to an
                  # OpenGraph::Object: the key is the part after "og:" with "-" replaced
                  # by "_", the value is the element's `content`.
                  #
                  # Returns false (also memoized) when the page has no og: properties or
                  # when the collected object does not pass its own valid? check. The
                  # empty case is decided before valid? is called; previously valid? was
                  # invoked on `false`, raising NoMethodError for pages without og: tags.
                  def ogp
                    return @ogp unless @ogp.nil?

                    graph = OpenGraph::Object.new
                    nokogiri.css('meta').each do |meta|
                      property = meta.attribute('property')
                      next unless property && property.to_s =~ /^og:(.+)$/i
                      graph[$1.gsub('-','_')] = meta.attribute('content').to_s
                    end

                    @ogp = (!graph.keys.empty? && graph.valid?) ? graph : false
                  end
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                  private
         
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
                  # Resolves an image source against the URL of the page it appears on.
                  #
                  # src - the raw `src` value (absolute, scheme-relative, root-relative
                  #       or relative to the page); surrounding whitespace is ignored.
                  # url - the absolute URL of the page.
                  #
                  # Returns the absolute URL as a String.
                  #
                  # Resolution is delegated to URI.join, so https and scheme-relative
                  # ("//host/...") sources are kept intact, the page's own scheme and
                  # port are reused, and relative paths are resolved against the page's
                  # directory instead of being appended to its file name.
                  #
                  # Raises URI::InvalidURIError when either argument is not a valid URI.
                  def complete_url(src, url)
                    URI.join(url, src.strip).to_s
                  end
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
                  # Decides whether an image of the given dimensions is worth listing.
                  #
                  # w - image width  (numeric; callers pass Floats)
                  # h - image height (numeric; callers pass Floats)
                  #
                  # Returns true or false.
                  #
                  # Rules:
                  # * at least one side must exceed 199 (the original tested the width
                  #   twice, so height was never considered and the width-zero rule
                  #   below could never apply);
                  # * when both sides are positive, the width/height ratio must lie
                  #   strictly between 0.2 and 3;
                  # * when one side is 0 (presumably an unknown dimension - confirm
                  #   against ImageSpec), the other must be below 700.
                  def valid_img?(w, h)
                    return false unless w > 199 || h > 199

                    if w > 0 && h > 0
                      ratio = w / h
                      ratio < 3 && ratio > 0.2
                    elsif w > 0 && h == 0
                      w < 700
                    elsif w == 0 && h > 0
                      h < 700
                    else
                      false
                    end
                  end
         
     | 
| 
      
 119 
     | 
    
         
            +
                end
         
     | 
| 
      
 120 
     | 
    
         
            +
              end
         
     | 
| 
      
 121 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/linkser/parser.rb
    CHANGED
    
    | 
         @@ -5,23 +5,21 @@ module Linkser 
     | 
|
| 
       5 
5 
     | 
    
         
             
              module Parser
         
     | 
| 
       6 
6 
     | 
    
         
             
                def self.parse url, options={}
         
     | 
| 
       7 
7 
     | 
    
         
             
                  if !is_valid_url? url
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
      
 8 
     | 
    
         
            +
                    raise "Invalid URL"
         
     | 
| 
       9 
9 
     | 
    
         
             
                  end
         
     | 
| 
       10 
10 
     | 
    
         
             
                  head = get_head url
         
     | 
| 
       11 
11 
     | 
    
         
             
                  case head.content_type
         
     | 
| 
       12 
12 
     | 
    
         
             
                  when "text/html"
         
     | 
| 
       13 
     | 
    
         
            -
                    Linkser:: 
     | 
| 
      
 13 
     | 
    
         
            +
                    Linkser::Objects::HTML.new url, head
         
     | 
| 
       14 
14 
     | 
    
         
             
                  else
         
     | 
| 
       15 
15 
     | 
    
         
             
                  raise "I have no idea on how to parse a '" + head.content_type + "'"
         
     | 
| 
       16 
16 
     | 
    
         
             
                  end
         
     | 
| 
       17 
17 
     | 
    
         
             
                end
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
                 
     | 
| 
      
 19 
     | 
    
         
            +
                private
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
                def self.get_head url, limit = 10
         
     | 
| 
       22 
     | 
    
         
            -
                  if (limit==0)
         
     | 
| 
       23 
     | 
    
         
            -
                  raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit'
         
     | 
| 
       24 
     | 
    
         
            -
                  end
         
     | 
| 
      
 22 
     | 
    
         
            +
                  raise 'Too many HTTP redirects. URL was not reacheable within the HTTP redirects limit' if (limit==0)
         
     | 
| 
       25 
23 
     | 
    
         
             
                  uri = URI.parse url
         
     | 
| 
       26 
24 
     | 
    
         
             
                  http = Net::HTTP.start uri.host, uri.port
         
     | 
| 
       27 
25 
     | 
    
         
             
                  response = http.head uri.request_uri
         
     | 
| 
         @@ -33,7 +31,7 @@ module Linkser 
     | 
|
| 
       33 
31 
     | 
    
         
             
                    warn "Redirecting to #{location}"
         
     | 
| 
       34 
32 
     | 
    
         
             
                    return get_head location, limit - 1
         
     | 
| 
       35 
33 
     | 
    
         
             
                  else
         
     | 
| 
       36 
     | 
    
         
            -
                  raise 'The HTTP  
     | 
| 
      
 34 
     | 
    
         
            +
                  raise 'The HTTP request has a ' + response.code + ' code'
         
     | 
| 
       37 
35 
     | 
    
         
             
                  end
         
     | 
| 
       38 
36 
     | 
    
         
             
                end
         
     | 
| 
       39 
37 
     | 
    
         | 
| 
         @@ -41,10 +39,11 @@ module Linkser 
     | 
|
| 
       41 
39 
     | 
    
         
             
                  begin
         
     | 
| 
       42 
40 
     | 
    
         
             
                    uri = URI.parse(url)
         
     | 
| 
       43 
41 
     | 
    
         
             
                    if [:scheme, :host].any? { |i| uri.send(i).blank? }
         
     | 
| 
       44 
     | 
    
         
            -
                    raise 
     | 
| 
      
 42 
     | 
    
         
            +
                    raise URI::InvalidURIError 
         
     | 
| 
       45 
43 
     | 
    
         
             
                    end
         
     | 
| 
       46 
44 
     | 
    
         
             
                    return true
         
     | 
| 
       47 
45 
     | 
    
         
             
                  rescue URI::InvalidURIError => e
         
     | 
| 
      
 46 
     | 
    
         
            +
                     warn e.to_s
         
     | 
| 
       48 
47 
     | 
    
         
             
                  return false
         
     | 
| 
       49 
48 
     | 
    
         
             
                  end
         
     | 
| 
       50 
49 
     | 
    
         
             
                end
         
     | 
    
        data/lib/linkser/version.rb
    CHANGED
    
    
    
        data/lib/linkser.rb
    CHANGED
    
    
    
        data/linkser.gemspec
    CHANGED
    
    | 
         @@ -24,6 +24,7 @@ Gem::Specification.new do |s| 
     | 
|
| 
       24 
24 
     | 
    
         
             
              s.add_runtime_dependency('nokogiri', '~> 1.4.2')
         
     | 
| 
       25 
25 
     | 
    
         
             
              s.add_runtime_dependency('rmagick', '~> 2.13.1')
         
     | 
| 
       26 
26 
     | 
    
         
             
              s.add_runtime_dependency('ruby-imagespec', "~> 0.2.0")  
         
     | 
| 
      
 27 
     | 
    
         
            +
              s.add_runtime_dependency('opengraph', "~> 0.0.4")
         
     | 
| 
       27 
28 
     | 
    
         | 
| 
       28 
29 
     | 
    
         
             
              # Development Gem dependencies
         
     | 
| 
       29 
30 
     | 
    
         
             
              #
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: linkser
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              hash:  
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 25
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
              segments: 
         
     | 
| 
       7 
7 
     | 
    
         
             
              - 0
         
     | 
| 
       8 
8 
     | 
    
         
             
              - 0
         
     | 
| 
       9 
     | 
    
         
            -
              -  
     | 
| 
       10 
     | 
    
         
            -
              version: 0.0. 
     | 
| 
      
 9 
     | 
    
         
            +
              - 3
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 0.0.3
         
     | 
| 
       11 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       12 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       13 
13 
     | 
    
         
             
            - Eduardo Casanova
         
     | 
| 
         @@ -15,8 +15,7 @@ autorequire: 
     | 
|
| 
       15 
15 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       16 
16 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
            date: 2011-11- 
     | 
| 
       19 
     | 
    
         
            -
            default_executable: 
         
     | 
| 
      
 18 
     | 
    
         
            +
            date: 2011-11-21 00:00:00 Z
         
     | 
| 
       20 
19 
     | 
    
         
             
            dependencies: 
         
     | 
| 
       21 
20 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
       22 
21 
     | 
    
         
             
              name: rake
         
     | 
| 
         @@ -81,9 +80,25 @@ dependencies: 
     | 
|
| 
       81 
80 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       82 
81 
     | 
    
         
             
              version_requirements: *id004
         
     | 
| 
       83 
82 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
       84 
     | 
    
         
            -
              name:  
     | 
| 
      
 83 
     | 
    
         
            +
              name: opengraph
         
     | 
| 
       85 
84 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       86 
85 
     | 
    
         
             
              requirement: &id005 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 86 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 87 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 88 
     | 
    
         
            +
                - - ~>
         
     | 
| 
      
 89 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 90 
     | 
    
         
            +
                    hash: 23
         
     | 
| 
      
 91 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 92 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 93 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 94 
     | 
    
         
            +
                    - 4
         
     | 
| 
      
 95 
     | 
    
         
            +
                    version: 0.0.4
         
     | 
| 
      
 96 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 97 
     | 
    
         
            +
              version_requirements: *id005
         
     | 
| 
      
 98 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 99 
     | 
    
         
            +
              name: ruby-debug
         
     | 
| 
      
 100 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 101 
     | 
    
         
            +
              requirement: &id006 !ruby/object:Gem::Requirement 
         
     | 
| 
       87 
102 
     | 
    
         
             
                none: false
         
     | 
| 
       88 
103 
     | 
    
         
             
                requirements: 
         
     | 
| 
       89 
104 
     | 
    
         
             
                - - ">="
         
     | 
| 
         @@ -95,11 +110,11 @@ dependencies: 
     | 
|
| 
       95 
110 
     | 
    
         
             
                    - 3
         
     | 
| 
       96 
111 
     | 
    
         
             
                    version: 0.10.3
         
     | 
| 
       97 
112 
     | 
    
         
             
              type: :development
         
     | 
| 
       98 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 113 
     | 
    
         
            +
              version_requirements: *id006
         
     | 
| 
       99 
114 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
       100 
115 
     | 
    
         
             
              name: rspec
         
     | 
| 
       101 
116 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       102 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 117 
     | 
    
         
            +
              requirement: &id007 !ruby/object:Gem::Requirement 
         
     | 
| 
       103 
118 
     | 
    
         
             
                none: false
         
     | 
| 
       104 
119 
     | 
    
         
             
                requirements: 
         
     | 
| 
       105 
120 
     | 
    
         
             
                - - ">="
         
     | 
| 
         @@ -111,7 +126,7 @@ dependencies: 
     | 
|
| 
       111 
126 
     | 
    
         
             
                    - 0
         
     | 
| 
       112 
127 
     | 
    
         
             
                    version: 2.7.0
         
     | 
| 
       113 
128 
     | 
    
         
             
              type: :development
         
     | 
| 
       114 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 129 
     | 
    
         
            +
              version_requirements: *id007
         
     | 
| 
       115 
130 
     | 
    
         
             
            description: Linkser is a link parser for Ruby. It gets an URI, tries to dereference it and returns the relevant information about the resource.
         
     | 
| 
       116 
131 
     | 
    
         
             
            email: 
         
     | 
| 
       117 
132 
     | 
    
         
             
            - ecasanovac@gmail.com
         
     | 
| 
         @@ -130,13 +145,13 @@ files: 
     | 
|
| 
       130 
145 
     | 
    
         
             
            - README.textile
         
     | 
| 
       131 
146 
     | 
    
         
             
            - Rakefile
         
     | 
| 
       132 
147 
     | 
    
         
             
            - lib/linkser.rb
         
     | 
| 
      
 148 
     | 
    
         
            +
            - lib/linkser/object.rb
         
     | 
| 
      
 149 
     | 
    
         
            +
            - lib/linkser/objects/html.rb
         
     | 
| 
       133 
150 
     | 
    
         
             
            - lib/linkser/parser.rb
         
     | 
| 
       134 
     | 
    
         
            -
            - lib/linkser/parser/html.rb
         
     | 
| 
       135 
151 
     | 
    
         
             
            - lib/linkser/version.rb
         
     | 
| 
       136 
152 
     | 
    
         
             
            - linkser.gemspec
         
     | 
| 
       137 
153 
     | 
    
         
             
            - spec/linkser_spec.rb
         
     | 
| 
       138 
154 
     | 
    
         
             
            - spec/spec_helper.rb
         
     | 
| 
       139 
     | 
    
         
            -
            has_rdoc: true
         
     | 
| 
       140 
155 
     | 
    
         
             
            homepage: https://github.com/ging/linkser
         
     | 
| 
       141 
156 
     | 
    
         
             
            licenses: []
         
     | 
| 
       142 
157 
     | 
    
         | 
| 
         @@ -166,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       166 
181 
     | 
    
         
             
            requirements: []
         
     | 
| 
       167 
182 
     | 
    
         | 
| 
       168 
183 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       169 
     | 
    
         
            -
            rubygems_version: 1. 
     | 
| 
      
 184 
     | 
    
         
            +
            rubygems_version: 1.8.10
         
     | 
| 
       170 
185 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       171 
186 
     | 
    
         
             
            specification_version: 3
         
     | 
| 
       172 
187 
     | 
    
         
             
            summary: A link parser for Ruby
         
     | 
    
        data/lib/linkser/parser/html.rb
    DELETED
    
    | 
         @@ -1,73 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'nokogiri'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'open-uri'
         
     | 
| 
       3 
     | 
    
         
            -
            require 'net/http'
         
     | 
| 
       4 
     | 
    
         
            -
            require 'image_spec'
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
            module Linkser
         
     | 
| 
       7 
     | 
    
         
            -
              module Parser
         
     | 
| 
       8 
     | 
    
         
            -
                class HTML
         
     | 
| 
       9 
     | 
    
         
            -
                  def parse url, options={}
         
     | 
| 
       10 
     | 
    
         
            -
                    parsed_page = Hash.new
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
                    doc = Nokogiri::HTML(open(url))
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                    doc.css('title').each do |title|
         
     | 
| 
       15 
     | 
    
         
            -
                      parsed_page.update({:title => title.text})
         
     | 
| 
       16 
     | 
    
         
            -
                    end
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                    doc.css('meta').each do |meta|
         
     | 
| 
       19 
     | 
    
         
            -
                      if meta.get_attribute("name").eql? "description"
         
     | 
| 
       20 
     | 
    
         
            -
                        parsed_page.update({:description => meta.get_attribute("content")})
         
     | 
| 
       21 
     | 
    
         
            -
                      end
         
     | 
| 
       22 
     | 
    
         
            -
                    end
         
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                    images = Array.new
         
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
                    doc.css('img').each do |img|
         
     | 
| 
       27 
     | 
    
         
            -
                      img_src = img.get_attribute("src")
         
     | 
| 
       28 
     | 
    
         
            -
                      img_src = get_complete_url img_src, url
         
     | 
| 
       29 
     | 
    
         
            -
                      img_uri = URI.parse(img_src)
         
     | 
| 
       30 
     | 
    
         
            -
                      img_ext = File.extname(img_uri.path)
         
     | 
| 
       31 
     | 
    
         
            -
                      img_name = File.basename(img_uri.path,img_ext)
         
     | 
| 
       32 
     | 
    
         
            -
                      if [".jpg", ".jpeg", ".png"].include? img_ext
         
     | 
| 
       33 
     | 
    
         
            -
                        begin
         
     | 
| 
       34 
     | 
    
         
            -
                          img_spec = ImageSpec.new(img_src)
         
     | 
| 
       35 
     | 
    
         
            -
                          w = img_spec.width.to_f
         
     | 
| 
       36 
     | 
    
         
            -
                          h = img_spec.height.to_f
         
     | 
| 
       37 
     | 
    
         
            -
                          if w > 199 or w > 199
         
     | 
| 
       38 
     | 
    
         
            -
                            if ((w > 0 and h > 0 and ((w / h) < 3) and ((w / h) > 0.2)) or (w > 0 and h == 0 and w < 700) or (w == 0 and h > 0 and h < 700)) and img_name.index("logo").nil?
         
     | 
| 
       39 
     | 
    
         
            -
                              image = {:img => img_src, :width => w.to_i, :height => h.to_i}
         
     | 
| 
       40 
     | 
    
         
            -
                            images << image
         
     | 
| 
       41 
     | 
    
         
            -
                            end
         
     | 
| 
       42 
     | 
    
         
            -
                          end
         
     | 
| 
       43 
     | 
    
         
            -
                        rescue
         
     | 
| 
       44 
     | 
    
         
            -
                        end
         
     | 
| 
       45 
     | 
    
         
            -
                      end
         
     | 
| 
       46 
     | 
    
         
            -
                    end
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
                    if images!=[]
         
     | 
| 
       49 
     | 
    
         
            -
                      parsed_page.update({:images => images})
         
     | 
| 
       50 
     | 
    
         
            -
                    end
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                    return parsed_page
         
     | 
| 
       53 
     | 
    
         
            -
                  end
         
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
                  private
         
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
                  def get_complete_url src, url
         
     | 
| 
       58 
     | 
    
         
            -
                    uri = URI.parse(url)
         
     | 
| 
       59 
     | 
    
         
            -
                    base_url = "http://" + uri.host + (uri.port!=80 ? ":" + uri.port.to_s : "")
         
     | 
| 
       60 
     | 
    
         
            -
                    relative_url = "http://" + uri.host + (uri.port!=80 ? ":" + uri.port.to_s : "") + uri.path
         
     | 
| 
       61 
     | 
    
         
            -
                    if src.index("http://")==0
         
     | 
| 
       62 
     | 
    
         
            -
                    src = src
         
     | 
| 
       63 
     | 
    
         
            -
                    #stays the same
         
     | 
| 
       64 
     | 
    
         
            -
                    elsif src.index("/")==0
         
     | 
| 
       65 
     | 
    
         
            -
                    src = base_url + src
         
     | 
| 
       66 
     | 
    
         
            -
                    else
         
     | 
| 
       67 
     | 
    
         
            -
                    src = relative_url + src
         
     | 
| 
       68 
     | 
    
         
            -
                    end
         
     | 
| 
       69 
     | 
    
         
            -
                  end
         
     | 
| 
       70 
     | 
    
         
            -
                end
         
     | 
| 
       71 
     | 
    
         
            -
              end
         
     | 
| 
       72 
     | 
    
         
            -
            end
         
     | 
| 
       73 
     | 
    
         
            -
             
     |