cinch-url-scraper 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cinch/plugins/urlscraper.rb +1 -1
 - metadata +2 -3
 - data/lib/cinch/plugins/urlscraper.rb~ +0 -168
 
| 
         @@ -71,7 +71,7 @@ module Cinch 
     | 
|
| 
       71 
71 
     | 
    
         
             
                          page = @agent.get(link + "&nofeather=True")
         
     | 
| 
       72 
72 
     | 
    
         | 
| 
       73 
73 
     | 
    
         
             
                          # Get page hits
         
     | 
| 
       74 
     | 
    
         
            -
                          hits = page.search("//span[@class='watch-view-count ']")
     | 
| 
      
 74 
     | 
    
         
            +
                          hits = page.search("//div[@id='watch7-views-info']//div[@class='watch-view-count']")
         
     | 
| 
       75 
75 
     | 
    
         
             
                          hits = hits.text.gsub(/[.,]/, ",")
         
     | 
| 
       76 
76 
     | 
    
         | 
| 
       77 
77 
     | 
    
         
             
                          # Get likes
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: cinch-url-scraper
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.3.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.3.2
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -10,7 +10,7 @@ authors: 
     | 
|
| 
       10 
10 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       11 
11 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       12 
12 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       13 
     | 
    
         
            -
            date: 2014-07- 
     | 
| 
      
 13 
     | 
    
         
            +
            date: 2014-07-19 00:00:00.000000000 Z
         
     | 
| 
       14 
14 
     | 
    
         
             
            dependencies:
         
     | 
| 
       15 
15 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       16 
16 
     | 
    
         
             
              name: cinch
         
     | 
| 
         @@ -54,7 +54,6 @@ extra_rdoc_files: [] 
     | 
|
| 
       54 
54 
     | 
    
         
             
            files:
         
     | 
| 
       55 
55 
     | 
    
         
             
            - LICENSE
         
     | 
| 
       56 
56 
     | 
    
         
             
            - README.md
         
     | 
| 
       57 
     | 
    
         
            -
            - lib/cinch/plugins/urlscraper.rb~
         
     | 
| 
       58 
57 
     | 
    
         
             
            - lib/cinch/plugins/urlscraper.rb
         
     | 
| 
       59 
58 
     | 
    
         
             
            homepage: https://github.com/mpapis/cinch-url-scraper
         
     | 
| 
       60 
59 
     | 
    
         
             
            licenses:
         
     | 
| 
         @@ -1,168 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            # @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
            require "json"
         
     | 
| 
       6 
     | 
    
         
            -
            require "mechanize"
         
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
            module Cinch
         
     | 
| 
       9 
     | 
    
         
            -
              module Plugins
         
     | 
| 
       10 
     | 
    
         
            -
                class UrlScraper
         
     | 
| 
       11 
     | 
    
         
            -
                  include Cinch::Plugin
         
     | 
| 
       12 
     | 
    
         
            -
                  include Cinch::Helpers
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                  listen_to :channel
         
     | 
| 
       15 
     | 
    
         
            -
                  set :plugin_name, 'urlscraper'
         
     | 
| 
       16 
     | 
    
         
            -
                  set :help, <<-USAGE.gsub(/^ {6}/, '')
         
     | 
| 
       17 
     | 
    
         
            -
                    If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
         
     | 
| 
       18 
     | 
    
         
            -
                    Enable/Disable Usage:
         
     | 
| 
       19 
     | 
    
         
            -
                    - !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
         
     | 
| 
       20 
     | 
    
         
            -
                    USAGE
         
     | 
| 
       21 
     | 
    
         
            -
                  
         
     | 
| 
       22 
     | 
    
         
            -
                  def listen(m)
         
     | 
| 
       23 
     | 
    
         
            -
                    return if m.message.include? "nospoil"
         
     | 
| 
       24 
     | 
    
         
            -
                    return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
         
     | 
| 
       25 
     | 
    
         
            -
                    # Create mechanize agent
         
     | 
| 
       26 
     | 
    
         
            -
                    if @agent.nil?
         
     | 
| 
       27 
     | 
    
         
            -
                      @agent = Mechanize.new
         
     | 
| 
       28 
     | 
    
         
            -
                      @agent.user_agent_alias = "Linux Mozilla"
         
     | 
| 
       29 
     | 
    
         
            -
                      @agent.max_history      = 0
         
     | 
| 
       30 
     | 
    
         
            -
                    end
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
                    URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
         
     | 
| 
       33 
     | 
    
         
            -
                      link=~/^(.*?)[:;,\)]?$/
         
     | 
| 
       34 
     | 
    
         
            -
                      $1
         
     | 
| 
       35 
     | 
    
         
            -
                    end.each do |link|
         
     | 
| 
       36 
     | 
    
         
            -
                      # Fetch data
         
     | 
| 
       37 
     | 
    
         
            -
                      begin
         
     | 
| 
       38 
     | 
    
         
            -
                        if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
         
     | 
| 
       39 
     | 
    
         
            -
                          link = "https://#{$1}"
         
     | 
| 
       40 
     | 
    
         
            -
                        end
         
     | 
| 
       41 
     | 
    
         
            -
                        uri  = URI.parse(link)
         
     | 
| 
       42 
     | 
    
         
            -
                        page = @agent.get(link)
         
     | 
| 
       43 
     | 
    
         
            -
                      rescue Mechanize::ResponseCodeError
         
     | 
| 
       44 
     | 
    
         
            -
                        if "www.youtube.com" == uri.host
         
     | 
| 
       45 
     | 
    
         
            -
                          m.reply "Thank you, GEMA!"
         
     | 
| 
       46 
     | 
    
         
            -
                        else
         
     | 
| 
       47 
     | 
    
         
            -
                          m.reply "Y U POST BROKEN LINKS?", true
         
     | 
| 
       48 
     | 
    
         
            -
                        end
         
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
                        next
         
     | 
| 
       51 
     | 
    
         
            -
                      end
         
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
                      # Replace strange characters
         
     | 
| 
       54 
     | 
    
         
            -
                      title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
         
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                      # Check host
         
     | 
| 
       57 
     | 
    
         
            -
                      case uri.host
         
     | 
| 
       58 
     | 
    
         
            -
                        when "www.imdb.com"
         
     | 
| 
       59 
     | 
    
         
            -
                          # Get user rating
         
     | 
| 
       60 
     | 
    
         
            -
                          rating = page.search("//strong/span[@itemprop='ratingValue']").text
         
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
                          # Get votes
         
     | 
| 
       63 
     | 
    
         
            -
                          votes = page.search("//a/span[@itemprop='ratingCount']").text
         
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                          m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
         
     | 
| 
       66 
     | 
    
         
            -
                            title, rating, votes
         
     | 
| 
       67 
     | 
    
         
            -
                          ]
         
     | 
| 
       68 
     | 
    
         
            -
                          
         
     | 
| 
       69 
     | 
    
         
            -
                        when "www.youtube.com"
         
     | 
| 
       70 
     | 
    
         
            -
                          # Reload with nofeather
         
     | 
| 
       71 
     | 
    
         
            -
                          page = @agent.get(link + "&nofeather=True")
         
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
                          # Get page hits
         
     | 
| 
       74 
     | 
    
         
            -
                          hits = page.search("//span[@class='watch-view-count ']")
         
     | 
| 
       75 
     | 
    
         
            -
                          hits = hits.text.gsub(/[.,]/, ",")
         
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
                          # Get likes
         
     | 
| 
       78 
     | 
    
         
            -
                          likes = page.search("//span[@class='likes-count']")
         
     | 
| 
       79 
     | 
    
         
            -
                          likes = likes.text.gsub(/[.,]/, ",")
         
     | 
| 
       80 
     | 
    
         
            -
                          
         
     | 
| 
       81 
     | 
    
         
            -
                          # Get dislikes
         
     | 
| 
       82 
     | 
    
         
            -
                          dislikes = page.search("//span[@class='dislikes-count']")
         
     | 
| 
       83 
     | 
    
         
            -
                          dislikes = dislikes.text.gsub(/[.,]/, ",")
         
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
                          m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
         
     | 
| 
       86 
     | 
    
         
            -
                            title, hits.strip, likes.strip, dislikes.strip
         
     | 
| 
       87 
     | 
    
         
            -
                          ]
         
     | 
| 
       88 
     | 
    
         
            -
                          
         
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
                        when "gist.github.com"
         
     | 
| 
       91 
     | 
    
         
            -
                          # Get owner
         
     | 
| 
       92 
     | 
    
         
            -
                          owner = page.search("//div[@class='name']/a").inner_html
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
                          # Get time
         
     | 
| 
       95 
     | 
    
         
            -
                          age = page.search("//span[@class='date']/time")
         
     | 
| 
       96 
     | 
    
         
            -
                          age = age.first[:datetime] rescue age.text if age
         
     | 
| 
       97 
     | 
    
         
            -
                          age = Time.parse(age) rescue nil
         
     | 
| 
       98 
     | 
    
         
            -
                          age = age.strftime("%Y-%m-%d %H:%M") if age
         
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
     | 
    
         
            -
                          if git
         
     | 
| 
       101 
     | 
    
         
            -
                            m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
         
     | 
| 
       102 
     | 
    
         
            -
                              title, uri.host, owner, age, link
         
     | 
| 
       103 
     | 
    
         
            -
                            ]
         
     | 
| 
       104 
     | 
    
         
            -
                          else
         
     | 
| 
       105 
     | 
    
         
            -
                            m.reply "Title: %s (at %s, %s on %s)" % [
         
     | 
| 
       106 
     | 
    
         
            -
                              title, uri.host, owner, age
         
     | 
| 
       107 
     | 
    
         
            -
                            ]
         
     | 
| 
       108 
     | 
    
         
            -
                          end
         
     | 
| 
       109 
     | 
    
         
            -
                        when "pastie.org"
         
     | 
| 
       110 
     | 
    
         
            -
                          # Get time
         
     | 
| 
       111 
     | 
    
         
            -
                          age = Time.parse(page.search("//span[@class='typo_date']").text)
         
     | 
| 
       112 
     | 
    
         
            -
                          age = age.strftime("%Y-%m-%d %H:%M")
         
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
                          m.reply "Title: %s (at %s, on %s)" % [
         
     | 
| 
       115 
     | 
    
         
            -
                            title, uri.host, age
         
     | 
| 
       116 
     | 
    
         
            -
                          ]
         
     | 
| 
       117 
     | 
    
         
            -
                        when "subforge.org", "subtle.de"
         
     | 
| 
       118 
     | 
    
         
            -
                          m.reply "Title: %s (at %s)" % [ title, uri.host ]
         
     | 
| 
       119 
     | 
    
         
            -
                          
         
     | 
| 
       120 
     | 
    
         
            -
                        when "twitter.com"
         
     | 
| 
       121 
     | 
    
         
            -
                          if link =~ /\/status\/(\d+)$/
         
     | 
| 
       122 
     | 
    
         
            -
                            json      = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
         
     | 
| 
       123 
     | 
    
         
            -
                            tweet     = JSON.parse(json)
         
     | 
| 
       124 
     | 
    
         
            -
                            unescaped = CGI.unescapeHTML(tweet["text"])
         
     | 
| 
       125 
     | 
    
         
            -
             
     | 
| 
       126 
     | 
    
         
            -
                            m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
         
     | 
| 
       127 
     | 
    
         
            -
                          else
         
     | 
| 
       128 
     | 
    
         
            -
                            m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
         
     | 
| 
       129 
     | 
    
         
            -
                          end
         
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
       131 
     | 
    
         
            -
                        when "isup.me"
         
     | 
| 
       132 
     | 
    
         
            -
                          container = page.search("//div[@id='container']")
         
     | 
| 
       133 
     | 
    
         
            -
                          m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
         
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
                        else
         
     | 
| 
       136 
     | 
    
         
            -
                          m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
         
     | 
| 
       137 
     | 
    
         
            -
                        end
         
     | 
| 
       138 
     | 
    
         
            -
                      end
         
     | 
| 
       139 
     | 
    
         
            -
                    end
         
     | 
| 
       140 
     | 
    
         
            -
                      
         
     | 
| 
       141 
     | 
    
         
            -
                  match /url (on|off)$/
         
     | 
| 
       142 
     | 
    
         
            -
                  
         
     | 
| 
       143 
     | 
    
         
            -
                  def execute(m, option)
         
     | 
| 
       144 
     | 
    
         
            -
                     
         
     | 
| 
       145 
     | 
    
         
            -
                     config[:enabled_channels] ||= [bot.channels.map(&:name)]
         
     | 
| 
       146 
     | 
    
         
            -
                     puts bot.channels.map(&:name)
         
     | 
| 
       147 
     | 
    
         
            -
                        
         
     | 
| 
       148 
     | 
    
         
            -
                        @url = option == "on"
         
     | 
| 
       149 
     | 
    
         
            -
                        
         
     | 
| 
       150 
     | 
    
         
            -
                        case option
         
     | 
| 
       151 
     | 
    
         
            -
                          when "on"
         
     | 
| 
       152 
     | 
    
         
            -
                            config[:enabled_channels] << m.channel.name
         
     | 
| 
       153 
     | 
    
         
            -
                          else
         
     | 
| 
       154 
     | 
    
         
            -
                            config[:enabled_channels].delete(m.channel.name)
         
     | 
| 
       155 
     | 
    
         
            -
                          end
         
     | 
| 
       156 
     | 
    
         
            -
                          
         
     | 
| 
       157 
     | 
    
         
            -
                          m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
         
     | 
| 
       158 
     | 
    
         
            -
                          
         
     | 
| 
       159 
     | 
    
         
            -
                          @bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
         
     | 
| 
       160 
     | 
    
         
            -
                          
         
     | 
| 
       161 
     | 
    
         
            -
                          config[:enabled_channels]=nil if config[:enabled_channels]==[]
         
     | 
| 
       162 
     | 
    
         
            -
                          
         
     | 
| 
       163 
     | 
    
         
            -
                        rescue 
         
     | 
| 
       164 
     | 
    
         
            -
                          m.reply Format(:red, "Error: #{$!}")
         
     | 
| 
       165 
     | 
    
         
            -
                        end
         
     | 
| 
       166 
     | 
    
         
            -
                      end
         
     | 
| 
       167 
     | 
    
         
            -
                    end
         
     | 
| 
       168 
     | 
    
         
            -
                  end
         
     |