RubyGems - cinch-url-scraper - Versions diffs - 1.3.0 → 1.3.1 - Mend

cinch-url-scraper 1.3.0 → 1.3.1

Files changed (4) hide show

data/lib/cinch/plugins/urlscraper.rb +2 -2
data/lib/cinch/plugins/urlscraper.rb~ +168 -0
metadata +14 -7
checksums.yaml +0 -7

data/lib/cinch/plugins/urlscraper.rb CHANGED

@@ -71,7 +71,7 @@ module Cinch
               page = @agent.get(link + "&nofeather=True")
               # Get page hits
-              hits = page.search("//span[@class='watch-view-count ']")
+              hits = page.search("//span[@class='watch-view-count yt-uix-hovercard-target']")
               hits = hits.text.gsub(/[.,]/, ",")
               # Get likes
@@ -82,7 +82,7 @@ module Cinch
               dislikes = page.search("//span[@class='dislikes-count']")
               dislikes = dislikes.text.gsub(/[.,]/, ",")
-              m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
+              m.reply "#{m.user.nick}'s YT Title: %s (Views: %s - Likes: %s || Dislikes: %s)" % [
                 title, hits.strip, likes.strip, dislikes.strip
               ]

data/lib/cinch/plugins/urlscraper.rb~ ADDED

@@ -0,0 +1,168 @@
+# source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
+# @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
+require "json"
+require "mechanize"
+module Cinch
+  module Plugins
+    class UrlScraper
+      include Cinch::Plugin
+      include Cinch::Helpers
+      listen_to :channel
+      set :plugin_name, 'urlscraper'
+      set :help, <<-USAGE.gsub(/^ {6}/, '')
+        If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
+        Enable/Disable Usage:
+        - !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
+        USAGE
+      def listen(m)
+        return if m.message.include? "nospoil"
+        return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
+        # Create mechanize agent
+        if @agent.nil?
+          @agent = Mechanize.new
+          @agent.user_agent_alias = "Linux Mozilla"
+          @agent.max_history      = 0
+        end
+        URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
+          link=~/^(.*?)[:;,\)]?$/
+          $1
+        end.each do |link|
+          # Fetch data
+          begin
+            if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
+              link = "https://#{$1}"
+            end
+            uri  = URI.parse(link)
+            page = @agent.get(link)
+          rescue Mechanize::ResponseCodeError
+            if "www.youtube.com" == uri.host
+              m.reply "Thank you, GEMA!"
+            else
+              m.reply "Y U POST BROKEN LINKS?", true
+            end
+            next
+          end
+          # Replace strange characters
+          title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
+          # Check host
+          case uri.host
+            when "www.imdb.com"
+              # Get user rating
+              rating = page.search("//strong/span[@itemprop='ratingValue']").text
+              # Get votes
+              votes = page.search("//a/span[@itemprop='ratingCount']").text
+              m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
+                title, rating, votes
+              ]
+            when "www.youtube.com"
+              # Reload with nofeather
+              page = @agent.get(link + "&nofeather=True")
+              # Get page hits
+              hits = page.search("//span[@class='watch-view-count ']")
+              hits = hits.text.gsub(/[.,]/, ",")
+              # Get likes
+              likes = page.search("//span[@class='likes-count']")
+              likes = likes.text.gsub(/[.,]/, ",")
+              # Get dislikes
+              dislikes = page.search("//span[@class='dislikes-count']")
+              dislikes = dislikes.text.gsub(/[.,]/, ",")
+              m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
+                title, hits.strip, likes.strip, dislikes.strip
+              ]
+            when "gist.github.com"
+              # Get owner
+              owner = page.search("//div[@class='name']/a").inner_html
+              # Get time
+              age = page.search("//span[@class='date']/time")
+              age = age.first[:datetime] rescue age.text if age
+              age = Time.parse(age) rescue nil
+              age = age.strftime("%Y-%m-%d %H:%M") if age
+              if git
+                m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
+                  title, uri.host, owner, age, link
+                ]
+              else
+                m.reply "Title: %s (at %s, %s on %s)" % [
+                  title, uri.host, owner, age
+                ]
+              end
+            when "pastie.org"
+              # Get time
+              age = Time.parse(page.search("//span[@class='typo_date']").text)
+              age = age.strftime("%Y-%m-%d %H:%M")
+              m.reply "Title: %s (at %s, on %s)" % [
+                title, uri.host, age
+              ]
+            when "subforge.org", "subtle.de"
+              m.reply "Title: %s (at %s)" % [ title, uri.host ]
+            when "twitter.com"
+              if link =~ /\/status\/(\d+)$/
+                json      = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
+                tweet     = JSON.parse(json)
+                unescaped = CGI.unescapeHTML(tweet["text"])
+                m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
+              else
+                m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
+              end
+            when "isup.me"
+              container = page.search("//div[@id='container']")
+              m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
+            else
+              m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
+            end
+          end
+        end
+      match /url (on|off)$/
+      def execute(m, option)
+         config[:enabled_channels] ||= [bot.channels.map(&:name)]
+         puts bot.channels.map(&:name)
+            @url = option == "on"
+            case option
+              when "on"
+                config[:enabled_channels] << m.channel.name
+              else
+                config[:enabled_channels].delete(m.channel.name)
+              end
+              m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
+              @bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
+              config[:enabled_channels]=nil if config[:enabled_channels]==[]
+            rescue
+              m.reply Format(:red, "Error: #{$!}")
+            end
+          end
+        end
+      end

metadata CHANGED

@@ -1,7 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: cinch-url-scraper
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.3.1
+  prerelease:
 platform: ruby
 authors:
 - Michal Papis
@@ -9,11 +10,12 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-29 00:00:00.000000000 Z
+date: 2014-07-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cinch
   requirement: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -21,6 +23,7 @@ dependencies:
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -28,6 +31,7 @@ dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
   requirement: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -35,6 +39,7 @@ dependencies:
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
@@ -49,29 +54,31 @@ extra_rdoc_files: []
 files:
 - LICENSE
 - README.md
+- lib/cinch/plugins/urlscraper.rb~
 - lib/cinch/plugins/urlscraper.rb
 homepage: https://github.com/mpapis/cinch-url-scraper
 licenses:
 - LGPLv3
-metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: 1.9.1
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 1.8.23
 signing_key:
-specification_version: 4
+specification_version: 3
 summary: A Cinch plugin to get information about posted URLs.
 test_files: []

checksums.yaml DELETED

@@ -1,7 +0,0 @@
----
-SHA1:
-  metadata.gz: 97acaced44b7c782d5f57b47d2dde2f40be42ab3
-  data.tar.gz: b5be7bd2f97a5797ec225a2acac8bd9ccc876f51
-SHA512:
-  metadata.gz: 63259c11ac36eb6719a348ba425ad1b7c6c24fae5f7a3e55144e9b8cec77d150ffb086ef0e4f4ced058dfa1e1e1399d8eb8606f5c35c65521ed2f5415540d363
-  data.tar.gz: 2e6eae36d346cf69bee85aaa28ddaa81bae10d3eece8a4eff63acd9af48689430e92a4d30e32430aff3f3f65667e6e657e8582fd0d26d2d18348894f48c11c55