cinch-url-scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/LICENSE +14 -0
  2. data/README.md +3 -0
  3. data/lib/cinch/plugins/urlscraper.rb +105 -0
  4. metadata +80 -0
data/LICENSE ADDED
@@ -0,0 +1,14 @@
1
+ Copyright (c) 2012 Michal Papis
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # Cinch Url Scraper plugin
2
+
3
+ A Cinch plugin to get information about posted URLs.
data/lib/cinch/plugins/urlscraper.rb ADDED
@@ -0,0 +1,105 @@
# source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
# @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>

# Time.parse, CGI.unescapeHTML and URI.extract live in these stdlib
# libraries; require them explicitly instead of relying on another
# library to have loaded them.
require "cgi"
require "json"
require "time"
require "uri"
require "mechanize"

module Cinch
  module Plugins
    # Cinch plugin that looks for http/https links in channel messages,
    # fetches each one with Mechanize and replies with a one-line summary.
    #
    # A handful of hosts (IMDb, YouTube, gist.github.com, pastie.org,
    # twitter.com, subforge.org, subtle.de) get a host-specific summary;
    # every other host gets "Title: <title> (at <host>)" when the page has
    # a title.
    class UrlScraper
      include Cinch::Plugin
      set :react_on, :channel

      listen_to :channel

      # Handler for the :channel event registered above.
      #
      # @param m [Cinch::Message] the channel message; only `m.message` is
      #   read and `m.reply` is used to answer
      # @return [void]
      def listen(m)
        URI.extract(m.message, %w[http https]) do |link|
          begin
            uri = URI.parse(link)
            page = agent.get(link)
          rescue URI::InvalidURIError
            # The extractor can hand back strings that URI.parse rejects;
            # skip them so the remaining links are still processed.
            next
          rescue Mechanize::ResponseCodeError
            if uri.host == "www.youtube.com"
              m.reply "Thank you, GEMA!"
            else
              # Second argument true: prefix the reply with the sender's nick.
              m.reply "Y U POST BROKEN LINKS?", true
            end

            next
          end

          summary = summary_for(link, uri, page, page_title(page))
          m.reply summary if summary
        end
      end

      private

      # Lazily built Mechanize agent shared by all fetches of this plugin
      # instance.
      #
      # @return [Mechanize]
      def agent
        @agent ||= Mechanize.new.tap do |mech|
          mech.user_agent_alias = "Linux Mozilla"
          mech.max_history = 0
        end
      end

      # Extracts and normalises the title of a fetched document.
      #
      # Control characters are removed, runs of spaces are collapsed to one
      # space and surrounding whitespace is stripped.
      #
      # @param page [Object] whatever Mechanize returned for the link
      # @return [String, nil] the cleaned title, or nil when the document has
      #   no usable title (e.g. it is not an HTML page)
      def page_title(page)
        return nil unless page.respond_to?(:title)

        raw = page.title
        return nil unless raw

        raw.gsub(/[\x00-\x1f]+/, "").squeeze(" ").strip
      rescue ArgumentError, EncodingError
        # Titles with broken byte sequences cannot be cleaned; treat them
        # as missing rather than failing the whole handler.
        nil
      end

      # Picks the summary format for the link's host.
      #
      # @param link [String] the link exactly as it appeared in the message
      # @param uri [URI::Generic] the parsed link
      # @param page [Object] the fetched document
      # @param title [String, nil] cleaned title from {#page_title}
      # @return [String, nil] the text to reply with, or nil for no reply
      def summary_for(link, uri, page, title)
        host = uri.host

        case host
        when "www.imdb.com"             then imdb_summary(page, title, host)
        when "www.youtube.com"          then youtube_summary(uri, page, title, host)
        when "gist.github.com"          then gist_summary(page, title, host)
        when "pastie.org"               then pastie_summary(page, title, host)
        when "subforge.org", "subtle.de" then plain_summary(title, host)
        when "twitter.com"              then twitter_summary(link, title, host)
        else optional_summary(title, host)
        end
      end

      # @return [String] "Title: <title> (at <host>)", even if title is nil
      def plain_summary(title, host)
        format("Title: %s (at %s)", title, host)
      end

      # @return [String, nil] the plain summary, or nil when there is no title
      def optional_summary(title, host)
        plain_summary(title, host) if title
      end

      # Title plus user rating and vote count scraped from an IMDb page.
      #
      # @return [String, nil]
      def imdb_summary(page, title, host)
        # Non-HTML downloads have no DOM to query.
        return optional_summary(title, host) unless page.respond_to?(:search)

        rating = page.search("//strong/span[@itemprop='ratingValue']").text
        votes = page.search("//a/span[@itemprop='ratingCount']").text

        format("Title: %s (at %s, %s/10 from %s users)", title, host, rating, votes)
      end

      # Title plus view count and like/dislike text scraped from a YouTube
      # page that is re-requested with the "nofeather=True" parameter.
      #
      # @return [String, nil]
      def youtube_summary(uri, page, title, host)
        reloaded = fetch_or_nil(with_query_param(uri, "nofeather=True"))
        # If the reload fails, keep using the page that was already fetched.
        page = reloaded if reloaded
        return optional_summary(title, host) unless page.respond_to?(:search)

        hits = page.search("//span[@class='watch-view-count']/strong").text.delete(".,")
        likes = page.search("//span[@class='watch-likes-dislikes']").text.delete(".,")

        format("Title: %s (at %s, %s hits, %s)", title, host, hits, likes.strip)
      end

      # Title plus owner and creation time scraped from a gist page.
      #
      # @return [String, nil]
      def gist_summary(page, title, host)
        return optional_summary(title, host) unless page.respond_to?(:search)

        owner = page.search("//div[@class='name']/a").inner_html
        age = formatted_time(page.search("//span[@class='date']/abbr").text)
        # No parsable date on the page: fall back to the plain summary.
        return optional_summary(title, host) unless age

        format("Title: %s (at %s, %s on %s)", title, host, owner, age)
      end

      # Title plus creation time scraped from a pastie page.
      #
      # @return [String, nil]
      def pastie_summary(page, title, host)
        return optional_summary(title, host) unless page.respond_to?(:search)

        age = formatted_time(page.search("//span[@class='typo_date']").text)
        return optional_summary(title, host) unless age

        format("Title: %s (at %s, on %s)", title, host, age)
      end

      # For links ending in "/status/<digits>", fetches the tweet as JSON
      # and returns "@<screen_name>: <text>". Other twitter.com links are
      # reported as broken when the page has a title.
      #
      # @return [String, nil]
      def twitter_summary(link, title, host)
        status_id = link[%r{/status/(\d+)\z}, 1]
        unless status_id
          return title ? format("Broken twitter link: %s (at %s)", title, host) : nil
        end

        json = agent.get("https://api.twitter.com/1/statuses/show/#{status_id}.json?trim_user=1").body
        tweet = JSON.parse(json)
        text = tweet["text"] if tweet.is_a?(Hash)
        return optional_summary(title, host) unless text

        user = tweet["user"]
        screen_name = user["screen_name"] if user.is_a?(Hash)

        format("@%s: %s", screen_name, CGI.unescapeHTML(text.to_s))
      rescue Mechanize::ResponseCodeError, JSON::ParserError
        # The API call failed or did not return JSON; degrade to the plain
        # title summary instead of raising out of the handler.
        optional_summary(title, host)
      end

      # Fetches a URL, turning an HTTP error status into nil.
      #
      # @param url [String]
      # @return [Object, nil] the fetched document, or nil on
      #   Mechanize::ResponseCodeError
      def fetch_or_nil(url)
        agent.get(url)
      rescue Mechanize::ResponseCodeError
        nil
      end

      # Returns the URI as a string with one extra query parameter, using
      # "?" or "&" as appropriate and keeping any fragment at the end.
      #
      # @param uri [URI::Generic]
      # @param param [String] already-encoded "key=value" pair
      # @return [String]
      def with_query_param(uri, param)
        extended = uri.dup
        parts = [uri.query, param].reject { |part| part.nil? || part.empty? }
        extended.query = parts.join("&")
        extended.to_s
      end

      # Parses a timestamp string and renders it as "YYYY-MM-DD HH:MM".
      #
      # @param text [String]
      # @return [String, nil] nil when the text holds no parsable time
      def formatted_time(text)
        Time.parse(text).strftime("%Y-%m-%d %H:%M")
      rescue ArgumentError
        nil
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cinch-url-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Michal Papis
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: cinch
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '2'
30
+ - !ruby/object:Gem::Dependency
31
+ name: mechanize
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '2'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '2'
46
+ description: A Cinch plugin to get information about posted URLs.
47
+ email:
48
+ - mpapis@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - LICENSE
54
+ - README.md
55
+ - lib/cinch/plugins/urlscraper.rb
56
+ homepage: https://github.com/mpapis/cinch-url-scraper
57
+ licenses: []
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: 1.9.1
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ! '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 1.8.24
77
+ signing_key:
78
+ specification_version: 3
79
+ summary: A Cinch plugin to get information about posted URLs.
80
+ test_files: []