cinch-url-scraper 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cinch/plugins/urlscraper.rb +2 -2
- data/lib/cinch/plugins/urlscraper.rb~ +168 -0
- metadata +14 -7
- checksums.yaml +0 -7
@@ -71,7 +71,7 @@ module Cinch
|
|
71
71
|
page = @agent.get(link + "&nofeather=True")
|
72
72
|
|
73
73
|
# Get page hits
|
74
|
-
hits = page.search("//span[@class='watch-view-count ']")
|
74
|
+
hits = page.search("//span[@class='watch-view-count yt-uix-hovercard-target']")
|
75
75
|
hits = hits.text.gsub(/[.,]/, ",")
|
76
76
|
|
77
77
|
# Get likes
|
@@ -82,7 +82,7 @@ module Cinch
|
|
82
82
|
dislikes = page.search("//span[@class='dislikes-count']")
|
83
83
|
dislikes = dislikes.text.gsub(/[.,]/, ",")
|
84
84
|
|
85
|
-
m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
|
85
|
+
m.reply "#{m.user.nick}'s YT Title: %s (Views: %s - Likes: %s || Dislikes: %s)" % [
|
86
86
|
title, hits.strip, likes.strip, dislikes.strip
|
87
87
|
]
|
88
88
|
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
|
2
|
+
|
3
|
+
# @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
|
4
|
+
|
5
|
+
require "json"
|
6
|
+
require "mechanize"
|
7
|
+
|
8
|
+
module Cinch
|
9
|
+
module Plugins
|
10
|
+
class UrlScraper
|
11
|
+
include Cinch::Plugin
|
12
|
+
include Cinch::Helpers
|
13
|
+
|
14
|
+
listen_to :channel
|
15
|
+
set :plugin_name, 'urlscraper'
|
16
|
+
set :help, <<-USAGE.gsub(/^ {6}/, '')
|
17
|
+
If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
|
18
|
+
Enable/Disable Usage:
|
19
|
+
- !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
|
20
|
+
USAGE
|
21
|
+
|
22
|
+
def listen(m)
|
23
|
+
return if m.message.include? "nospoil"
|
24
|
+
return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
|
25
|
+
# Create mechanize agent
|
26
|
+
if @agent.nil?
|
27
|
+
@agent = Mechanize.new
|
28
|
+
@agent.user_agent_alias = "Linux Mozilla"
|
29
|
+
@agent.max_history = 0
|
30
|
+
end
|
31
|
+
|
32
|
+
URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
|
33
|
+
link=~/^(.*?)[:;,\)]?$/
|
34
|
+
$1
|
35
|
+
end.each do |link|
|
36
|
+
# Fetch data
|
37
|
+
begin
|
38
|
+
if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
|
39
|
+
link = "https://#{$1}"
|
40
|
+
end
|
41
|
+
uri = URI.parse(link)
|
42
|
+
page = @agent.get(link)
|
43
|
+
rescue Mechanize::ResponseCodeError
|
44
|
+
if "www.youtube.com" == uri.host
|
45
|
+
m.reply "Thank you, GEMA!"
|
46
|
+
else
|
47
|
+
m.reply "Y U POST BROKEN LINKS?", true
|
48
|
+
end
|
49
|
+
|
50
|
+
next
|
51
|
+
end
|
52
|
+
|
53
|
+
# Replace strange characters
|
54
|
+
title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
|
55
|
+
|
56
|
+
# Check host
|
57
|
+
case uri.host
|
58
|
+
when "www.imdb.com"
|
59
|
+
# Get user rating
|
60
|
+
rating = page.search("//strong/span[@itemprop='ratingValue']").text
|
61
|
+
|
62
|
+
# Get votes
|
63
|
+
votes = page.search("//a/span[@itemprop='ratingCount']").text
|
64
|
+
|
65
|
+
m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
|
66
|
+
title, rating, votes
|
67
|
+
]
|
68
|
+
|
69
|
+
when "www.youtube.com"
|
70
|
+
# Reload with nofeather
|
71
|
+
page = @agent.get(link + "&nofeather=True")
|
72
|
+
|
73
|
+
# Get page hits
|
74
|
+
hits = page.search("//span[@class='watch-view-count ']")
|
75
|
+
hits = hits.text.gsub(/[.,]/, ",")
|
76
|
+
|
77
|
+
# Get likes
|
78
|
+
likes = page.search("//span[@class='likes-count']")
|
79
|
+
likes = likes.text.gsub(/[.,]/, ",")
|
80
|
+
|
81
|
+
# Get dislikes
|
82
|
+
dislikes = page.search("//span[@class='dislikes-count']")
|
83
|
+
dislikes = dislikes.text.gsub(/[.,]/, ",")
|
84
|
+
|
85
|
+
m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
|
86
|
+
title, hits.strip, likes.strip, dislikes.strip
|
87
|
+
]
|
88
|
+
|
89
|
+
|
90
|
+
when "gist.github.com"
|
91
|
+
# Get owner
|
92
|
+
owner = page.search("//div[@class='name']/a").inner_html
|
93
|
+
|
94
|
+
# Get time
|
95
|
+
age = page.search("//span[@class='date']/time")
|
96
|
+
age = age.first[:datetime] rescue age.text if age
|
97
|
+
age = Time.parse(age) rescue nil
|
98
|
+
age = age.strftime("%Y-%m-%d %H:%M") if age
|
99
|
+
|
100
|
+
if git
|
101
|
+
m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
|
102
|
+
title, uri.host, owner, age, link
|
103
|
+
]
|
104
|
+
else
|
105
|
+
m.reply "Title: %s (at %s, %s on %s)" % [
|
106
|
+
title, uri.host, owner, age
|
107
|
+
]
|
108
|
+
end
|
109
|
+
when "pastie.org"
|
110
|
+
# Get time
|
111
|
+
age = Time.parse(page.search("//span[@class='typo_date']").text)
|
112
|
+
age = age.strftime("%Y-%m-%d %H:%M")
|
113
|
+
|
114
|
+
m.reply "Title: %s (at %s, on %s)" % [
|
115
|
+
title, uri.host, age
|
116
|
+
]
|
117
|
+
when "subforge.org", "subtle.de"
|
118
|
+
m.reply "Title: %s (at %s)" % [ title, uri.host ]
|
119
|
+
|
120
|
+
when "twitter.com"
|
121
|
+
if link =~ /\/status\/(\d+)$/
|
122
|
+
json = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
|
123
|
+
tweet = JSON.parse(json)
|
124
|
+
unescaped = CGI.unescapeHTML(tweet["text"])
|
125
|
+
|
126
|
+
m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
|
127
|
+
else
|
128
|
+
m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
|
129
|
+
end
|
130
|
+
|
131
|
+
when "isup.me"
|
132
|
+
container = page.search("//div[@id='container']")
|
133
|
+
m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
|
134
|
+
|
135
|
+
else
|
136
|
+
m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
match /url (on|off)$/
|
142
|
+
|
143
|
+
def execute(m, option)
|
144
|
+
|
145
|
+
config[:enabled_channels] ||= [bot.channels.map(&:name)]
|
146
|
+
puts bot.channels.map(&:name)
|
147
|
+
|
148
|
+
@url = option == "on"
|
149
|
+
|
150
|
+
case option
|
151
|
+
when "on"
|
152
|
+
config[:enabled_channels] << m.channel.name
|
153
|
+
else
|
154
|
+
config[:enabled_channels].delete(m.channel.name)
|
155
|
+
end
|
156
|
+
|
157
|
+
m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
|
158
|
+
|
159
|
+
@bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
|
160
|
+
|
161
|
+
config[:enabled_channels]=nil if config[:enabled_channels]==[]
|
162
|
+
|
163
|
+
rescue
|
164
|
+
m.reply Format(:red, "Error: #{$!}")
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cinch-url-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.3.1
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Michal Papis
|
@@ -9,11 +10,12 @@ authors:
|
|
9
10
|
autorequire:
|
10
11
|
bindir: bin
|
11
12
|
cert_chain: []
|
12
|
-
date: 2014-06
|
13
|
+
date: 2014-07-06 00:00:00.000000000 Z
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: cinch
|
16
17
|
requirement: !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
17
19
|
requirements:
|
18
20
|
- - ~>
|
19
21
|
- !ruby/object:Gem::Version
|
@@ -21,6 +23,7 @@ dependencies:
|
|
21
23
|
type: :runtime
|
22
24
|
prerelease: false
|
23
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
24
27
|
requirements:
|
25
28
|
- - ~>
|
26
29
|
- !ruby/object:Gem::Version
|
@@ -28,6 +31,7 @@ dependencies:
|
|
28
31
|
- !ruby/object:Gem::Dependency
|
29
32
|
name: mechanize
|
30
33
|
requirement: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
31
35
|
requirements:
|
32
36
|
- - ~>
|
33
37
|
- !ruby/object:Gem::Version
|
@@ -35,6 +39,7 @@ dependencies:
|
|
35
39
|
type: :runtime
|
36
40
|
prerelease: false
|
37
41
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
38
43
|
requirements:
|
39
44
|
- - ~>
|
40
45
|
- !ruby/object:Gem::Version
|
@@ -49,29 +54,31 @@ extra_rdoc_files: []
|
|
49
54
|
files:
|
50
55
|
- LICENSE
|
51
56
|
- README.md
|
57
|
+
- lib/cinch/plugins/urlscraper.rb~
|
52
58
|
- lib/cinch/plugins/urlscraper.rb
|
53
59
|
homepage: https://github.com/mpapis/cinch-url-scraper
|
54
60
|
licenses:
|
55
61
|
- LGPLv3
|
56
|
-
metadata: {}
|
57
62
|
post_install_message:
|
58
63
|
rdoc_options: []
|
59
64
|
require_paths:
|
60
65
|
- lib
|
61
66
|
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
62
68
|
requirements:
|
63
|
-
- - '>='
|
69
|
+
- - ! '>='
|
64
70
|
- !ruby/object:Gem::Version
|
65
71
|
version: 1.9.1
|
66
72
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
67
74
|
requirements:
|
68
|
-
- - '>='
|
75
|
+
- - ! '>='
|
69
76
|
- !ruby/object:Gem::Version
|
70
77
|
version: '0'
|
71
78
|
requirements: []
|
72
79
|
rubyforge_project:
|
73
|
-
rubygems_version:
|
80
|
+
rubygems_version: 1.8.23
|
74
81
|
signing_key:
|
75
|
-
specification_version:
|
82
|
+
specification_version: 3
|
76
83
|
summary: A Cinch plugin to get information about posted URLs.
|
77
84
|
test_files: []
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 97acaced44b7c782d5f57b47d2dde2f40be42ab3
|
4
|
-
data.tar.gz: b5be7bd2f97a5797ec225a2acac8bd9ccc876f51
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 63259c11ac36eb6719a348ba425ad1b7c6c24fae5f7a3e55144e9b8cec77d150ffb086ef0e4f4ced058dfa1e1e1399d8eb8606f5c35c65521ed2f5415540d363
|
7
|
-
data.tar.gz: 2e6eae36d346cf69bee85aaa28ddaa81bae10d3eece8a4eff63acd9af48689430e92a4d30e32430aff3f3f65667e6e657e8582fd0d26d2d18348894f48c11c55
|