cinch-url-scraper 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cinch/plugins/urlscraper.rb +1 -1
- metadata +2 -3
- data/lib/cinch/plugins/urlscraper.rb~ +0 -168
@@ -71,7 +71,7 @@ module Cinch
|
|
71
71
|
page = @agent.get(link + "&nofeather=True")
|
72
72
|
|
73
73
|
# Get page hits
|
74
|
-
hits = page.search("//
|
74
|
+
hits = page.search("//div[@id='watch7-views-info']//div[@class='watch-view-count']")
|
75
75
|
hits = hits.text.gsub(/[.,]/, ",")
|
76
76
|
|
77
77
|
# Get likes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cinch-url-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-07-
|
13
|
+
date: 2014-07-19 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: cinch
|
@@ -54,7 +54,6 @@ extra_rdoc_files: []
|
|
54
54
|
files:
|
55
55
|
- LICENSE
|
56
56
|
- README.md
|
57
|
-
- lib/cinch/plugins/urlscraper.rb~
|
58
57
|
- lib/cinch/plugins/urlscraper.rb
|
59
58
|
homepage: https://github.com/mpapis/cinch-url-scraper
|
60
59
|
licenses:
|
@@ -1,168 +0,0 @@
|
|
1
|
-
# source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
|
2
|
-
|
3
|
-
# @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
|
4
|
-
|
5
|
-
require "json"
|
6
|
-
require "mechanize"
|
7
|
-
|
8
|
-
module Cinch
|
9
|
-
module Plugins
|
10
|
-
class UrlScraper
|
11
|
-
include Cinch::Plugin
|
12
|
-
include Cinch::Helpers
|
13
|
-
|
14
|
-
listen_to :channel
|
15
|
-
set :plugin_name, 'urlscraper'
|
16
|
-
set :help, <<-USAGE.gsub(/^ {6}/, '')
|
17
|
-
If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
|
18
|
-
Enable/Disable Usage:
|
19
|
-
- !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
|
20
|
-
USAGE
|
21
|
-
|
22
|
-
def listen(m)
|
23
|
-
return if m.message.include? "nospoil"
|
24
|
-
return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
|
25
|
-
# Create mechanize agent
|
26
|
-
if @agent.nil?
|
27
|
-
@agent = Mechanize.new
|
28
|
-
@agent.user_agent_alias = "Linux Mozilla"
|
29
|
-
@agent.max_history = 0
|
30
|
-
end
|
31
|
-
|
32
|
-
URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
|
33
|
-
link=~/^(.*?)[:;,\)]?$/
|
34
|
-
$1
|
35
|
-
end.each do |link|
|
36
|
-
# Fetch data
|
37
|
-
begin
|
38
|
-
if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
|
39
|
-
link = "https://#{$1}"
|
40
|
-
end
|
41
|
-
uri = URI.parse(link)
|
42
|
-
page = @agent.get(link)
|
43
|
-
rescue Mechanize::ResponseCodeError
|
44
|
-
if "www.youtube.com" == uri.host
|
45
|
-
m.reply "Thank you, GEMA!"
|
46
|
-
else
|
47
|
-
m.reply "Y U POST BROKEN LINKS?", true
|
48
|
-
end
|
49
|
-
|
50
|
-
next
|
51
|
-
end
|
52
|
-
|
53
|
-
# Replace strange characters
|
54
|
-
title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
|
55
|
-
|
56
|
-
# Check host
|
57
|
-
case uri.host
|
58
|
-
when "www.imdb.com"
|
59
|
-
# Get user rating
|
60
|
-
rating = page.search("//strong/span[@itemprop='ratingValue']").text
|
61
|
-
|
62
|
-
# Get votes
|
63
|
-
votes = page.search("//a/span[@itemprop='ratingCount']").text
|
64
|
-
|
65
|
-
m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
|
66
|
-
title, rating, votes
|
67
|
-
]
|
68
|
-
|
69
|
-
when "www.youtube.com"
|
70
|
-
# Reload with nofeather
|
71
|
-
page = @agent.get(link + "&nofeather=True")
|
72
|
-
|
73
|
-
# Get page hits
|
74
|
-
hits = page.search("//span[@class='watch-view-count ']")
|
75
|
-
hits = hits.text.gsub(/[.,]/, ",")
|
76
|
-
|
77
|
-
# Get likes
|
78
|
-
likes = page.search("//span[@class='likes-count']")
|
79
|
-
likes = likes.text.gsub(/[.,]/, ",")
|
80
|
-
|
81
|
-
# Get dislikes
|
82
|
-
dislikes = page.search("//span[@class='dislikes-count']")
|
83
|
-
dislikes = dislikes.text.gsub(/[.,]/, ",")
|
84
|
-
|
85
|
-
m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
|
86
|
-
title, hits.strip, likes.strip, dislikes.strip
|
87
|
-
]
|
88
|
-
|
89
|
-
|
90
|
-
when "gist.github.com"
|
91
|
-
# Get owner
|
92
|
-
owner = page.search("//div[@class='name']/a").inner_html
|
93
|
-
|
94
|
-
# Get time
|
95
|
-
age = page.search("//span[@class='date']/time")
|
96
|
-
age = age.first[:datetime] rescue age.text if age
|
97
|
-
age = Time.parse(age) rescue nil
|
98
|
-
age = age.strftime("%Y-%m-%d %H:%M") if age
|
99
|
-
|
100
|
-
if git
|
101
|
-
m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
|
102
|
-
title, uri.host, owner, age, link
|
103
|
-
]
|
104
|
-
else
|
105
|
-
m.reply "Title: %s (at %s, %s on %s)" % [
|
106
|
-
title, uri.host, owner, age
|
107
|
-
]
|
108
|
-
end
|
109
|
-
when "pastie.org"
|
110
|
-
# Get time
|
111
|
-
age = Time.parse(page.search("//span[@class='typo_date']").text)
|
112
|
-
age = age.strftime("%Y-%m-%d %H:%M")
|
113
|
-
|
114
|
-
m.reply "Title: %s (at %s, on %s)" % [
|
115
|
-
title, uri.host, age
|
116
|
-
]
|
117
|
-
when "subforge.org", "subtle.de"
|
118
|
-
m.reply "Title: %s (at %s)" % [ title, uri.host ]
|
119
|
-
|
120
|
-
when "twitter.com"
|
121
|
-
if link =~ /\/status\/(\d+)$/
|
122
|
-
json = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
|
123
|
-
tweet = JSON.parse(json)
|
124
|
-
unescaped = CGI.unescapeHTML(tweet["text"])
|
125
|
-
|
126
|
-
m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
|
127
|
-
else
|
128
|
-
m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
|
129
|
-
end
|
130
|
-
|
131
|
-
when "isup.me"
|
132
|
-
container = page.search("//div[@id='container']")
|
133
|
-
m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
|
134
|
-
|
135
|
-
else
|
136
|
-
m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
match /url (on|off)$/
|
142
|
-
|
143
|
-
def execute(m, option)
|
144
|
-
|
145
|
-
config[:enabled_channels] ||= [bot.channels.map(&:name)]
|
146
|
-
puts bot.channels.map(&:name)
|
147
|
-
|
148
|
-
@url = option == "on"
|
149
|
-
|
150
|
-
case option
|
151
|
-
when "on"
|
152
|
-
config[:enabled_channels] << m.channel.name
|
153
|
-
else
|
154
|
-
config[:enabled_channels].delete(m.channel.name)
|
155
|
-
end
|
156
|
-
|
157
|
-
m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
|
158
|
-
|
159
|
-
@bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
|
160
|
-
|
161
|
-
config[:enabled_channels]=nil if config[:enabled_channels]==[]
|
162
|
-
|
163
|
-
rescue
|
164
|
-
m.reply Format(:red, "Error: #{$!}")
|
165
|
-
end
|
166
|
-
end
|
167
|
-
end
|
168
|
-
end
|