cinch-url-scraper 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -71,7 +71,7 @@ module Cinch
71
71
  page = @agent.get(link + "&nofeather=True")
72
72
 
73
73
  # Get page hits
74
- hits = page.search("//span[@class='watch-view-count yt-uix-hovercard-target']")
74
+ hits = page.search("//div[@id='watch7-views-info']//div[@class='watch-view-count']")
75
75
  hits = hits.text.gsub(/[.,]/, ",")
76
76
 
77
77
  # Get likes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cinch-url-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-07-06 00:00:00.000000000 Z
13
+ date: 2014-07-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: cinch
@@ -54,7 +54,6 @@ extra_rdoc_files: []
54
54
  files:
55
55
  - LICENSE
56
56
  - README.md
57
- - lib/cinch/plugins/urlscraper.rb~
58
57
  - lib/cinch/plugins/urlscraper.rb
59
58
  homepage: https://github.com/mpapis/cinch-url-scraper
60
59
  licenses:
@@ -1,168 +0,0 @@
1
- # source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
2
-
3
- # @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
4
-
5
- require "json"
6
- require "mechanize"
7
-
8
- module Cinch
9
- module Plugins
10
- class UrlScraper
11
- include Cinch::Plugin
12
- include Cinch::Helpers
13
-
14
- listen_to :channel
15
- set :plugin_name, 'urlscraper'
16
- set :help, <<-USAGE.gsub(/^ {6}/, '')
17
- If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
18
- Enable/Disable Usage:
19
- - !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
20
- USAGE
21
-
22
- def listen(m)
23
- return if m.message.include? "nospoil"
24
- return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
25
- # Create mechanize agent
26
- if @agent.nil?
27
- @agent = Mechanize.new
28
- @agent.user_agent_alias = "Linux Mozilla"
29
- @agent.max_history = 0
30
- end
31
-
32
- URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
33
- link=~/^(.*?)[:;,\)]?$/
34
- $1
35
- end.each do |link|
36
- # Fetch data
37
- begin
38
- if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
39
- link = "https://#{$1}"
40
- end
41
- uri = URI.parse(link)
42
- page = @agent.get(link)
43
- rescue Mechanize::ResponseCodeError
44
- if "www.youtube.com" == uri.host
45
- m.reply "Thank you, GEMA!"
46
- else
47
- m.reply "Y U POST BROKEN LINKS?", true
48
- end
49
-
50
- next
51
- end
52
-
53
- # Replace strange characters
54
- title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
55
-
56
- # Check host
57
- case uri.host
58
- when "www.imdb.com"
59
- # Get user rating
60
- rating = page.search("//strong/span[@itemprop='ratingValue']").text
61
-
62
- # Get votes
63
- votes = page.search("//a/span[@itemprop='ratingCount']").text
64
-
65
- m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
66
- title, rating, votes
67
- ]
68
-
69
- when "www.youtube.com"
70
- # Reload with nofeather
71
- page = @agent.get(link + "&nofeather=True")
72
-
73
- # Get page hits
74
- hits = page.search("//span[@class='watch-view-count ']")
75
- hits = hits.text.gsub(/[.,]/, ",")
76
-
77
- # Get likes
78
- likes = page.search("//span[@class='likes-count']")
79
- likes = likes.text.gsub(/[.,]/, ",")
80
-
81
- # Get dislikes
82
- dislikes = page.search("//span[@class='dislikes-count']")
83
- dislikes = dislikes.text.gsub(/[.,]/, ",")
84
-
85
- m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
86
- title, hits.strip, likes.strip, dislikes.strip
87
- ]
88
-
89
-
90
- when "gist.github.com"
91
- # Get owner
92
- owner = page.search("//div[@class='name']/a").inner_html
93
-
94
- # Get time
95
- age = page.search("//span[@class='date']/time")
96
- age = age.first[:datetime] rescue age.text if age
97
- age = Time.parse(age) rescue nil
98
- age = age.strftime("%Y-%m-%d %H:%M") if age
99
-
100
- if git
101
- m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
102
- title, uri.host, owner, age, link
103
- ]
104
- else
105
- m.reply "Title: %s (at %s, %s on %s)" % [
106
- title, uri.host, owner, age
107
- ]
108
- end
109
- when "pastie.org"
110
- # Get time
111
- age = Time.parse(page.search("//span[@class='typo_date']").text)
112
- age = age.strftime("%Y-%m-%d %H:%M")
113
-
114
- m.reply "Title: %s (at %s, on %s)" % [
115
- title, uri.host, age
116
- ]
117
- when "subforge.org", "subtle.de"
118
- m.reply "Title: %s (at %s)" % [ title, uri.host ]
119
-
120
- when "twitter.com"
121
- if link =~ /\/status\/(\d+)$/
122
- json = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
123
- tweet = JSON.parse(json)
124
- unescaped = CGI.unescapeHTML(tweet["text"])
125
-
126
- m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
127
- else
128
- m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
129
- end
130
-
131
- when "isup.me"
132
- container = page.search("//div[@id='container']")
133
- m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
134
-
135
- else
136
- m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
137
- end
138
- end
139
- end
140
-
141
- match /url (on|off)$/
142
-
143
- def execute(m, option)
144
-
145
- config[:enabled_channels] ||= [bot.channels.map(&:name)]
146
- puts bot.channels.map(&:name)
147
-
148
- @url = option == "on"
149
-
150
- case option
151
- when "on"
152
- config[:enabled_channels] << m.channel.name
153
- else
154
- config[:enabled_channels].delete(m.channel.name)
155
- end
156
-
157
- m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
158
-
159
- @bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
160
-
161
- config[:enabled_channels]=nil if config[:enabled_channels]==[]
162
-
163
- rescue
164
- m.reply Format(:red, "Error: #{$!}")
165
- end
166
- end
167
- end
168
- end