cinch-url-scraper 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,7 +71,7 @@ module Cinch
71
71
  page = @agent.get(link + "&nofeather=True")
72
72
 
73
73
  # Get page hits
74
- hits = page.search("//span[@class='watch-view-count yt-uix-hovercard-target']")
74
+ hits = page.search("//div[@id='watch7-views-info']//div[@class='watch-view-count']")
75
75
  hits = hits.text.gsub(/[.,]/, ",")
76
76
 
77
77
  # Get likes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cinch-url-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-07-06 00:00:00.000000000 Z
13
+ date: 2014-07-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: cinch
@@ -54,7 +54,6 @@ extra_rdoc_files: []
54
54
  files:
55
55
  - LICENSE
56
56
  - README.md
57
- - lib/cinch/plugins/urlscraper.rb~
58
57
  - lib/cinch/plugins/urlscraper.rb
59
58
  homepage: https://github.com/mpapis/cinch-url-scraper
60
59
  licenses:
@@ -1,168 +0,0 @@
1
- # source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
2
-
3
- # @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
4
-
5
- require "json"
6
- require "mechanize"
7
-
8
- module Cinch
9
- module Plugins
10
- class UrlScraper
11
- include Cinch::Plugin
12
- include Cinch::Helpers
13
-
14
- listen_to :channel
15
- set :plugin_name, 'urlscraper'
16
- set :help, <<-USAGE.gsub(/^ {6}/, '')
17
- If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
18
- Enable/Disable Usage:
19
- - !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
20
- USAGE
21
-
22
- def listen(m)
23
- return if m.message.include? "nospoil"
24
- return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
25
- # Create mechanize agent
26
- if @agent.nil?
27
- @agent = Mechanize.new
28
- @agent.user_agent_alias = "Linux Mozilla"
29
- @agent.max_history = 0
30
- end
31
-
32
- URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
33
- link=~/^(.*?)[:;,\)]?$/
34
- $1
35
- end.each do |link|
36
- # Fetch data
37
- begin
38
- if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
39
- link = "https://#{$1}"
40
- end
41
- uri = URI.parse(link)
42
- page = @agent.get(link)
43
- rescue Mechanize::ResponseCodeError
44
- if "www.youtube.com" == uri.host
45
- m.reply "Thank you, GEMA!"
46
- else
47
- m.reply "Y U POST BROKEN LINKS?", true
48
- end
49
-
50
- next
51
- end
52
-
53
- # Replace strange characters
54
- title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
55
-
56
- # Check host
57
- case uri.host
58
- when "www.imdb.com"
59
- # Get user rating
60
- rating = page.search("//strong/span[@itemprop='ratingValue']").text
61
-
62
- # Get votes
63
- votes = page.search("//a/span[@itemprop='ratingCount']").text
64
-
65
- m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
66
- title, rating, votes
67
- ]
68
-
69
- when "www.youtube.com"
70
- # Reload with nofeather
71
- page = @agent.get(link + "&nofeather=True")
72
-
73
- # Get page hits
74
- hits = page.search("//span[@class='watch-view-count ']")
75
- hits = hits.text.gsub(/[.,]/, ",")
76
-
77
- # Get likes
78
- likes = page.search("//span[@class='likes-count']")
79
- likes = likes.text.gsub(/[.,]/, ",")
80
-
81
- # Get dislikes
82
- dislikes = page.search("//span[@class='dislikes-count']")
83
- dislikes = dislikes.text.gsub(/[.,]/, ",")
84
-
85
- m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
86
- title, hits.strip, likes.strip, dislikes.strip
87
- ]
88
-
89
-
90
- when "gist.github.com"
91
- # Get owner
92
- owner = page.search("//div[@class='name']/a").inner_html
93
-
94
- # Get time
95
- age = page.search("//span[@class='date']/time")
96
- age = age.first[:datetime] rescue age.text if age
97
- age = Time.parse(age) rescue nil
98
- age = age.strftime("%Y-%m-%d %H:%M") if age
99
-
100
- if git
101
- m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
102
- title, uri.host, owner, age, link
103
- ]
104
- else
105
- m.reply "Title: %s (at %s, %s on %s)" % [
106
- title, uri.host, owner, age
107
- ]
108
- end
109
- when "pastie.org"
110
- # Get time
111
- age = Time.parse(page.search("//span[@class='typo_date']").text)
112
- age = age.strftime("%Y-%m-%d %H:%M")
113
-
114
- m.reply "Title: %s (at %s, on %s)" % [
115
- title, uri.host, age
116
- ]
117
- when "subforge.org", "subtle.de"
118
- m.reply "Title: %s (at %s)" % [ title, uri.host ]
119
-
120
- when "twitter.com"
121
- if link =~ /\/status\/(\d+)$/
122
- json = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
123
- tweet = JSON.parse(json)
124
- unescaped = CGI.unescapeHTML(tweet["text"])
125
-
126
- m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
127
- else
128
- m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
129
- end
130
-
131
- when "isup.me"
132
- container = page.search("//div[@id='container']")
133
- m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
134
-
135
- else
136
- m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
137
- end
138
- end
139
- end
140
-
141
- match /url (on|off)$/
142
-
143
- def execute(m, option)
144
-
145
- config[:enabled_channels] ||= [bot.channels.map(&:name)]
146
- puts bot.channels.map(&:name)
147
-
148
- @url = option == "on"
149
-
150
- case option
151
- when "on"
152
- config[:enabled_channels] << m.channel.name
153
- else
154
- config[:enabled_channels].delete(m.channel.name)
155
- end
156
-
157
- m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
158
-
159
- @bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
160
-
161
- config[:enabled_channels]=nil if config[:enabled_channels]==[]
162
-
163
- rescue
164
- m.reply Format(:red, "Error: #{$!}")
165
- end
166
- end
167
- end
168
- end