cinch-url-scraper 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -71,7 +71,7 @@ module Cinch
71
71
  page = @agent.get(link + "&nofeather=True")
72
72
 
73
73
  # Get page hits
74
- hits = page.search("//span[@class='watch-view-count ']")
74
+ hits = page.search("//span[@class='watch-view-count yt-uix-hovercard-target']")
75
75
  hits = hits.text.gsub(/[.,]/, ",")
76
76
 
77
77
  # Get likes
@@ -82,7 +82,7 @@ module Cinch
82
82
  dislikes = page.search("//span[@class='dislikes-count']")
83
83
  dislikes = dislikes.text.gsub(/[.,]/, ",")
84
84
 
85
- m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
85
+ m.reply "#{m.user.nick}'s YT Title: %s (Views: %s - Likes: %s || Dislikes: %s)" % [
86
86
  title, hits.strip, likes.strip, dislikes.strip
87
87
  ]
88
88
 
@@ -0,0 +1,168 @@
1
+ # source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
2
+
3
+ # @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
4
+
5
+ require "json"
6
+ require "mechanize"
7
+
8
module Cinch
  module Plugins
    # Cinch plugin that watches channel messages for links and replies with a
    # short summary of each one: the page title by default, with dedicated
    # output for IMDB, YouTube, gists, pastie, twitter and isup.me.
    #
    # Scraping can be limited per channel through config[:enabled_channels]:
    # * nil (never configured) - every channel is scraped;
    # * an Array of channel names - only the listed channels are scraped.
    # The "url on" / "url off" command edits that list at runtime.
    class UrlScraper
      include Cinch::Plugin
      include Cinch::Helpers

      # Matches the SSH-style gist remote ("git@gist.github.com:123.git") so it
      # can be rewritten into a git:// URL that URI.extract recognises.
      GIST_SSH_REMOTE = /git@(gist\.github\.com):/

      # Matches a git:// gist clone URL; capture 1 is "gist.github.com/<id>".
      GIST_CLONE_URL = %r{\Agit://(gist\.github\.com/.*)\.git\z}

      # The class attribute of the view counter differs between releases of this
      # plugin, so match the class token rather than the full attribute value.
      VIEW_COUNT_XPATH =
        "//span[contains(concat(' ', normalize-space(@class), ' '), ' watch-view-count ')]"

      listen_to :channel
      set :plugin_name, 'urlscraper'
      set :help, <<-USAGE.gsub(/^ {6}/, '')
      If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
      Enable/Disable Usage:
      - !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
      USAGE

      # Channel listener: replies once for every link found in the message.
      #
      # Nothing happens when the message contains "nospoil" or when a channel
      # list is configured and the current channel is not on it.
      #
      # @param m the incoming message (responds to #message, #channel, #user
      #   and #reply)
      def listen(m)
        return if m.message.include? "nospoil"

        allowed = config[:enabled_channels]
        return if allowed && !allowed.include?(m.channel.name)

        extract_links(m.message).each { |link| report_link(m, link) }
      end

      match(/url (on|off)$/)

      # Command handler that switches scraping on or off for the channel the
      # command was issued in.
      #
      # @param m the command message
      # @param option [String] "on" or "off" (captured by the match pattern)
      def execute(m, option)
        # The first toggle seeds the list with the channels the bot is in, so
        # that switching one channel off leaves the others enabled. The list
        # must be flat: listen and Array#delete compare plain channel names.
        config[:enabled_channels] ||= bot.channels.map(&:name)

        channel = m.channel.name
        enabled = option == "on"

        if enabled
          config[:enabled_channels] << channel unless config[:enabled_channels].include?(channel)
        else
          # An empty list is kept as-is: it means "disabled everywhere".
          # Resetting it to nil would re-enable scraping in every channel.
          config[:enabled_channels].delete(channel)
        end

        m.reply Format(:green, "URL Scraping for #{m.channel} is now #{enabled ? 'enabled' : 'disabled'}!")

        bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}")
      rescue StandardError => e
        m.reply Format(:red, "Error: #{e}")
      end

      private

      # Lazily built Mechanize agent shared by all requests of this plugin.
      def agent
        @agent ||= Mechanize.new.tap do |mech|
          mech.user_agent_alias = "Linux Mozilla"
          mech.max_history = 0
        end
      end

      # Returns the http, https and git links contained in +message+, each with
      # at most one trailing punctuation character (: ; , or ")") removed.
      def extract_links(message)
        text = message.gsub(GIST_SSH_REMOTE, 'git://\1/')

        URI.extract(text, %w[http https git]).map { |link| link.sub(/[:;,)]\z/, "") }
      end

      # Fetches +link+ and replies with the summary matching its host.
      def report_link(m, link)
        clone = GIST_CLONE_URL.match(link)
        link = "https://#{clone[1]}" if clone

        begin
          uri = URI.parse(link)
        rescue URI::InvalidURIError
          # Skip a link that cannot be parsed instead of raising, which would
          # abort the remaining links of the same message.
          return
        end

        begin
          page = agent.get(link)
        rescue Mechanize::ResponseCodeError
          if "www.youtube.com" == uri.host
            m.reply "Thank you, GEMA!"
          else
            m.reply "Y U POST BROKEN LINKS?", true
          end

          return
        end

        title = page_title(page)

        case uri.host
        when "www.imdb.com"
          reply_imdb(m, page, title)
        when "www.youtube.com"
          reply_youtube(m, link, uri, title)
        when "gist.github.com"
          reply_gist(m, page, uri, title, link, !clone.nil?)
        when "pastie.org"
          reply_pastie(m, page, uri, title)
        when "subforge.org", "subtle.de"
          m.reply "Title: %s (at %s)" % [title, uri.host]
        when "twitter.com"
          reply_twitter(m, link, uri, title)
        when "isup.me"
          reply_isup(m, page, uri)
        else
          m.reply "Title: %s (at %s)" % [title, uri.host] if title
        end
      end

      # Returns the page title with control characters removed and runs of
      # spaces collapsed, or nil when no title can be obtained. This is
      # deliberately best-effort: any failure while reading or cleaning the
      # title yields nil instead of an error.
      def page_title(page)
        return nil unless page.respond_to?(:title)

        raw = page.title
        raw && raw.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip
      rescue StandardError
        nil
      end

      # Returns the text of the nodes matched by +xpath+ with "." and ","
      # both normalised to "," and surrounding whitespace removed.
      def counter_text(page, xpath)
        page.search(xpath).text.gsub(/[.,]/, ",").strip
      end

      # Parses +text+ as a time and formats it as "YYYY-MM-DD HH:MM"; returns
      # nil when the text holds no parsable time.
      def format_time(text)
        Time.parse(text).strftime("%Y-%m-%d %H:%M")
      rescue ArgumentError, TypeError
        nil
      end

      # IMDB: title plus user rating and vote count.
      def reply_imdb(m, page, title)
        rating = page.search("//strong/span[@itemprop='ratingValue']").text
        votes = page.search("//a/span[@itemprop='ratingCount']").text

        m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
          title, rating, votes
        ]
      end

      # YouTube: title plus view, like and dislike counters, read from a second
      # request that carries the nofeather parameter.
      def reply_youtube(m, link, uri, title)
        # Use "?" when the link has no query string yet; a bare "&" would
        # otherwise be appended to the path.
        separator = uri.query ? "&" : "?"
        page = agent.get("#{link}#{separator}nofeather=True")

        hits = counter_text(page, VIEW_COUNT_XPATH)
        likes = counter_text(page, "//span[@class='likes-count']")
        dislikes = counter_text(page, "//span[@class='dislikes-count']")

        m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
          title, hits, likes, dislikes
        ]
      end

      # Gist: title, owner and timestamp; the resolved URL is appended when the
      # link was posted as a git clone URL.
      def reply_gist(m, page, uri, title, link, from_git)
        owner = page.search("//div[@class='name']/a").inner_html

        stamps = page.search("//span[@class='date']/time")
        first_stamp = stamps.first
        # Prefer the datetime attribute; fall back to the element text.
        age = format_time((first_stamp && first_stamp[:datetime]) || stamps.text)

        if from_git
          m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
            title, uri.host, owner, age, link
          ]
        else
          m.reply "Title: %s (at %s, %s on %s)" % [
            title, uri.host, owner, age
          ]
        end
      end

      # Pastie: title and paste timestamp (blank when it cannot be parsed).
      def reply_pastie(m, page, uri, title)
        age = format_time(page.search("//span[@class='typo_date']").text)

        m.reply "Title: %s (at %s, on %s)" % [
          title, uri.host, age
        ]
      end

      # Twitter: quotes the tweet for status links, otherwise reports the link
      # as broken.
      # NOTE(review): relies on the v1 statuses/show endpoint and on the
      # response carrying user.screen_name - confirm both still hold.
      def reply_twitter(m, link, uri, title)
        status_id = link[%r{/status/(\d+)$}, 1]

        if status_id
          json = agent.get("https://api.twitter.com/1/statuses/show/#{status_id}.json?trim_user=1").body
          tweet = JSON.parse(json)
          unescaped = CGI.unescapeHTML(tweet["text"])

          m.reply "@%s: %s" % [tweet["user"]["screen_name"], unescaped]
        else
          m.reply "Broken twitter link: %s (at %s)" % [title, uri.host] if title
        end
      end

      # isup.me: joins the first and third child of the container div around
      # the checked host, which is taken from the URL path.
      def reply_isup(m, page, uri)
        container = page.search("//div[@id='container']")

        m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cinch-url-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Michal Papis
@@ -9,11 +10,12 @@ authors:
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
- date: 2014-06-29 00:00:00.000000000 Z
13
+ date: 2014-07-06 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: cinch
16
17
  requirement: !ruby/object:Gem::Requirement
18
+ none: false
17
19
  requirements:
18
20
  - - ~>
19
21
  - !ruby/object:Gem::Version
@@ -21,6 +23,7 @@ dependencies:
21
23
  type: :runtime
22
24
  prerelease: false
23
25
  version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
24
27
  requirements:
25
28
  - - ~>
26
29
  - !ruby/object:Gem::Version
@@ -28,6 +31,7 @@ dependencies:
28
31
  - !ruby/object:Gem::Dependency
29
32
  name: mechanize
30
33
  requirement: !ruby/object:Gem::Requirement
34
+ none: false
31
35
  requirements:
32
36
  - - ~>
33
37
  - !ruby/object:Gem::Version
@@ -35,6 +39,7 @@ dependencies:
35
39
  type: :runtime
36
40
  prerelease: false
37
41
  version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
38
43
  requirements:
39
44
  - - ~>
40
45
  - !ruby/object:Gem::Version
@@ -49,29 +54,31 @@ extra_rdoc_files: []
49
54
  files:
50
55
  - LICENSE
51
56
  - README.md
57
+ - lib/cinch/plugins/urlscraper.rb~
52
58
  - lib/cinch/plugins/urlscraper.rb
53
59
  homepage: https://github.com/mpapis/cinch-url-scraper
54
60
  licenses:
55
61
  - LGPLv3
56
- metadata: {}
57
62
  post_install_message:
58
63
  rdoc_options: []
59
64
  require_paths:
60
65
  - lib
61
66
  required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
62
68
  requirements:
63
- - - '>='
69
+ - - ! '>='
64
70
  - !ruby/object:Gem::Version
65
71
  version: 1.9.1
66
72
  required_rubygems_version: !ruby/object:Gem::Requirement
73
+ none: false
67
74
  requirements:
68
- - - '>='
75
+ - - ! '>='
69
76
  - !ruby/object:Gem::Version
70
77
  version: '0'
71
78
  requirements: []
72
79
  rubyforge_project:
73
- rubygems_version: 2.2.2
80
+ rubygems_version: 1.8.23
74
81
  signing_key:
75
- specification_version: 4
82
+ specification_version: 3
76
83
  summary: A Cinch plugin to get information about posted URLs.
77
84
  test_files: []
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 97acaced44b7c782d5f57b47d2dde2f40be42ab3
4
- data.tar.gz: b5be7bd2f97a5797ec225a2acac8bd9ccc876f51
5
- SHA512:
6
- metadata.gz: 63259c11ac36eb6719a348ba425ad1b7c6c24fae5f7a3e55144e9b8cec77d150ffb086ef0e4f4ced058dfa1e1e1399d8eb8606f5c35c65521ed2f5415540d363
7
- data.tar.gz: 2e6eae36d346cf69bee85aaa28ddaa81bae10d3eece8a4eff63acd9af48689430e92a4d30e32430aff3f3f65667e6e657e8582fd0d26d2d18348894f48c11c55