cinch-url-scraper 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,7 +71,7 @@ module Cinch
71
71
  page = @agent.get(link + "&nofeather=True")
72
72
 
73
73
  # Get page hits
74
- hits = page.search("//span[@class='watch-view-count ']")
74
+ hits = page.search("//span[@class='watch-view-count yt-uix-hovercard-target']")
75
75
  hits = hits.text.gsub(/[.,]/, ",")
76
76
 
77
77
  # Get likes
@@ -82,7 +82,7 @@ module Cinch
82
82
  dislikes = page.search("//span[@class='dislikes-count']")
83
83
  dislikes = dislikes.text.gsub(/[.,]/, ",")
84
84
 
85
- m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
85
+ m.reply "#{m.user.nick}'s YT Title: %s (Views: %s - Likes: %s || Dislikes: %s)" % [
86
86
  title, hits.strip, likes.strip, dislikes.strip
87
87
  ]
88
88
 
@@ -0,0 +1,168 @@
1
+ # source: http://subforge.org/projects/shreds/repository/entry/bot/cinch.rb#L396
2
+
3
+ # @copyright (c) 2010-2012, Christoph Kappel <unexist@dorfelite.net>
4
+
5
+ require "json"
6
+ require "mechanize"
7
+
8
+ module Cinch
9
+ module Plugins
10
+ class UrlScraper
11
+ include Cinch::Plugin
12
+ include Cinch::Helpers
13
+
14
+ listen_to :channel
15
+ set :plugin_name, 'urlscraper'
16
+ set :help, <<-USAGE.gsub(/^ {6}/, '')
17
+ If enabled, this plugin will return the title of the webpage that you or another user posts in the channel. For YouTube and IMDB there are special outputs for relevent information.
18
+ Enable/Disable Usage:
19
+ - !url [on/off]: This command will turn the URL Scraper on or off for the channel you use this command in.
20
+ USAGE
21
+
22
+ def listen(m)
23
+ return if m.message.include? "nospoil"
24
+ return if config[:enabled_channels] && ! config[:enabled_channels].include?(m.channel.name)
25
+ # Create mechanize agent
26
+ if @agent.nil?
27
+ @agent = Mechanize.new
28
+ @agent.user_agent_alias = "Linux Mozilla"
29
+ @agent.max_history = 0
30
+ end
31
+
32
+ URI.extract(m.message.gsub(/git@(gist.github.com):/,'git://\1/'), ["http", "https", "git"]).map do |link|
33
+ link=~/^(.*?)[:;,\)]?$/
34
+ $1
35
+ end.each do |link|
36
+ # Fetch data
37
+ begin
38
+ if git = link =~ /^git:\/\/(gist.github.com\/.*)\.git$/
39
+ link = "https://#{$1}"
40
+ end
41
+ uri = URI.parse(link)
42
+ page = @agent.get(link)
43
+ rescue Mechanize::ResponseCodeError
44
+ if "www.youtube.com" == uri.host
45
+ m.reply "Thank you, GEMA!"
46
+ else
47
+ m.reply "Y U POST BROKEN LINKS?", true
48
+ end
49
+
50
+ next
51
+ end
52
+
53
+ # Replace strange characters
54
+ title = page.title.gsub(/[\x00-\x1f]*/, "").gsub(/[ ]{2,}/, " ").strip rescue nil
55
+
56
+ # Check host
57
+ case uri.host
58
+ when "www.imdb.com"
59
+ # Get user rating
60
+ rating = page.search("//strong/span[@itemprop='ratingValue']").text
61
+
62
+ # Get votes
63
+ votes = page.search("//a/span[@itemprop='ratingCount']").text
64
+
65
+ m.reply "#{m.user.nick}'s IMDB Title: %s (Rating: %s/10 from %s users)" % [
66
+ title, rating, votes
67
+ ]
68
+
69
+ when "www.youtube.com"
70
+ # Reload with nofeather
71
+ page = @agent.get(link + "&nofeather=True")
72
+
73
+ # Get page hits
74
+ hits = page.search("//span[@class='watch-view-count ']")
75
+ hits = hits.text.gsub(/[.,]/, ",")
76
+
77
+ # Get likes
78
+ likes = page.search("//span[@class='likes-count']")
79
+ likes = likes.text.gsub(/[.,]/, ",")
80
+
81
+ # Get dislikes
82
+ dislikes = page.search("//span[@class='dislikes-count']")
83
+ dislikes = dislikes.text.gsub(/[.,]/, ",")
84
+
85
+ m.reply "#{m.user.nick}'s YT Title: %s (Views: %s, Likes: %s || Dislikes: %s)" % [
86
+ title, hits.strip, likes.strip, dislikes.strip
87
+ ]
88
+
89
+
90
+ when "gist.github.com"
91
+ # Get owner
92
+ owner = page.search("//div[@class='name']/a").inner_html
93
+
94
+ # Get time
95
+ age = page.search("//span[@class='date']/time")
96
+ age = age.first[:datetime] rescue age.text if age
97
+ age = Time.parse(age) rescue nil
98
+ age = age.strftime("%Y-%m-%d %H:%M") if age
99
+
100
+ if git
101
+ m.reply "Title: %s (at %s, %s on %s), Url: %s" % [
102
+ title, uri.host, owner, age, link
103
+ ]
104
+ else
105
+ m.reply "Title: %s (at %s, %s on %s)" % [
106
+ title, uri.host, owner, age
107
+ ]
108
+ end
109
+ when "pastie.org"
110
+ # Get time
111
+ age = Time.parse(page.search("//span[@class='typo_date']").text)
112
+ age = age.strftime("%Y-%m-%d %H:%M")
113
+
114
+ m.reply "Title: %s (at %s, on %s)" % [
115
+ title, uri.host, age
116
+ ]
117
+ when "subforge.org", "subtle.de"
118
+ m.reply "Title: %s (at %s)" % [ title, uri.host ]
119
+
120
+ when "twitter.com"
121
+ if link =~ /\/status\/(\d+)$/
122
+ json = @agent.get("https://api.twitter.com/1/statuses/show/#{$1}.json?trim_user=1").body
123
+ tweet = JSON.parse(json)
124
+ unescaped = CGI.unescapeHTML(tweet["text"])
125
+
126
+ m.reply "@%s: %s" % [ tweet["user"]["screen_name"], unescaped ]
127
+ else
128
+ m.reply "Broken twitter link: %s (at %s)" % [ title, uri.host ] if title
129
+ end
130
+
131
+ when "isup.me"
132
+ container = page.search("//div[@id='container']")
133
+ m.reply "#{container.children[0].to_s.strip} #{uri.path[1..-1]} #{container.children[2].to_s.strip}"
134
+
135
+ else
136
+ m.reply "Title: %s (at %s)" % [ title, uri.host ] if title
137
+ end
138
+ end
139
+ end
140
+
141
+ match /url (on|off)$/
142
+
143
+ def execute(m, option)
144
+
145
+ config[:enabled_channels] ||= [bot.channels.map(&:name)]
146
+ puts bot.channels.map(&:name)
147
+
148
+ @url = option == "on"
149
+
150
+ case option
151
+ when "on"
152
+ config[:enabled_channels] << m.channel.name
153
+ else
154
+ config[:enabled_channels].delete(m.channel.name)
155
+ end
156
+
157
+ m.reply Format(:green, "URL Scraping for #{m.channel} is now #{@url ? 'enabled' : 'disabled'}!")
158
+
159
+ @bot.debug("#{self.class.name} => #{config[:enabled_channels].inspect}");
160
+
161
+ config[:enabled_channels]=nil if config[:enabled_channels]==[]
162
+
163
+ rescue
164
+ m.reply Format(:red, "Error: #{$!}")
165
+ end
166
+ end
167
+ end
168
+ end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cinch-url-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Michal Papis
@@ -9,11 +10,12 @@ authors:
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
- date: 2014-06-29 00:00:00.000000000 Z
13
+ date: 2014-07-06 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: cinch
16
17
  requirement: !ruby/object:Gem::Requirement
18
+ none: false
17
19
  requirements:
18
20
  - - ~>
19
21
  - !ruby/object:Gem::Version
@@ -21,6 +23,7 @@ dependencies:
21
23
  type: :runtime
22
24
  prerelease: false
23
25
  version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
24
27
  requirements:
25
28
  - - ~>
26
29
  - !ruby/object:Gem::Version
@@ -28,6 +31,7 @@ dependencies:
28
31
  - !ruby/object:Gem::Dependency
29
32
  name: mechanize
30
33
  requirement: !ruby/object:Gem::Requirement
34
+ none: false
31
35
  requirements:
32
36
  - - ~>
33
37
  - !ruby/object:Gem::Version
@@ -35,6 +39,7 @@ dependencies:
35
39
  type: :runtime
36
40
  prerelease: false
37
41
  version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
38
43
  requirements:
39
44
  - - ~>
40
45
  - !ruby/object:Gem::Version
@@ -49,29 +54,31 @@ extra_rdoc_files: []
49
54
  files:
50
55
  - LICENSE
51
56
  - README.md
57
+ - lib/cinch/plugins/urlscraper.rb~
52
58
  - lib/cinch/plugins/urlscraper.rb
53
59
  homepage: https://github.com/mpapis/cinch-url-scraper
54
60
  licenses:
55
61
  - LGPLv3
56
- metadata: {}
57
62
  post_install_message:
58
63
  rdoc_options: []
59
64
  require_paths:
60
65
  - lib
61
66
  required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
62
68
  requirements:
63
- - - '>='
69
+ - - ! '>='
64
70
  - !ruby/object:Gem::Version
65
71
  version: 1.9.1
66
72
  required_rubygems_version: !ruby/object:Gem::Requirement
73
+ none: false
67
74
  requirements:
68
- - - '>='
75
+ - - ! '>='
69
76
  - !ruby/object:Gem::Version
70
77
  version: '0'
71
78
  requirements: []
72
79
  rubyforge_project:
73
- rubygems_version: 2.2.2
80
+ rubygems_version: 1.8.23
74
81
  signing_key:
75
- specification_version: 4
82
+ specification_version: 3
76
83
  summary: A Cinch plugin to get information about posted URLs.
77
84
  test_files: []
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 97acaced44b7c782d5f57b47d2dde2f40be42ab3
4
- data.tar.gz: b5be7bd2f97a5797ec225a2acac8bd9ccc876f51
5
- SHA512:
6
- metadata.gz: 63259c11ac36eb6719a348ba425ad1b7c6c24fae5f7a3e55144e9b8cec77d150ffb086ef0e4f4ced058dfa1e1e1399d8eb8606f5c35c65521ed2f5415540d363
7
- data.tar.gz: 2e6eae36d346cf69bee85aaa28ddaa81bae10d3eece8a4eff63acd9af48689430e92a4d30e32430aff3f3f65667e6e657e8582fd0d26d2d18348894f48c11c55