birdwatcher 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/Rakefile +10 -0
- data/bin/console +42 -0
- data/birdwatcher.gemspec +40 -0
- data/data/english_stopwords.txt +319 -0
- data/data/top100Kenglishwords.txt +100000 -0
- data/db/migrations/001_create_workspaces.rb +11 -0
- data/db/migrations/002_create_users.rb +29 -0
- data/db/migrations/003_create_statuses.rb +28 -0
- data/db/migrations/004_create_mentions.rb +13 -0
- data/db/migrations/005_create_mentions_statuses.rb +8 -0
- data/db/migrations/006_create_hashtags.rb +11 -0
- data/db/migrations/007_create_hashtags_statuses.rb +8 -0
- data/db/migrations/008_create_urls.rb +16 -0
- data/db/migrations/009_create_statuses_urls.rb +8 -0
- data/db/migrations/010_create_klout_topics.rb +10 -0
- data/db/migrations/011_create_klout_topics_users.rb +8 -0
- data/db/migrations/012_create_influencers.rb +10 -0
- data/db/migrations/013_create_influencers_users.rb +8 -0
- data/db/migrations/014_create_influencees.rb +10 -0
- data/db/migrations/015_create_influencees_users.rb +8 -0
- data/exe/birdwatcher +12 -0
- data/lib/birdwatcher/command.rb +78 -0
- data/lib/birdwatcher/commands/back.rb +15 -0
- data/lib/birdwatcher/commands/exit.rb +16 -0
- data/lib/birdwatcher/commands/help.rb +60 -0
- data/lib/birdwatcher/commands/irb.rb +34 -0
- data/lib/birdwatcher/commands/module.rb +106 -0
- data/lib/birdwatcher/commands/query.rb +58 -0
- data/lib/birdwatcher/commands/query_csv.rb +56 -0
- data/lib/birdwatcher/commands/resource.rb +45 -0
- data/lib/birdwatcher/commands/run.rb +19 -0
- data/lib/birdwatcher/commands/schema.rb +116 -0
- data/lib/birdwatcher/commands/set.rb +56 -0
- data/lib/birdwatcher/commands/shell.rb +21 -0
- data/lib/birdwatcher/commands/show.rb +86 -0
- data/lib/birdwatcher/commands/status.rb +114 -0
- data/lib/birdwatcher/commands/unset.rb +37 -0
- data/lib/birdwatcher/commands/use.rb +25 -0
- data/lib/birdwatcher/commands/user.rb +155 -0
- data/lib/birdwatcher/commands/workspace.rb +176 -0
- data/lib/birdwatcher/concerns/concurrency.rb +25 -0
- data/lib/birdwatcher/concerns/core.rb +105 -0
- data/lib/birdwatcher/concerns/outputting.rb +114 -0
- data/lib/birdwatcher/concerns/persistence.rb +101 -0
- data/lib/birdwatcher/concerns/presentation.rb +122 -0
- data/lib/birdwatcher/concerns/util.rb +138 -0
- data/lib/birdwatcher/configuration.rb +63 -0
- data/lib/birdwatcher/configuration_wizard.rb +65 -0
- data/lib/birdwatcher/console.rb +201 -0
- data/lib/birdwatcher/http_client.rb +164 -0
- data/lib/birdwatcher/klout_client.rb +83 -0
- data/lib/birdwatcher/kml.rb +125 -0
- data/lib/birdwatcher/module.rb +253 -0
- data/lib/birdwatcher/modules/statuses/kml.rb +106 -0
- data/lib/birdwatcher/modules/statuses/sentiment.rb +77 -0
- data/lib/birdwatcher/modules/statuses/word_cloud.rb +205 -0
- data/lib/birdwatcher/modules/urls/crawl.rb +138 -0
- data/lib/birdwatcher/modules/urls/most_shared.rb +98 -0
- data/lib/birdwatcher/modules/users/activity_plot.rb +62 -0
- data/lib/birdwatcher/modules/users/import.rb +61 -0
- data/lib/birdwatcher/modules/users/influence_graph.rb +93 -0
- data/lib/birdwatcher/modules/users/klout_id.rb +62 -0
- data/lib/birdwatcher/modules/users/klout_influence.rb +83 -0
- data/lib/birdwatcher/modules/users/klout_score.rb +64 -0
- data/lib/birdwatcher/modules/users/klout_topics.rb +72 -0
- data/lib/birdwatcher/modules/users/social_graph.rb +110 -0
- data/lib/birdwatcher/punchcard.rb +183 -0
- data/lib/birdwatcher/util.rb +83 -0
- data/lib/birdwatcher/version.rb +3 -0
- data/lib/birdwatcher.rb +43 -0
- data/models/hashtag.rb +8 -0
- data/models/influencee.rb +8 -0
- data/models/influencer.rb +8 -0
- data/models/klout_topic.rb +8 -0
- data/models/mention.rb +8 -0
- data/models/status.rb +11 -0
- data/models/url.rb +8 -0
- data/models/user.rb +11 -0
- data/models/workspace.rb +26 -0
- metadata +405 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Statuses
|
4
|
+
class WordCloud < Birdwatcher::Module
|
5
|
+
# Module metadata: display name, description, author and the configurable
# options. Options are keyed by name; each entry carries its default :value,
# a :description, a :required flag and, for on/off switches, :boolean.
self.meta = {
  name: "Word Cloud",
  description: "Generates a word cloud from statuses",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "DEST" => { value: nil, description: "Destination file", required: true },
    "USERS" => {
      value: nil,
      description: "Space-separated list of screen names (all users if empty)",
      required: false
    },
    "SINCE" => {
      value: nil,
      description: "Process statuses posted since specified time (last 7 days if empty)",
      required: false
    },
    "BEFORE" => {
      value: nil,
      description: "Process statuses posted before specified time (from now if empty)",
      required: false
    },
    "MIN_WORD_COUNT" => {
      value: 3,
      description: "Exclude words mentioned fewer times than specified",
      required: false
    },
    "MIN_WORD_LENGTH" => { value: 3, description: "Exclude words smaller than specified", required: false },
    "EXCLUDE_STOPWORDS" => {
      value: true,
      description: "Exclude english stopwords",
      required: false,
      boolean: true
    },
    "EXCLUDE_COMMON" => {
      value: true,
      description: "Exclude common english words",
      required: false,
      boolean: true
    },
    "EXCLUDE_WORDS" => { value: nil, description: "Space-separated list of words to exclude", required: false },
    "EXCLUDE_HASHTAGS" => {
      value: false,
      description: "Exclude Hashtags",
      required: false,
      boolean: true
    },
    "EXCLUDE_MENTIONS" => {
      value: true,
      description: "Exclude @username mentions",
      required: false,
      boolean: true
    },
    "INCLUDE_PAGE_TITLES" => {
      value: false,
      description: "Include web page titles from shared URLs (requires crawling with urls/crawl)",
      required: false,
      boolean: true
    },
    "WORD_CAP" => { value: 200, description: "Cap list of words to specified amount", required: false },
    "PALETTE" => {
      value: "#8F99AB #A3ADC2 #272A2F #474C55 #3D4148 #021121 #293642 #516982 #516982 #415569",
      description: "Space-separated list of hex color codes to use for word cloud",
      required: true
    },
    "IMAGE_WIDTH" => { value: 1024, description: "Image width in pixels", required: true },
    "IMAGE_HEIGHT" => { value: 1024, description: "Image height in pixels", required: true }
  }
}
|
97
|
+
|
98
|
+
# Words that seed the exclusion list before any option-driven additions.
# Frozen so the shared constant cannot be modified in place by accident.
DEFAULT_EXCLUDED_WORDS = %w(rt via oh).freeze
|
99
|
+
|
100
|
+
# Returns the long-form help text for the Word Cloud module.
def self.info
  options_command = 'show options'.bold
  <<-TEXT
The Word Cloud module can generate a classic weighted word cloud from words used
in statuses across all or specific users and between different times.

The module is heavily configurable; have a look at the options with #{options_command}

Please note that configuring the module with a long timespan might result in a
very long execution time when the word cloud image is generated.

The generated image will be in PNG format.
  TEXT
end
|
113
|
+
|
114
|
+
# Builds a word cloud image from statuses in the current workspace.
#
# Statuses are narrowed by the USERS, SINCE and BEFORE options, split into
# words, filtered through exclude_word? and counted. When INCLUDE_PAGE_TITLES
# is set, the titles of URLs attached to each status are counted as well. The
# tally is trimmed by MIN_WORD_COUNT and WORD_CAP, rendered with MagicCloud
# and written to the file named by DEST.
#
# Returns false when no statuses match the selection.
def run
  statuses =
    if option_setting("USERS")
      screen_names = option_setting("USERS").split(" ").map(&:strip)
      user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
      current_workspace.statuses_dataset.where("user_id IN ?", user_ids)
    else
      current_workspace.statuses_dataset
    end

  since =
    if option_setting("SINCE")
      parse_time(option_setting("SINCE")).strftime("%Y-%m-%d")
    else
      (Date.today - 7).strftime("%Y-%m-%d")
    end
  before =
    if option_setting("BEFORE")
      parse_time(option_setting("BEFORE")).strftime("%Y-%m-%d")
    else
      Time.now.strftime("%Y-%m-%d")
    end

  statuses = statuses.where("DATE(posted_at) >= DATE(?) AND DATE(posted_at) <= DATE(?)", since, before).all
  if statuses.count.zero?
    error("There are no statuses to process")
    return false
  end

  prepare_exclusion_list
  words = {}
  sorted_words = []

  # Adds every non-excluded word of the given text to the running tally.
  count_words = lambda do |text|
    split_into_words(text).each do |word|
      next if exclude_word?(word)
      words[word] = words.fetch(word, 0) + 1
    end
  end

  task("Processing #{statuses.count.to_s.bold} statuses...") do
    statuses.each do |status|
      count_words.call(status.text)
      next unless option_setting("INCLUDE_PAGE_TITLES")

      page_titles = status.urls_dataset
        .where("title IS NOT NULL")
        .where("final_url NOT LIKE 'https://twitter.com/%'")
        .map(&:title)
      page_titles.each { |page_title| count_words.call(page_title) }
    end

    minimum_count = option_setting("MIN_WORD_COUNT")
    words.delete_if { |_word, count| count < minimum_count.to_i } if minimum_count

    # Most frequent words first.
    sorted_words = words.sort_by { |_word, count| count }.reverse
    word_cap = option_setting("WORD_CAP")
    sorted_words = sorted_words.take(word_cap.to_i) if word_cap
  end

  task("Generating word cloud, patience please...") do
    palette = option_setting("PALETTE").split(" ").map(&:strip)
    cloud = MagicCloud::Cloud.new(sorted_words, :rotate => :none, :palette => palette)
    image = cloud
      .draw(option_setting("IMAGE_WIDTH").to_i, option_setting("IMAGE_HEIGHT").to_i)
      .to_blob { self.format = "png" }
    File.open(option_setting("DEST"), "wb") { |file| file.write(image) }
  end
  info("Word cloud written to #{option_setting('DEST').bold}")
end
|
174
|
+
|
175
|
+
private
|
176
|
+
|
177
|
+
# Builds @exclusion_list, the collection of words exclude_word? rejects.
#
# The list starts from DEFAULT_EXCLUDED_WORDS and is extended by the
# EXCLUDE_WORDS option and, when enabled, the bundled stopword and
# common-word data files. It is stored as a Set because the common-word file
# alone can add a very large number of entries and the list is probed once
# per word; a Set answers include? without scanning every entry.
#
# All entries are downcased, since split_into_words yields lowercase words.
def prepare_exclusion_list
  require "set"

  excluded = Set.new(DEFAULT_EXCLUDED_WORDS)
  if option_setting("EXCLUDE_WORDS")
    excluded.merge(option_setting("EXCLUDE_WORDS").split(" ").map { |w| w.strip.downcase })
  end
  if option_setting("EXCLUDE_STOPWORDS")
    excluded.merge(read_data_file("english_stopwords.txt").split("\n").map { |w| w.strip.downcase })
  end
  if option_setting("EXCLUDE_COMMON")
    excluded.merge(read_data_file("top100Kenglishwords.txt").split("\n").map { |w| w.strip.downcase })
  end
  @exclusion_list = excluded
end
|
189
|
+
|
190
|
+
# Decides whether a word should be left out of the word cloud.
#
# A word is excluded when it is empty, shorter than MIN_WORD_LENGTH, a
# hashtag while EXCLUDE_HASHTAGS is set, a mention while EXCLUDE_MENTIONS is
# set, or present in @exclusion_list (built by prepare_exclusion_list).
#
# Returns true or false (never nil).
def exclude_word?(word)
  return true if word.empty?

  min_length = option_setting("MIN_WORD_LENGTH")
  return true if min_length && word.length < min_length.to_i
  return true if option_setting("EXCLUDE_HASHTAGS") && word.start_with?("#")
  return true if option_setting("EXCLUDE_MENTIONS") && word.start_with?("@")

  @exclusion_list.include?(word)
end
|
197
|
+
|
198
|
+
# Splits a piece of text into lowercase words.
#
# URLs are removed, and every character other than ASCII letters, digits,
# "@", "#", "_" and space is turned into a space before splitting, so
# hashtags and mentions keep their leading marker. A nil text yields an
# empty list instead of raising.
#
# Returns an Array of Strings.
def split_into_words(text)
  text.to_s
      .downcase
      .gsub(/https?:\/\/[\S]+/, "")
      .gsub(/[^0-9a-z@#_ ]/i, " ")
      .split(" ") # whitespace-splitting already drops empty and padded tokens
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Urls
|
4
|
+
class Crawl < Birdwatcher::Module
|
5
|
+
# Module metadata: display name, description, author and the configurable
# options. Options are keyed by name; each entry carries its default :value,
# a :description, a :required flag and, for on/off switches, :boolean.
self.meta = {
  name: "URL Crawler",
  description: "Enrich gathered URLs with HTTP status codes, content types and page titles",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "USER_AGENT" => {
      value: nil,
      description: "Specific HTTP User-Agent to use (randomized user-agents if not set)",
      required: false
    },
    "TIMEOUT" => {
      value: Birdwatcher::HttpClient::DEFAULT_TIMEOUT,
      description: "Request timeout in seconds",
      required: false
    },
    "RETRIES" => {
      value: Birdwatcher::HttpClient::DEFAULT_RETRIES,
      description: "Amount of retries on failed requests",
      required: false
    },
    "RETRY_FAILED" => {
      value: false,
      description: "Retry previously failed crawls",
      required: false,
      boolean: true
    },
    "PROXY_ADDR" => { value: nil, description: "HTTP proxy address to use for requests", required: false },
    "PROXY_PORT" => { value: nil, description: "HTTP proxy port to use for requests", required: false },
    "PROXY_USER" => { value: nil, description: "HTTP proxy user to use for requests", required: false },
    # Previously described as "proxy user", a copy-paste of PROXY_USER.
    "PROXY_PASS" => { value: nil, description: "HTTP proxy password to use for requests", required: false },
    "THREADS" => { value: 10, description: "The number of concurrent threads", required: false }
  }
}
|
58
|
+
|
59
|
+
# Captures the text of the first <title> element of an HTML document.
# Case-insensitive; the /m flag lets the title span several lines and
# [^>]* tolerates attributes on the opening tag. The captured text may
# therefore carry surrounding whitespace or newlines.
PAGE_TITLE_REGEX = /<title\b[^>]*>(.*?)<\/title>/im
|
60
|
+
|
61
|
+
# Returns the long-form help text for the URL Crawler module.
#
# The content type example reads "text/html", the value #run looks for when
# deciding whether to fetch a page title.
def self.info
  <<-INFO
The URL Crawler module crawls shared URLs and enriches them with additional
information:

* HTTP status code (200, 404, 500, etc.)
* Content type (text/html, application/pdf, etc.)
* Page title (if HTML document)

Page titles can be included in the Word Cloud generated with the
#{'statuses/word_cloud'.bold} module.

#{'CAUTION:'.bold} Depending on the users in the workspace, it might not be safe
to blindly request shared URLs. Consider using the #{'PROXY_ADDR'.bold} and #{'PROXY_PORT'.bold}
module options.
  INFO
end
|
78
|
+
|
79
|
+
# Crawls URLs of the current workspace that have not been crawled yet (and,
# with RETRY_FAILED, those crawled without an HTTP status) on a thread pool.
#
# Each URL gets a HEAD request to record the final URL, HTTP status and
# content type; for text/html responses the page is fetched and its title
# stored. crawled_at is set whether or not the crawl succeeds.
#
# Returns false when there is nothing to crawl.
def run
  candidates = current_workspace.urls_dataset
  candidates =
    if option_setting("RETRY_FAILED")
      candidates.where("crawled_at IS NULL or (crawled_at IS NOT NULL AND http_status IS NULL)")
    else
      candidates.where(:crawled_at => nil)
    end
  urls = candidates.order(Sequel.desc(:created_at))

  if urls.empty?
    error("There are currently no URLs in this workspace")
    return false
  end

  threads = thread_pool(option_setting("THREADS").to_i)
  proxy_port = option_setting("PROXY_PORT")
  http_client = Birdwatcher::HttpClient.new(
    :timeout        => option_setting("TIMEOUT").to_i,
    :retries        => option_setting("RETRIES").to_i,
    :user_agent     => option_setting("USER_AGENT"),
    :http_proxyaddr => option_setting("PROXY_ADDR"),
    :http_proxyport => (proxy_port ? proxy_port.to_i : nil),
    :http_proxyuser => option_setting("PROXY_USER"),
    :http_proxypass => option_setting("PROXY_PASS")
  )

  urls.each do |url|
    threads.process do
      begin
        # Overall guard of twice the request timeout around HEAD plus GET.
        Timeout::timeout(option_setting("TIMEOUT").to_i * 2) do
          response = http_client.do_head(url.url)
          content_type = response.headers["content-type"]

          url.final_url    = response.url
          url.http_status  = response.status
          url.content_type = content_type
          if response.headers.key?("content-type") && content_type.include?("text/html")
            url.title = extract_page_title(http_client.do_get(response.url).body)
          end
          url.crawled_at = Time.now
          url.save
          info("Crawled #{url.url.bold} (#{response.status} - #{content_type})")
        end
      rescue => e
        # Record the attempt even when the crawl failed.
        url.crawled_at = Time.now
        url.save
        error("Crawling failed for #{url.url.bold} (#{e.class})")
      end
    end
  end
  threads.shutdown
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
# Extracts the page title from an HTML document body.
#
# body - the response body; nil is treated as an empty document.
#
# Invalid byte sequences are scrubbed first so the regular expression match
# cannot raise on badly encoded pages. The first PAGE_TITLE_REGEX capture is
# stripped of surrounding whitespace and HTML entities are unescaped.
#
# Returns the title String, or nil when the body has no title element.
def extract_page_title(body)
  raw_title = body.to_s.scrub[PAGE_TITLE_REGEX, 1]
  return nil if raw_title.nil?

  CGI.unescapeHTML(raw_title.strip)
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Urls
|
4
|
+
class MostShared < Birdwatcher::Module
|
5
|
+
# Module metadata: display name, description, author and the configurable
# options. Options are keyed by name; each entry carries its default :value,
# a :description and a :required flag.
self.meta = {
  name: "Most Shared URLs",
  description: "Lists shared URLs ordered from most to least shared",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "USERS" => {
      value: nil,
      description: "Space-separated list of screen names (all users if empty)",
      required: false
    },
    "MIN_SHARE_COUNT" => {
      value: 2,
      description: "Exclude URLS shared fewer times than specified",
      required: false
    },
    "SINCE" => {
      value: nil,
      description: "List URLs shared since specified time (last 7 days if empty)",
      required: false
    },
    "BEFORE" => {
      value: nil,
      description: "List URLs shared before specified time (from now if empty)",
      required: false
    }
  }
}
|
32
|
+
|
33
|
+
# Returns the long-form help text for the Most Shared URLs module.
def self.info
  crawl_module = 'urls/crawl'.bold
  <<-TEXT
The Most Shared URLs module can show a simple list of shared URLs ordered from
most to least shared. If a URL has been shared by several people, it is a good
indication that it has important or interesting information.

To enhance the functionality of this module, it is recommended to run the
#{crawl_module} module first in order to get information on the URLs such
as HTTP status codes, content types and page titles. If the information is
available, this module will display it.
  TEXT
end
|
45
|
+
|
46
|
+
# Lists URLs shared in the current workspace, most shared first.
#
# Statuses are limited to the SINCE..BEFORE date window (last 7 days up to
# today by default) and, when USERS is set, to the given screen names. URLs
# shared fewer than MIN_SHARE_COUNT times are left out of the listing.
#
# The query is assembled once: conditions and their bind values are appended
# together, so each "?" always lines up with its value. (The USERS variant
# used to bind since/before in the opposite order to its placeholders, which
# inverted the date window.)
#
# Returns false when the query yields no URLs.
def run
  since =
    if option_setting("SINCE")
      parse_time(option_setting("SINCE")).strftime("%Y-%m-%d")
    else
      (Date.today - 7).strftime("%Y-%m-%d")
    end
  before =
    if option_setting("BEFORE")
      parse_time(option_setting("BEFORE")).strftime("%Y-%m-%d")
    else
      Time.now.strftime("%Y-%m-%d")
    end

  conditions = ["statuses.workspace_id = ?"]
  bindings   = [current_workspace.id]
  if option_setting("USERS")
    screen_names = option_setting("USERS").split(" ").map(&:strip)
    user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
    conditions << "statuses.user_id IN ?"
    bindings << user_ids
  end
  conditions << "DATE(statuses.posted_at) >= DATE(?)"
  bindings << since
  conditions << "DATE(statuses.posted_at) <= DATE(?)"
  bindings << before

  sql = <<-SQL
    SELECT urls.url, urls.final_url, urls.title, urls.http_status, urls.content_type, count(statuses_urls.*) AS count
    FROM urls
    INNER JOIN statuses_urls
    ON statuses_urls.url_id = urls.id
    INNER JOIN statuses
    ON statuses_urls.status_id = statuses.id
    WHERE #{conditions.join("\n    AND ")}
    GROUP BY urls.url, urls.final_url, urls.title, urls.http_status, urls.content_type
    ORDER BY count DESC
  SQL
  urls = database[sql, *bindings].all

  if urls.empty?
    error("There are no URLs to display")
    return false
  end

  # The option may arrive as a String, so compare numerically; "fewer than"
  # means a URL shared exactly MIN_SHARE_COUNT times is still listed.
  min_share_count = option_setting("MIN_SHARE_COUNT")
  listed = urls.reject { |url| min_share_count && url[:count] < min_share_count.to_i }
  text = listed.map do |url|
    make_url_summary_output(url) + "\n#{Birdwatcher::Console::LINE_SEPARATOR}\n\n"
  end.join
  page_text(text)
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Statuses
|
4
|
+
class ActivityPlot < Birdwatcher::Module
|
5
|
+
# Module metadata: display name, description, author and the configurable
# options. Options are keyed by name; each entry carries its default :value,
# a :description, a :required flag and, for on/off switches, :boolean.
self.meta = {
  name: "Activity Plot",
  description: "Generates punchcard plot of a user's activity",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "DEST" => { value: nil, description: "Destination file", required: true },
    "USER" => { value: nil, description: "Screen name of user to analyze", required: true },
    "ONLY_REPLIES" => {
      value: false,
      description: "Only plot when the user replies to other users",
      required: false,
      boolean: true
    }
  }
}
|
28
|
+
|
29
|
+
# Returns the long-form help text for the Activity Plot module.
def self.info
  <<-TEXT
The Activity Plot module can generate a punchcard plot of when a user is the
most engaged with Twitter. The plot can be used to find the most likely time
(day and hour) where a user will engage with Twitter content.

The generated file is in PNG format.
  TEXT
end
|
38
|
+
|
39
|
+
# Generates a punchcard plot of when the user named by USER posts statuses
# and writes it to the file named by DEST. With ONLY_REPLIES set, only
# statuses whose text starts with "@" are plotted.
#
# Returns false when the user is not in the workspace or has no matching
# statuses.
def run
  # Kept in a local so the not-found message can name the user; the message
  # previously referenced an undefined variable and raised NameError.
  screen_name = option_setting("USER")
  user = current_workspace.users_dataset.first(:screen_name => screen_name)
  unless user
    error("User #{screen_name.bold} was not found in workspace")
    return false
  end

  timestamps =
    if option_setting("ONLY_REPLIES")
      user.statuses_dataset.where("text LIKE '@%'").map(&:posted_at)
    else
      user.statuses.map(&:posted_at)
    end
  if timestamps.empty?
    error("There are no statuses to process")
    return false
  end

  punchcard = Birdwatcher::Punchcard.new(timestamps)
  task("Generating activity plot from #{timestamps.count.to_s.bold} statuses...") do
    punchcard.generate(option_setting("DEST"))
  end
  info("Activity plot written to #{option_setting('DEST').bold}")
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Users
|
4
|
+
class Import < Birdwatcher::Module
|
5
|
+
# Module metadata: display name, description, author and the single FILE
# option (default :value, :description and :required flag).
self.meta = {
  name: "User Importer",
  description: "Import users from a file containing screen names.",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "FILE" => { value: nil, description: "File to read screen names from.", required: true }
  }
}
|
17
|
+
|
18
|
+
# Returns the long-form help text for the User Importer module.
def self.info
  <<-TEXT
The User Importer module is a simple module to add a large number of users to
the currently active workspace by parsing a file containing screen names.

The file is expected to contain one screen name per line, without the @ sign or
https://twitter.com/ in front of them.
  TEXT
end
|
27
|
+
|
28
|
+
# Imports users into the current workspace from the file named by FILE,
# which is read line by line with one screen name per line.
#
# Blank lines and users already in the workspace are skipped; every other
# screen name is looked up through the Twitter client and saved. Lookups run
# on the module's thread pool.
#
# Returns false when the file does not exist or is not readable.
def run
  filename = File.expand_path(option_setting("FILE"))
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  unless File.exist?(filename)
    error("File #{filename.bold} does not exist")
    return false
  end
  unless File.readable?(filename)
    error("File #{filename.bold} is not readable")
    return false
  end

  threads = thread_pool
  File.foreach(filename) do |line|
    threads.process do
      begin
        screen_name = line.strip
        next if screen_name.empty?
        if current_workspace.users_dataset.first(:screen_name => screen_name)
          info("User #{screen_name.bold} is already in the workspace")
          next
        end
        user = twitter_client.user(screen_name)
        save_user(user)
        info("Added #{screen_name.bold} to workspace")
      rescue Twitter::Error::NotFound
        error("There is no user with screen name: #{screen_name.bold}")
      end
    end
  end
  threads.shutdown
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Users
|
4
|
+
class InfluenceGraph < Birdwatcher::Module
|
5
|
+
# Module metadata: display name, description, author and the configurable
# options. Options are keyed by name; each entry carries its default :value,
# a :description and a :required flag.
self.meta = {
  name: "Influence Graph",
  description: "Graphs the influence between users from Klout",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "DEST" => { value: nil, description: "Destination file", required: true },
    "USERS" => {
      value: nil,
      description: "Space-separated list of screen names (all users if empty)",
      required: false
    },
    "FORMAT" => {
      value: "png",
      description: "Destination file format (any format supported by Graphviz)",
      required: true
    }
  }
}
|
27
|
+
|
28
|
+
# Returns the long-form help text for the Influence Graph module.
#
# The output format sentence refers to the FORMAT option, because #run
# writes the graph in whatever format that option names, not only PNG.
def self.info
  <<-INFO
The Influence Graph module generates an influence graph between users in the
currently active workspace. The graph can be used to identify who each user is
being influenced by as well as who each user influences.

The influence information is retrieved by the #{'users/klout_influence'.bold} module so
be sure to run that module before running this one.

The generated graph is written in the format given by the #{'FORMAT'.bold} option
(PNG by default).
  INFO
end
|
40
|
+
|
41
|
+
# Draws a directed graph of who influences whom among workspace users and
# writes it to DEST in the format named by FORMAT.
#
# Only influencers/influencees that are themselves users of the workspace
# become nodes. When two users influence each other, a single edge with
# direction "both" is drawn instead of two opposite edges.
#
# Returns false for an unsupported format or when no users are selected.
def run
  output_format = option_setting("FORMAT")
  unless GraphViz::Constants::FORMATS.include?(output_format)
    error("Unsupported format: #{option_setting('FORMAT').bold}")
    return false
  end

  users = current_workspace.users_dataset
  screen_names = option_setting("USERS")
  users = users.where("screen_name IN ?", screen_names.split(" ").map(&:strip)) if screen_names
  users = users.order(:screen_name).eager(:influencers, :influencees)
  if users.empty?
    error("There are no users to process")
    return false
  end

  graph = GraphViz.new(:G, :type => :digraph)
  known_names = current_workspace.users.map(&:screen_name)

  # Maps a screen name to the screen names that user influences.
  influences = {}
  users.each do |user|
    name = user.screen_name
    influences[name] ||= []
    influences[name] += user.influencees.map(&:screen_name).select { |n| known_names.include?(n) }
    user.influencers.map(&:screen_name).select { |n| known_names.include?(n) }.each do |influencer|
      influences[influencer] ||= []
      influences[influencer] << name unless influences[influencer].include?(name)
    end
  end

  nodes = {}
  influences.each_pair do |source, targets|
    targets.uniq!
    next if targets.empty?

    nodes[source] ||= graph.add_nodes(source)
    targets.each do |target|
      reverse = influences[target]
      direction =
        if reverse && reverse.include?(source)
          # Mutual influence: drop the reverse entry so it is not drawn twice.
          reverse.delete(source)
          "both"
        else
          "forward"
        end
      nodes[target] ||= graph.add_nodes(target)
      graph.add_edges(
        nodes[source], nodes[target],
        :color => "lightblue", :fontcolor => "cornflowerblue", :dir => direction, :arrowhead => "normal"
      )
    end
  end

  task("Outputting graph...") do
    graph.output(option_setting("FORMAT") => option_setting("DEST"))
  end
  info("Graph written to #{option_setting('DEST').bold}")
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|