birdwatcher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +42 -0
  9. data/birdwatcher.gemspec +40 -0
  10. data/data/english_stopwords.txt +319 -0
  11. data/data/top100Kenglishwords.txt +100000 -0
  12. data/db/migrations/001_create_workspaces.rb +11 -0
  13. data/db/migrations/002_create_users.rb +29 -0
  14. data/db/migrations/003_create_statuses.rb +28 -0
  15. data/db/migrations/004_create_mentions.rb +13 -0
  16. data/db/migrations/005_create_mentions_statuses.rb +8 -0
  17. data/db/migrations/006_create_hashtags.rb +11 -0
  18. data/db/migrations/007_create_hashtags_statuses.rb +8 -0
  19. data/db/migrations/008_create_urls.rb +16 -0
  20. data/db/migrations/009_create_statuses_urls.rb +8 -0
  21. data/db/migrations/010_create_klout_topics.rb +10 -0
  22. data/db/migrations/011_create_klout_topics_users.rb +8 -0
  23. data/db/migrations/012_create_influencers.rb +10 -0
  24. data/db/migrations/013_create_influencers_users.rb +8 -0
  25. data/db/migrations/014_create_influencees.rb +10 -0
  26. data/db/migrations/015_create_influencees_users.rb +8 -0
  27. data/exe/birdwatcher +12 -0
  28. data/lib/birdwatcher/command.rb +78 -0
  29. data/lib/birdwatcher/commands/back.rb +15 -0
  30. data/lib/birdwatcher/commands/exit.rb +16 -0
  31. data/lib/birdwatcher/commands/help.rb +60 -0
  32. data/lib/birdwatcher/commands/irb.rb +34 -0
  33. data/lib/birdwatcher/commands/module.rb +106 -0
  34. data/lib/birdwatcher/commands/query.rb +58 -0
  35. data/lib/birdwatcher/commands/query_csv.rb +56 -0
  36. data/lib/birdwatcher/commands/resource.rb +45 -0
  37. data/lib/birdwatcher/commands/run.rb +19 -0
  38. data/lib/birdwatcher/commands/schema.rb +116 -0
  39. data/lib/birdwatcher/commands/set.rb +56 -0
  40. data/lib/birdwatcher/commands/shell.rb +21 -0
  41. data/lib/birdwatcher/commands/show.rb +86 -0
  42. data/lib/birdwatcher/commands/status.rb +114 -0
  43. data/lib/birdwatcher/commands/unset.rb +37 -0
  44. data/lib/birdwatcher/commands/use.rb +25 -0
  45. data/lib/birdwatcher/commands/user.rb +155 -0
  46. data/lib/birdwatcher/commands/workspace.rb +176 -0
  47. data/lib/birdwatcher/concerns/concurrency.rb +25 -0
  48. data/lib/birdwatcher/concerns/core.rb +105 -0
  49. data/lib/birdwatcher/concerns/outputting.rb +114 -0
  50. data/lib/birdwatcher/concerns/persistence.rb +101 -0
  51. data/lib/birdwatcher/concerns/presentation.rb +122 -0
  52. data/lib/birdwatcher/concerns/util.rb +138 -0
  53. data/lib/birdwatcher/configuration.rb +63 -0
  54. data/lib/birdwatcher/configuration_wizard.rb +65 -0
  55. data/lib/birdwatcher/console.rb +201 -0
  56. data/lib/birdwatcher/http_client.rb +164 -0
  57. data/lib/birdwatcher/klout_client.rb +83 -0
  58. data/lib/birdwatcher/kml.rb +125 -0
  59. data/lib/birdwatcher/module.rb +253 -0
  60. data/lib/birdwatcher/modules/statuses/kml.rb +106 -0
  61. data/lib/birdwatcher/modules/statuses/sentiment.rb +77 -0
  62. data/lib/birdwatcher/modules/statuses/word_cloud.rb +205 -0
  63. data/lib/birdwatcher/modules/urls/crawl.rb +138 -0
  64. data/lib/birdwatcher/modules/urls/most_shared.rb +98 -0
  65. data/lib/birdwatcher/modules/users/activity_plot.rb +62 -0
  66. data/lib/birdwatcher/modules/users/import.rb +61 -0
  67. data/lib/birdwatcher/modules/users/influence_graph.rb +93 -0
  68. data/lib/birdwatcher/modules/users/klout_id.rb +62 -0
  69. data/lib/birdwatcher/modules/users/klout_influence.rb +83 -0
  70. data/lib/birdwatcher/modules/users/klout_score.rb +64 -0
  71. data/lib/birdwatcher/modules/users/klout_topics.rb +72 -0
  72. data/lib/birdwatcher/modules/users/social_graph.rb +110 -0
  73. data/lib/birdwatcher/punchcard.rb +183 -0
  74. data/lib/birdwatcher/util.rb +83 -0
  75. data/lib/birdwatcher/version.rb +3 -0
  76. data/lib/birdwatcher.rb +43 -0
  77. data/models/hashtag.rb +8 -0
  78. data/models/influencee.rb +8 -0
  79. data/models/influencer.rb +8 -0
  80. data/models/klout_topic.rb +8 -0
  81. data/models/mention.rb +8 -0
  82. data/models/status.rb +11 -0
  83. data/models/url.rb +8 -0
  84. data/models/user.rb +11 -0
  85. data/models/workspace.rb +26 -0
  86. metadata +405 -0
@@ -0,0 +1,205 @@
1
module Birdwatcher
  module Modules
    module Statuses
      # Generates a weighted word cloud image (PNG) from the words used in the
      # statuses of the current workspace, optionally enriched with the page
      # titles of shared URLs.
      class WordCloud < Birdwatcher::Module
        self.meta = {
          :name => "Word Cloud",
          :description => "Generates a word cloud from statuses",
          :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options => {
            "DEST" => {
              :value => nil,
              :description => "Destination file",
              :required => true
            },
            "USERS" => {
              :value => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required => false
            },
            "SINCE" => {
              :value => nil,
              :description => "Process statuses posted since specified time (last 7 days if empty)",
              :required => false
            },
            "BEFORE" => {
              :value => nil,
              :description => "Process statuses posted before specified time (from now if empty)",
              :required => false
            },
            "MIN_WORD_COUNT" => {
              :value => 3,
              :description => "Exclude words mentioned fewer times than specified",
              :required => false
            },
            "MIN_WORD_LENGTH" => {
              :value => 3,
              :description => "Exclude words smaller than specified",
              :required => false
            },
            "EXCLUDE_STOPWORDS" => {
              :value => true,
              :description => "Exclude english stopwords",
              :required => false,
              :boolean => true
            },
            "EXCLUDE_COMMON" => {
              :value => true,
              :description => "Exclude common english words",
              :required => false,
              :boolean => true
            },
            "EXCLUDE_WORDS" => {
              :value => nil,
              :description => "Space-separated list of words to exclude",
              :required => false
            },
            "EXCLUDE_HASHTAGS" => {
              :value => false,
              :description => "Exclude Hashtags",
              :required => false,
              :boolean => true
            },
            "EXCLUDE_MENTIONS" => {
              :value => true,
              :description => "Exclude @username mentions",
              :required => false,
              :boolean => true
            },
            "INCLUDE_PAGE_TITLES" => {
              :value => false,
              :description => "Include web page titles from shared URLs (requires crawling with urls/crawl)",
              :required => false,
              :boolean => true
            },
            "WORD_CAP" => {
              :value => 200,
              :description => "Cap list of words to specified amount",
              :required => false
            },
            "PALETTE" => {
              :value => "#8F99AB #A3ADC2 #272A2F #474C55 #3D4148 #021121 #293642 #516982 #516982 #415569",
              :description => "Space-separated list of hex color codes to use for word cloud",
              :required => true
            },
            "IMAGE_WIDTH" => {
              :value => 1024,
              :description => "Image width in pixels",
              :required => true
            },
            "IMAGE_HEIGHT" => {
              :value => 1024,
              :description => "Image height in pixels",
              :required => true
            },
          }
        }

        # Words that are excluded regardless of the module options.
        DEFAULT_EXCLUDED_WORDS = %w(rt via oh).freeze

        # Long-form help text shown to the user for this module.
        #
        # @return [String]
        def self.info
          <<-INFO
The Word Cloud module can generate a classic weighted word cloud from words used
in statuses across all or specific users and between different times.

The module is heavily configurable; have a look at the options with #{'show options'.bold}

Please note that configuring the module with a long timespan might result in a
very long execution time when the word cloud image is generated.

The generated image will be in PNG format.
INFO
        end

        # Collects the statuses selected by USERS/SINCE/BEFORE, counts their
        # words and writes the rendered word cloud to the DEST file.
        #
        # @return [false] when there are no statuses to process; otherwise the
        #   return value of the final +info+ call
        def run
          statuses = statuses_in_range
          if statuses.empty?
            error("There are no statuses to process")
            return false
          end
          prepare_exclusion_list
          sorted_words = []
          task("Processing #{statuses.count.to_s.bold} statuses...") do
            words = Hash.new(0)
            statuses.each do |status|
              count_words_in(status.text, words)
              next unless option_setting("INCLUDE_PAGE_TITLES")
              status.urls_dataset
                .where("title IS NOT NULL")
                .where("final_url NOT LIKE 'https://twitter.com/%'")
                .map(&:title).each { |page_title| count_words_in(page_title, words) }
            end
            if option_setting("MIN_WORD_COUNT")
              minimum = option_setting("MIN_WORD_COUNT").to_i
              words.delete_if { |_word, count| count < minimum }
            end
            # Most frequent words first, as [word, count] pairs.
            sorted_words = words.sort_by { |_word, count| count }.reverse
            if option_setting("WORD_CAP")
              sorted_words = sorted_words.take(option_setting("WORD_CAP").to_i)
            end
          end
          task("Generating word cloud, patience please...") do
            cloud = MagicCloud::Cloud.new(sorted_words,
              :rotate => :none,
              :palette => option_setting("PALETTE").split(" ").map(&:strip)
            ).draw(option_setting("IMAGE_WIDTH").to_i, option_setting("IMAGE_HEIGHT").to_i).to_blob { self.format = "png" }
            File.open(option_setting("DEST"), "wb") { |f| f.write(cloud) }
          end
          info("Word cloud written to #{option_setting('DEST').bold}")
        end

        private

        # Loads the statuses to process: limited to the users named in USERS
        # (all users when unset) and to the SINCE..BEFORE date window
        # (defaulting to the last 7 days up to today).
        #
        # @return [Array] the matching status records
        def statuses_in_range
          if option_setting("USERS")
            screen_names = option_setting("USERS").split(" ").map(&:strip)
            user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
            statuses = current_workspace.statuses_dataset.where("user_id IN ?", user_ids)
          else
            statuses = current_workspace.statuses_dataset
          end
          if option_setting("SINCE")
            since = parse_time(option_setting("SINCE")).strftime("%Y-%m-%d")
          else
            since = (Date.today - 7).strftime("%Y-%m-%d")
          end
          if option_setting("BEFORE")
            before = parse_time(option_setting("BEFORE")).strftime("%Y-%m-%d")
          else
            before = Time.now.strftime("%Y-%m-%d")
          end
          statuses.where("DATE(posted_at) >= DATE(?) AND DATE(posted_at) <= DATE(?)", since, before).all
        end

        # Adds every non-excluded word of +text+ to the +counts+ tally.
        #
        # @param text [String, nil] raw status text or page title
        # @param counts [Hash{String => Integer}] tally that is updated in place
        def count_words_in(text, counts)
          split_into_words(text).each do |word|
            next if exclude_word?(word)
            counts[word] += 1
          end
        end

        # Builds the lookup of excluded words from the defaults plus the
        # EXCLUDE_WORDS, EXCLUDE_STOPWORDS and EXCLUDE_COMMON options.
        #
        # The result is stored as a Hash index rather than an Array: the list
        # can contain 100K+ entries and is consulted once per word, so a
        # linear Array#include? scan would dominate the processing time.
        def prepare_exclusion_list
          excluded = DEFAULT_EXCLUDED_WORDS.dup
          if option_setting("EXCLUDE_WORDS")
            excluded.concat(option_setting("EXCLUDE_WORDS").split(" ").map { |w| w.strip.downcase })
          end
          if option_setting("EXCLUDE_STOPWORDS")
            excluded.concat(read_data_file("english_stopwords.txt").split("\n").map { |w| w.strip.downcase })
          end
          if option_setting("EXCLUDE_COMMON")
            excluded.concat(read_data_file("top100Kenglishwords.txt").split("\n").map(&:strip))
          end
          @exclusion_list = excluded.each_with_object({}) { |word, index| index[word] = true }
        end

        # Decides whether a word must be left out of the cloud.
        #
        # @param word [String] lower-cased word
        # @return [Boolean]
        def exclude_word?(word)
          return true if word.empty?
          min_length = option_setting("MIN_WORD_LENGTH")
          return true if min_length && word.length < min_length.to_i
          return true if option_setting("EXCLUDE_HASHTAGS") && word.start_with?("#")
          return true if option_setting("EXCLUDE_MENTIONS") && word.start_with?("@")
          @exclusion_list.key?(word)
        end

        # Lower-cases +text+, removes URLs, replaces every character other
        # than letters, digits, "@", "#", "_" and space with a space, and
        # splits the result on whitespace.
        #
        # @param text [String, nil]
        # @return [Array<String>]
        def split_into_words(text)
          cleaned = text.to_s.downcase.strip.gsub(/https?:\/\/[\S]+/, "").gsub(/[^0-9a-z@#_ ]/i, " ")
          cleaned.split(" ")
        end
      end
    end
  end
end
@@ -0,0 +1,138 @@
1
module Birdwatcher
  module Modules
    module Urls
      # Requests every not-yet-crawled URL of the current workspace and stores
      # its final URL, HTTP status, content type and (for HTML) page title.
      class Crawl < Birdwatcher::Module
        self.meta = {
          :name => "URL Crawler",
          :description => "Enrich gathered URLs with HTTP status codes, content types and page titles",
          :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options => {
            "USER_AGENT" => {
              :value => nil,
              :description => "Specific HTTP User-Agent to use (randomized user-agents if not set)",
              :required => false
            },
            "TIMEOUT" => {
              :value => Birdwatcher::HttpClient::DEFAULT_TIMEOUT,
              :description => "Request timeout in seconds",
              :required => false
            },
            "RETRIES" => {
              :value => Birdwatcher::HttpClient::DEFAULT_RETRIES,
              :description => "Amount of retries on failed requests",
              :required => false
            },
            "RETRY_FAILED" => {
              :value => false,
              :description => "Retry previously failed crawls",
              :required => false,
              :boolean => true
            },
            "PROXY_ADDR" => {
              :value => nil,
              :description => "HTTP proxy address to use for requests",
              :required => false
            },
            "PROXY_PORT" => {
              :value => nil,
              :description => "HTTP proxy port to use for requests",
              :required => false
            },
            "PROXY_USER" => {
              :value => nil,
              :description => "HTTP proxy user to use for requests",
              :required => false
            },
            "PROXY_PASS" => {
              :value => nil,
              :description => "HTTP proxy password to use for requests",
              :required => false
            },
            "THREADS" => {
              :value => 10,
              :description => "The number of concurrent threads",
              :required => false
            }
          }
        }

        # Captures the text of the first <title> element. The tag may carry
        # attributes and the title may span several lines, hence the /m flag.
        PAGE_TITLE_REGEX = /<title(?:\s[^>]*)?>(.*?)<\/title>/im

        # Long-form help text shown to the user for this module.
        #
        # @return [String]
        def self.info
          <<-INFO
The URL Crawler module crawls shared URLs and enriches them with additional
information:

  * HTTP status code (200, 404, 500, etc.)
  * Content type (text/html, application/pdf, etc)
  * Page title (if HTML document)

Page titles can be included in the Word Cloud generated with the
#{'statuses/word_cloud'.bold} module.

#{'CAUTION:'.bold} Depending on the users in the workspace, it might not be safe
to blindly request shared URLs. Consider using the #{'PROXY_ADDR'.bold} and #{'PROXY_PORT'.bold}
module options.
INFO
        end

        # Crawls all pending URLs concurrently. A URL whose crawl raises is
        # still stamped with +crawled_at+ (its +http_status+ stays empty), which
        # is what RETRY_FAILED later uses to select it again.
        #
        # @return [false] when there is nothing to crawl; otherwise the return
        #   value of shutting down the thread pool
        def run
          urls = current_workspace.urls_dataset
          if option_setting("RETRY_FAILED")
            urls = urls.where("crawled_at IS NULL or (crawled_at IS NOT NULL AND http_status IS NULL)")
          else
            urls = urls.where(:crawled_at => nil)
          end
          urls = urls.order(Sequel.desc(:created_at))
          if urls.empty?
            error("There are currently no URLs in this workspace")
            return false
          end
          timeout = option_setting("TIMEOUT").to_i
          proxy_port = option_setting("PROXY_PORT")
          threads = thread_pool(option_setting("THREADS").to_i)
          http_client = Birdwatcher::HttpClient.new(
            :timeout => timeout,
            :retries => option_setting("RETRIES").to_i,
            :user_agent => option_setting("USER_AGENT"),
            :http_proxyaddr => option_setting("PROXY_ADDR"),
            :http_proxyport => (proxy_port ? proxy_port.to_i : nil),
            :http_proxyuser => option_setting("PROXY_USER"),
            :http_proxypass => option_setting("PROXY_PASS")
          )
          urls.each do |url|
            threads.process do
              begin
                # Hard upper bound around the HEAD (+ optional GET) pair.
                Timeout::timeout(timeout * 2) do
                  response = http_client.do_head(url.url)
                  content_type = response.headers["content-type"]
                  url.final_url = response.url
                  url.http_status = response.status
                  url.content_type = content_type
                  # Only fetch the body when the server says it is HTML.
                  if content_type && content_type.include?("text/html")
                    url.title = extract_page_title(http_client.do_get(response.url).body)
                  end
                  url.crawled_at = Time.now
                  url.save
                  info("Crawled #{url.url.bold} (#{response.status} - #{content_type})")
                end
              rescue => e
                url.crawled_at = Time.now
                url.save
                error("Crawling failed for #{url.url.bold} (#{e.class})")
              end
            end
          end
          threads.shutdown
        end

        private

        # Extracts the HTML-unescaped page title from a response body.
        # Whitespace runs inside the title are collapsed to single spaces.
        #
        # @param body [String, nil] response body
        # @return [String, nil] the title, or nil when no <title> element exists
        def extract_page_title(body)
          match = PAGE_TITLE_REGEX.match(body.to_s)
          return nil if match.nil?
          CGI.unescapeHTML(match[1].gsub(/\s+/, " ").strip)
        end
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
module Birdwatcher
  module Modules
    module Urls
      # Lists the URLs shared in the current workspace, ordered from the most
      # to the least shared, within a configurable date window.
      class MostShared < Birdwatcher::Module
        self.meta = {
          :name => "Most Shared URLs",
          :description => "Lists shared URLs ordered from most to least shared",
          :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options => {
            "USERS" => {
              :value => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required => false
            },
            "MIN_SHARE_COUNT" => {
              :value => 2,
              :description => "Exclude URLS shared fewer times than specified",
              :required => false
            },
            "SINCE" => {
              :value => nil,
              :description => "List URLs shared since specified time (last 7 days if empty)",
              :required => false
            },
            "BEFORE" => {
              :value => nil,
              :description => "List URLs shared before specified time (from now if empty)",
              :required => false
            }
          }
        }

        # Long-form help text shown to the user for this module.
        #
        # @return [String]
        def self.info
          <<-INFO
The Most Shared URLs module can show a simple list of shared URLs ordered from
most to least shared. If a URL has been shared by several people, it is a good
indication that it has important or interesting information.

To enhance the functionality of this module, it is recommended to run the
#{'urls/crawl'.bold} module first in order to get information on the URLs such
as HTTP status codes, content types and page titles. If the information is
available, this module will display it.
INFO
        end

        # Queries the share counts and pages a summary of every URL that was
        # shared at least MIN_SHARE_COUNT times.
        #
        # @return [false] when no URLs match; otherwise the return value of
        #   +page_text+
        def run
          if option_setting("SINCE")
            since = parse_time(option_setting("SINCE")).strftime("%Y-%m-%d")
          else
            since = (Date.today - 7).strftime("%Y-%m-%d")
          end
          if option_setting("BEFORE")
            before = parse_time(option_setting("BEFORE")).strftime("%Y-%m-%d")
          else
            before = Time.now.strftime("%Y-%m-%d")
          end

          # The user filter is the only difference between the two query
          # variants, so the SQL and its bind values are assembled once.
          user_filter = ""
          binds = []
          if option_setting("USERS")
            screen_names = option_setting("USERS").split(" ").map(&:strip)
            user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
            user_filter = "statuses.user_id IN ? AND "
            binds << user_ids
          end
          # Bind order must follow the placeholders: workspace, upper date
          # bound (<=), then lower date bound (>=).
          binds.push(current_workspace.id, before, since)
          sql = "SELECT urls.url, urls.final_url, urls.title, urls.http_status, urls.content_type, count(statuses_urls.*) AS count
                 FROM urls
                 INNER JOIN statuses_urls
                 ON statuses_urls.url_id = urls.id
                 INNER JOIN statuses
                 ON statuses_urls.status_id = statuses.id
                 WHERE #{user_filter}statuses.workspace_id = ?
                 AND DATE(statuses.posted_at) <= DATE(?)
                 AND DATE(statuses.posted_at) >= DATE(?)
                 GROUP BY urls.url, urls.final_url, urls.title, urls.http_status, urls.content_type
                 ORDER BY count DESC"
          urls = database[sql, *binds].all
          if urls.empty?
            error("There are no URLs to display")
            return false
          end

          # Option values set by the user may arrive as strings, hence to_i.
          min_share_count = option_setting("MIN_SHARE_COUNT")
          min_share_count = min_share_count.to_i if min_share_count
          summaries = urls.map do |url|
            # "fewer times than specified" => strictly less than the minimum.
            next if min_share_count && url[:count] < min_share_count
            make_url_summary_output(url) + "\n#{Birdwatcher::Console::LINE_SEPARATOR}\n\n"
          end
          page_text(summaries.compact.join)
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Birdwatcher
  module Modules
    # NOTE(review): this class is declared under Statuses although the file
    # lives in modules/users/ -- confirm which namespace the module loader
    # expects before renaming; the namespace is left untouched here.
    module Statuses
      # Generates a punchcard plot (PNG) of the posting times of one user.
      class ActivityPlot < Birdwatcher::Module
        self.meta = {
          :name => "Activity Plot",
          :description => "Generates punchcard plot of a user's activity",
          :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options => {
            "DEST" => {
              :value => nil,
              :description => "Destination file",
              :required => true
            },
            "USER" => {
              :value => nil,
              :description => "Screen name of user to analyze",
              :required => true
            },
            "ONLY_REPLIES" => {
              :value => false,
              :description => "Only plot when the user replies to other users",
              :required => false,
              :boolean => true
            }
          }
        }

        # Long-form help text shown to the user for this module.
        #
        # @return [String]
        def self.info
          <<-INFO
The Activity Plot module can generate a punchcard plot of when a user is the
most engaged with Twitter. The plot can be used to find the most likely time
(day and hour) where a user will engage with Twitter content.

The generated file is in PNG format.
INFO
        end

        # Looks up the USER in the current workspace, gathers the timestamps of
        # their statuses (only those starting with "@" when ONLY_REPLIES is
        # set) and writes the punchcard plot to DEST.
        #
        # @return [false] when the user is unknown or has no matching statuses;
        #   otherwise the return value of the final +info+ call
        def run
          screen_name = option_setting("USER")
          user = current_workspace.users_dataset.first(:screen_name => screen_name)
          unless user
            # screen_name must be read from the option; there is no other
            # source for it in this method.
            error("User #{screen_name.bold} was not found in workspace")
            return false
          end
          if option_setting("ONLY_REPLIES")
            timestamps = user.statuses_dataset.where("text LIKE '@%'").map(&:posted_at)
          else
            timestamps = user.statuses.map(&:posted_at)
          end
          if timestamps.empty?
            error("There are no statuses to process")
            return false
          end
          punchcard = Birdwatcher::Punchcard.new(timestamps)
          task("Generating activity plot from #{timestamps.count.to_s.bold} statuses...") do
            punchcard.generate(option_setting("DEST"))
          end
          info("Activity plot written to #{option_setting('DEST').bold}")
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module Birdwatcher
  module Modules
    module Users
      # Adds users to the current workspace from a file that lists one screen
      # name per line.
      class Import < Birdwatcher::Module
        self.meta = {
          :name => "User Importer",
          :description => "Import users from a file containing screen names.",
          :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options => {
            "FILE" => {
              :value => nil,
              :description => "File to read screen names from.",
              :required => true
            }
          }
        }

        # Long-form help text shown to the user for this module.
        #
        # @return [String]
        def self.info
          <<-INFO
The User Importer module is a simple module to add a large number of users to
the currently active workspace by parsing a file containing screen names.

The file is expected to contain one screen name per line, without the @ sign or
https://twitter.com/ in front of them.
INFO
        end

        # Reads FILE and, for every non-blank line, fetches the user from
        # Twitter and saves it unless it is already in the workspace.
        #
        # @return [false] when the file is missing or unreadable; otherwise the
        #   return value of shutting down the thread pool
        def run
          filename = File.expand_path(option_setting("FILE"))
          # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
          unless File.exist?(filename)
            error("File #{filename.bold} does not exist")
            return false
          end
          unless File.readable?(filename)
            error("File #{filename.bold} is not readable")
            return false
          end
          threads = thread_pool
          File.read(filename).each_line do |line|
            screen_name = line.strip
            # Blank lines never need a worker.
            next if screen_name.empty?
            threads.process do
              begin
                if current_workspace.users_dataset.first(:screen_name => screen_name)
                  info("User #{screen_name.bold} is already in the workspace")
                  next
                end
                user = twitter_client.user(screen_name)
                save_user(user)
                info("Added #{screen_name.bold} to workspace")
              rescue Twitter::Error::NotFound
                error("There is no user with screen name: #{screen_name.bold}")
              end
            end
          end
          threads.shutdown
        end
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
module Birdwatcher
  module Modules
    module Users
      # Renders a directed Graphviz graph of who influences whom among the
      # users of the current workspace, based on stored Klout influence data.
      class InfluenceGraph < Birdwatcher::Module
        self.meta = {
          :name => "Influence Graph",
          :description => "Graphs the influence between users from Klout",
          :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options => {
            "DEST" => {
              :value => nil,
              :description => "Destination file",
              :required => true
            },
            "USERS" => {
              :value => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required => false
            },
            "FORMAT" => {
              :value => "png",
              :description => "Destination file format (any format supported by Graphviz)",
              :required => true
            }
          }
        }

        # Long-form help text shown to the user for this module.
        #
        # @return [String]
        def self.info
          <<-INFO
The Influence Graph module generates an influence graph between users in the
currently active workspace. The graph can be used to identify who each user is
being influenced by as well as who each user influences.

The influence information is retrieved by the #{'users/klout_influence'.bold} so
be sure to run that module before running this one.

The generated graph is in PNG format by default; set the #{'FORMAT'.bold} option
to use any other format supported by Graphviz.
INFO
        end

        # Builds the influence graph for the selected users and writes it to
        # DEST in the requested FORMAT.
        #
        # @return [false] when FORMAT is unsupported or no users match;
        #   otherwise the return value of the final +info+ call
        def run
          unless GraphViz::Constants::FORMATS.include?(option_setting("FORMAT"))
            error("Unsupported format: #{option_setting('FORMAT').bold}")
            return false
          end
          users = current_workspace.users_dataset
          screen_names = option_setting("USERS")
          if screen_names
            users = users.where("screen_name IN ?", screen_names.split(" ").map(&:strip))
          end
          users = users.order(:screen_name).eager(:influencers, :influencees)
          if users.empty?
            error("There are no users to process")
            return false
          end
          graph = GraphViz.new(:G, :type => :digraph)
          users_in_workspace = current_workspace.users.map(&:screen_name)
          nodes = {}
          # Maps a screen name to the screen names that user influences; only
          # users that are part of the workspace are considered.
          influences = {}
          users.each do |user|
            influences[user.screen_name] ||= []
            influences[user.screen_name] += user.influencees.select { |i| users_in_workspace.include?(i.screen_name) }.map(&:screen_name)
            user.influencers.select { |i| users_in_workspace.include?(i.screen_name) }.map(&:screen_name).each do |influencer|
              influences[influencer] ||= []
              influences[influencer] << user.screen_name unless influences[influencer].include?(user.screen_name)
            end
          end
          influences.each_pair do |user, influence|
            influence.uniq!
            next if influence.empty?
            nodes[user] ||= graph.add_nodes(user)
            # Iterate over a copy: the reverse entry is deleted below, which
            # would mutate the list being walked if a user influences
            # themselves.
            influence.dup.each do |influencee|
              if influences[influencee] && influences[influencee].include?(user)
                # Mutual influence is drawn once as a two-way edge; drop the
                # reverse entry so it is not drawn a second time.
                direction = "both"
                influences[influencee].delete(user)
              else
                direction = "forward"
              end
              nodes[influencee] ||= graph.add_nodes(influencee)
              graph.add_edges(nodes[user], nodes[influencee], :color => "lightblue", :fontcolor => "cornflowerblue", :dir => direction, :arrowhead => "normal")
            end
          end
          task("Outputting graph...") do
            graph.output(option_setting("FORMAT") => option_setting("DEST"))
          end
          info("Graph written to #{option_setting('DEST').bold}")
        end
      end
    end
  end
end