birdwatcher 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +42 -0
  9. data/birdwatcher.gemspec +40 -0
  10. data/data/english_stopwords.txt +319 -0
  11. data/data/top100Kenglishwords.txt +100000 -0
  12. data/db/migrations/001_create_workspaces.rb +11 -0
  13. data/db/migrations/002_create_users.rb +29 -0
  14. data/db/migrations/003_create_statuses.rb +28 -0
  15. data/db/migrations/004_create_mentions.rb +13 -0
  16. data/db/migrations/005_create_mentions_statuses.rb +8 -0
  17. data/db/migrations/006_create_hashtags.rb +11 -0
  18. data/db/migrations/007_create_hashtags_statuses.rb +8 -0
  19. data/db/migrations/008_create_urls.rb +16 -0
  20. data/db/migrations/009_create_statuses_urls.rb +8 -0
  21. data/db/migrations/010_create_klout_topics.rb +10 -0
  22. data/db/migrations/011_create_klout_topics_users.rb +8 -0
  23. data/db/migrations/012_create_influencers.rb +10 -0
  24. data/db/migrations/013_create_influencers_users.rb +8 -0
  25. data/db/migrations/014_create_influencees.rb +10 -0
  26. data/db/migrations/015_create_influencees_users.rb +8 -0
  27. data/exe/birdwatcher +12 -0
  28. data/lib/birdwatcher/command.rb +78 -0
  29. data/lib/birdwatcher/commands/back.rb +15 -0
  30. data/lib/birdwatcher/commands/exit.rb +16 -0
  31. data/lib/birdwatcher/commands/help.rb +60 -0
  32. data/lib/birdwatcher/commands/irb.rb +34 -0
  33. data/lib/birdwatcher/commands/module.rb +106 -0
  34. data/lib/birdwatcher/commands/query.rb +58 -0
  35. data/lib/birdwatcher/commands/query_csv.rb +56 -0
  36. data/lib/birdwatcher/commands/resource.rb +45 -0
  37. data/lib/birdwatcher/commands/run.rb +19 -0
  38. data/lib/birdwatcher/commands/schema.rb +116 -0
  39. data/lib/birdwatcher/commands/set.rb +56 -0
  40. data/lib/birdwatcher/commands/shell.rb +21 -0
  41. data/lib/birdwatcher/commands/show.rb +86 -0
  42. data/lib/birdwatcher/commands/status.rb +114 -0
  43. data/lib/birdwatcher/commands/unset.rb +37 -0
  44. data/lib/birdwatcher/commands/use.rb +25 -0
  45. data/lib/birdwatcher/commands/user.rb +155 -0
  46. data/lib/birdwatcher/commands/workspace.rb +176 -0
  47. data/lib/birdwatcher/concerns/concurrency.rb +25 -0
  48. data/lib/birdwatcher/concerns/core.rb +105 -0
  49. data/lib/birdwatcher/concerns/outputting.rb +114 -0
  50. data/lib/birdwatcher/concerns/persistence.rb +101 -0
  51. data/lib/birdwatcher/concerns/presentation.rb +122 -0
  52. data/lib/birdwatcher/concerns/util.rb +138 -0
  53. data/lib/birdwatcher/configuration.rb +63 -0
  54. data/lib/birdwatcher/configuration_wizard.rb +65 -0
  55. data/lib/birdwatcher/console.rb +201 -0
  56. data/lib/birdwatcher/http_client.rb +164 -0
  57. data/lib/birdwatcher/klout_client.rb +83 -0
  58. data/lib/birdwatcher/kml.rb +125 -0
  59. data/lib/birdwatcher/module.rb +253 -0
  60. data/lib/birdwatcher/modules/statuses/kml.rb +106 -0
  61. data/lib/birdwatcher/modules/statuses/sentiment.rb +77 -0
  62. data/lib/birdwatcher/modules/statuses/word_cloud.rb +205 -0
  63. data/lib/birdwatcher/modules/urls/crawl.rb +138 -0
  64. data/lib/birdwatcher/modules/urls/most_shared.rb +98 -0
  65. data/lib/birdwatcher/modules/users/activity_plot.rb +62 -0
  66. data/lib/birdwatcher/modules/users/import.rb +61 -0
  67. data/lib/birdwatcher/modules/users/influence_graph.rb +93 -0
  68. data/lib/birdwatcher/modules/users/klout_id.rb +62 -0
  69. data/lib/birdwatcher/modules/users/klout_influence.rb +83 -0
  70. data/lib/birdwatcher/modules/users/klout_score.rb +64 -0
  71. data/lib/birdwatcher/modules/users/klout_topics.rb +72 -0
  72. data/lib/birdwatcher/modules/users/social_graph.rb +110 -0
  73. data/lib/birdwatcher/punchcard.rb +183 -0
  74. data/lib/birdwatcher/util.rb +83 -0
  75. data/lib/birdwatcher/version.rb +3 -0
  76. data/lib/birdwatcher.rb +43 -0
  77. data/models/hashtag.rb +8 -0
  78. data/models/influencee.rb +8 -0
  79. data/models/influencer.rb +8 -0
  80. data/models/klout_topic.rb +8 -0
  81. data/models/mention.rb +8 -0
  82. data/models/status.rb +11 -0
  83. data/models/url.rb +8 -0
  84. data/models/user.rb +11 -0
  85. data/models/workspace.rb +26 -0
  86. metadata +405 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8682e5d657e206d66a18fd2ee17cab1e0e696ceb
4
+ data.tar.gz: ae207377669987a05746239339a508e5119b1020
5
+ SHA512:
6
+ metadata.gz: ecc9517484aca7ed325d66ae7e2e1cf074a5230d6c00ab961a4656c352e4bfe5dae1201984d2ef991bcb504c5d3bcfeb7c4a3f861e19c001e4bb3461c1f42f74
7
+ data.tar.gz: c234083a65ba5f810768b9a4a833eadd275388d66cf371c7e33fb980829f47772f9acb2a5e496bff5e9a10fb829dca33a5698cf5a1caba4f1ebdb543b2b363cc
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.3
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in birdwatcher.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Michael Henriksen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,481 @@
1
+ # Birdwatcher
2
+
3
+ [Birdwatcher](https://github.com/michenriksen/birdwatcher) is a data analysis and OSINT framework for Twitter. Birdwatcher supports creating multiple workspaces where arbitrary Twitter users can be added and their Tweets harvested through the Twitter API for offline storage and analysis. Birdwatcher comes with several modules which can be invoked to further enrich collected data or work with it, e.g. retrieving users' Klout scores, generating social graphs between users and weighted word clouds based on their Tweets.
4
+
5
+ ![Birdwatcher commands](img/birdwatcher_help.png)
6
+
7
+ ## Installation
8
+
9
+ ### 1. Ruby
10
+
11
+ Birdwatcher is written in [Ruby](https://www.ruby-lang.org/) and requires version 1.9.3 or above. To check which version of Ruby you have installed, simply run `ruby --version` in a terminal.
12
+
13
+ Should you have an older version installed, it is very easy to upgrade and manage different versions with the Ruby Version Manager ([RVM](https://rvm.io/)). Please see the [RVM website](https://rvm.io/) for installation instructions.
14
+
15
+ ### 2. RubyGems
16
+
17
+ Birdwatcher is packaged as a Ruby gem to make it easy to install and update. To install Ruby gems you'll need the RubyGems tool installed. To check if you have it already, type `gem` in a terminal. If you already have it, it is recommended to do a quick `gem update --system` to make sure you have the latest and greatest version. In case you don't have it installed, download it from [here](https://rubygems.org/pages/download) and follow the simple installation instructions.
18
+
19
+ ### 3. PostgreSQL
20
+
21
+ Birdwatcher uses a PostgreSQL database to store all its data. If you are setting up Birdwatcher in the [Kali](https://www.kali.org/) linux distribution you already have it installed, you just need to make sure it's running by executing `service postgresql start` and perhaps install a dependency with `apt-get install libpq-dev` in a terminal. Here's an excellent [guide](https://www.digitalocean.com/community/tutorials/how-to-install-and-use-postgresql-9-4-on-debian-8) on how to install PostgreSQL on a Debian based Linux system. If you are setting up Birdwatcher on a Mac, the easiest way to install PostgreSQL is with [Homebrew](http://brew.sh/). Here's a [guide](http://exponential.io/blog/2015/02/21/install-postgresql-on-mac-os-x-via-brew/) on how to install PostgreSQL with Homebrew.
22
+
23
+ #### 3.1 PostgreSQL user and database
24
+
25
+ You need to set up a user and a database in PostgreSQL for Birdwatcher. Execute the following commands in a terminal:
26
+
27
+ sudo su postgres # Not necessary on Mac OS X
28
+ createuser -s birdwatcher --pwprompt
29
+ createdb -O birdwatcher birdwatcher
30
+
31
+ You now have a new PostgreSQL user with the name `birdwatcher` and with the password you typed into the prompt. You also created a database with the name `birdwatcher` which is owned by the `birdwatcher` user.
32
+
33
+ ### 4. Graphviz
34
+
35
+ Some Birdwatcher modules use [Graphviz](http://graphviz.org/) to generate visual graphs and other things. On a Mac you can install Graphviz with [homebrew](http://brew.sh/) by typing `brew update && brew install graphviz` in a terminal. On a Debian based Linux distro, Graphviz can be installed by typing `sudo apt-get update && sudo apt-get install graphviz` in a terminal.
36
+
37
+ ### 5. ImageMagick
38
+
39
+ Some Birdwatcher modules use [ImageMagick](https://imagemagick.org/script/index.php) to generate images. On a Mac you can install Imagemagick with [homebrew](http://brew.sh/) by typing `brew update && brew install imagemagick` in a terminal. On a Debian based Linux distro, ImageMagick can be installed by typing `sudo apt-get update && sudo apt-get install libmagickwand-dev imagemagick` in a terminal.
40
+
41
+ ### 6. Birdwatcher
42
+
43
+ Finally with all the dependencies in place, Birdwatcher can now be installed with a simple command in a terminal:
44
+
45
+ $ gem install birdwatcher
46
+
47
+ This will download and set up Birdwatcher and install all its code dependencies.
48
+
49
+ ## Configuration
50
+
51
+ Birdwatcher needs to know a bit about what database to connect to as well as API keys to use for API communication. For Twitter, you will need to register an application in order to get a consumer key and consumer secret. Head over to [apps.twitter.com](https://apps.twitter.com/) and set up your application. You don't need to have a valid callback URL as we won't be doing any OAuth authentication.
52
+
53
+ To make Birdwatcher even more useful, it is recommended to also obtain an API key for the [Klout API](https://klout.com/s/developers/v2) which is required for some modules to work. If you don't intend to use Klout modules, you can of course skip this step.
54
+
55
+ On the first run, Birdwatcher will automatically start a configuration wizard where it will ask for the configuration it needs. If you have PostgreSQL connection details, Twitter consumer key & secret and optional Klout API key, you can start Birdwatcher for the first time:
56
+
57
+ $ birdwatcher
58
+
59
+ Enter the details into the configuration wizard:
60
+
61
+ ![Birdwatcher configuration wizard](img/birdwatcher_configuration.png)
62
+
63
+ As can be seen from the above screenshot, Birdwatcher supports multiple Twitter and Klout keys. If you configure Birdwatcher with several keys it will randomly shuffle between them when communicating with APIs to potentially avoid any rate limiting issues on extensive use.
64
+
65
+ Birdwatcher will save its configuration to `~/.birdwatcherrc`. Be careful not to push this file up to any public code repositories!
66
+
67
+ ### System pager
68
+
69
+ Birdwatcher pages long command output with the operating system's default pager command (usually `less`). However, the command output can be colored and will show up strangely if the pager is not configured to render terminal colors. It is advised to add the following to your `~/.bash_profile` or similar file:
70
+
71
+ # Get color support for 'less'
72
+ export LESS="--raw-control-chars"
73
+ export PAGER="less"
74
+
75
+ This will set up `less` as your default pager command as well as configure `less` to support terminal colors. Execute the command `source ~/.bash_profile` to apply the configuration immediately instead of next time you open a terminal.
76
+
77
+ ## Usage
78
+
79
+ Birdwatcher is built as a console and if you have any experience with other frameworks such as [Metasploit](https://www.metasploit.com/) or [Recon-ng](https://bitbucket.org/LaNMaSteR53/recon-ng), you should feel right at home as Birdwatcher has many of the same concepts and commands.
80
+
81
+ ### Workspaces
82
+
83
+ Birdwatcher, like Metasploit and Recon-ng, works with the concept of Workspaces. Workspaces enable you to segment and manage users and data stored in the database. You can use workspaces to create logical separation between different users. For example, you may want to create a workspace for a company, a department or for a specific topic.
84
+
85
+ Birdwatcher will always show the currently active workspace inside the square brackets on the command prompt:
86
+
87
+ ![Birdwatcher prompt](img/birdwatcher_prompt.png)
88
+
89
+ There will always be a default workspace with the name `default` which might be enough if you plan to use Birdwatcher for a small group of Twitter users. Let's create a new workspace called `top5` that we can use for experimentation:
90
+
91
+ birdwatcher[default]> workspace create top5
92
+ [+] Created workspace: top5
93
+ birdwatcher[top5]>
94
+
95
+ The `workspace create` command created a new workspace and automatically made it the active workspace. Any user we add and any data we collect now will only be available in the `top5` workspace.
96
+
97
+ If we want to navigate between workspaces, we can do so by using the `workspace` command again:
98
+
99
+ birdwatcher[top5]> workspace use default
100
+ [+] Now using workspace: default
101
+ birdwatcher[default]> workspace use top5
102
+ [+] Now using workspace: top5
103
+ birdwatcher[top5]>
104
+
105
+ With the above commands we switched over to the `default` workspace and then back again to the `top5` workspace.
106
+
107
+ ### Commands
108
+
109
+ The core of the Birdwatcher framework is of course its commands. You already got an introduction to the `workspace` command in the previous section, but there are many more. One of the most important commands is `help`:
110
+
111
+ birdwatcher[top5]> help
112
+ [+] Available commands:
113
+
114
+ back Unloads current module
115
+ exit Exit Birdwatcher
116
+ help [COMMAND] Show help and detailed command usage
117
+ irb Start an interactive Ruby shell
118
+ module ACTION Show modules
119
+ query QUERY Execute SQL query
120
+ query_csv QUERY Execute SQL query and return result as CSV
121
+ resource FILE Execute commands from a resource file
122
+ run Run current module
123
+ schema [TABLE_NAME] Show schema for database table
124
+ set OPTION VALUE Set module option
125
+ shell COMMAND Execute shell command
126
+ show DETAILS Show module details and options
127
+ status [ACTION] Manage statuses
128
+ unset OPTION Unset module option
129
+ use MODULE_PATH Load specified module
130
+ user [ACTION] Manage users
131
+ workspace [ACTION] Manage workspaces
132
+
133
+ birdwatcher[top5]>
134
+
135
+ The help command simply lists the available commands with a short description of what they do. If we want to get more information on a specific command, e.g. the `workspace` command, we can execute the following:
136
+
137
+ birdwatcher[top5]> help workspace
138
+
139
+ Workspaces enable you to segment and manage users and data stored in the database.
140
+ You can use workspaces to create logical separation between different users.
141
+ For example, you may want to create a workspace for a company, a department or
142
+ for a specific topic.
143
+
144
+ There will always be a default workspace with the name default which might be enough
145
+ if you plan to use Birdwatcher for a small group of Twitter users.
146
+
147
+ USAGE:
148
+
149
+ List available workspaces:
150
+ workspace list
151
+
152
+ Create a new workspace:
153
+ workspace create NAME [DESCRIPTION]
154
+
155
+ Switch to a workspace:
156
+ workspace use NAME
157
+
158
+ Delete a workspace:
159
+ workspace delete NAME
160
+
161
+ Rename a workspace
162
+ workspace rename NAME NEW_NAME
163
+
164
+ birdwatcher[top5]>
165
+
166
+ Another core command in Birdwatcher is the `user` command which can be used to add users to the workspace as well as updating or removing them at a later point.
167
+
168
+ As an example, let's add the top 5 most followed Twitter users from [this list](http://twittercounter.com/pages/100). At the time of writing it is:
169
+
170
+ * Katy Perry ([@katyperry](https://twitter.com/katyperry))
171
+ * Justin Bieber ([@justinbieber](https://twitter.com/justinbieber))
172
+ * Taylor Swift ([@taylorswift13](https://twitter.com/taylorswift13))
173
+ * Barack Obama ([@BarackObama](https://twitter.com/BarackObama))
174
+ * Rihanna ([@rihanna](https://twitter.com/rihanna))
175
+
176
+ Execute the following command to add them to the workspace:
177
+
178
+ birdwatcher[top5]> user add katyperry justinbieber taylorswift13 BarackObama rihanna
179
+ [+] Added katyperry to workspace
180
+ [+] Added justinbieber to workspace
181
+ [+] Added taylorswift13 to workspace
182
+ [+] Added BarackObama to workspace
183
+ [+] Added rihanna to workspace
184
+ birdwatcher[top5]>
185
+
186
+ Birdwatcher fetched basic information on the users through the Twitter API. A summary of users in the workspace can be seen with the `user list` command. For more information on what the `user` command can do, simply enter `help user`.
187
+
188
+ Now that we have a couple of users in the workspace we can look at the `status` command which is another core command of the Birdwatcher framework:
189
+
190
+ birdwatcher[top5]> status fetch
191
+ [+] Fetching statuses for BarackObama... done
192
+ [+] Processing 1000 statuses... done
193
+ [+] Fetching statuses for justinbieber... done
194
+ [+] Processing 997 statuses... done
195
+ [+] Fetching statuses for katyperry... done
196
+ [+] Processing 998 statuses... done
197
+ [+] Fetching statuses for rihanna... done
198
+ [+] Processing 999 statuses... done
199
+ [+] Fetching statuses for taylorswift13... done
200
+ [+] Processing 996 statuses... done
201
+ birdwatcher[top5]>
202
+
203
+ The `status fetch` command fetches up to 1,000 statuses from the users in the workspace and saves them to the underlying database. The statuses are also processed to extract URLs, mentions and hashtags to separate database tables. Running `status fetch` at a later time will fetch any new statuses that the users might have posted since last fetch.
204
+
205
+ We can page through the last 1,000 statuses across all the users with the `status list` command:
206
+
207
+ . . . .
208
+
209
+ Justin Bieber (@justinbieber) * Oct 5, 23:50
210
+ Proud of my buddies https://t.co/4vc0qfmSyA
211
+ Favorites: 51505 | Retweets: 22434
212
+
213
+ ================================================================================
214
+
215
+ KATY PERRY (@katyperry) * Oct 5, 23:36
216
+ You hear it as "excellent" I hear it as "egg salad with lint" 🤔
217
+ Favorites: 14204 | Retweets: 4340
218
+
219
+ ================================================================================
220
+
221
+ Justin Bieber (@justinbieber) * Oct 5, 23:29
222
+ Great show https://t.co/jgtRbU4RHC
223
+ Favorites: 50799 | Retweets: 22617
224
+
225
+ ================================================================================
226
+
227
+ Barack Obama (@BarackObama) * Oct 5, 21:30
228
+ This historic step in the fight to #ActOnClimate came faster than anyone predicted. https://t.co/W2rtcNXkI7
229
+ Favorites: 5053 | Retweets: 1254
230
+
231
+ ================================================================================
232
+
233
+ Barack Obama (@BarackObama) * Oct 5, 20:44
234
+ "Today is a historic day in the fight to protect our planet for future generations." —President Obama #ActOnClimate https://t.co/x3dJSCYUcj
235
+ Favorites: 5500 | Retweets: 1652
236
+
237
+ . . . .
238
+
239
+ It is also possible to page through statuses which contain a certain word or phrase with the `status search <word>` command. See `help status` for more usage on the `status` command.
240
+
241
+ ### Modules
242
+
243
+ Being able to fetch Twitter users and statuses from the API is cool and all, but if that was all, Birdwatcher would just be an offline Twitter client. Modules are where the fun begins. Modules either enrich collected data with more data (e.g. Klout score) or do some sort of work on the collected data.
244
+
245
+ Here are some of the things the modules can do:
246
+
247
+ * Generate weighted word clouds based on statuses
248
+ * Listing the most shared URLs in a certain time frame
249
+ * Generate visual social graphs between users
250
+ * Crawl shared URLs to fetch status codes, content types and page titles
251
+ * Generate [KML](https://developers.google.com/kml/) files with geo-enabled statuses for viewing in [Google Earth](https://www.google.com/earth/)
252
+ * Generate Punchcard-style plots of when users are most engaged with Twitter
253
+
254
+ To see all available modules, use the `module list` command:
255
+
256
+ birdwatcher[top5]> module list
257
+ [+] Available Modules:
258
+
259
+ Name: KML Document
260
+ Description: Creates a KML document of statuses with Geo locations
261
+ Path: statuses/kml
262
+
263
+ ================================================================================
264
+
265
+ Name: Status Sentiment Analysis
266
+ Description: Enrich statuses with sentiment score
267
+ Path: statuses/sentiment
268
+
269
+ ================================================================================
270
+
271
+ . . . .
272
+
273
+ ================================================================================
274
+
275
+ Name: User Klout Topics
276
+ Description: Enrich users with their Klout topics
277
+ Path: users/klout_topics
278
+
279
+ ================================================================================
280
+
281
+ Name: Social Graph
282
+ Description: Graphs the social relations between users
283
+ Path: users/social_graph
284
+
285
+ ================================================================================
286
+
287
+ birdwatcher[top5]>
288
+
289
+ The name, description and _path_ are listed for each available module. The module path is how the modules are divided up into folders on the file system. The folders can be seen as _namespaces_ and give a clue about what sort of objects they work on.
290
+
291
+ The path is used to select modules with the `use` command:
292
+
293
+ birdwatcher[top5]> use statuses/word_cloud
294
+ birdwatcher[top5][statuses/word_cloud]>
295
+
296
+ The `use` command simply loads a module by providing the module's path as an argument. This also changes the command prompt to display the currently loaded module in square brackets next to the currently active workspace.
297
+
298
+ Now that we are inside the `statuses/word_cloud` module we may want to get a bit more information about it:
299
+
300
+ birdwatcher[top5][statuses/word_cloud]> show info
301
+
302
+ Name: Word Cloud
303
+ Description: Generates a word cloud from statuses
304
+ Author: Michael Henriksen <michenriksen@neomailbox.ch>
305
+ Path: statuses/word_cloud
306
+
307
+ ================================================================================
308
+
309
+ The Word Cloud module can generate a classic weighted word cloud from words used
310
+ in statuses across all or specific users and between different times.
311
+
312
+ The module is heavily configurable; have a look at the options with show options
313
+
314
+ Please note that configuring the module with a long timespan might result in a
315
+ very long execution time when the word cloud image is generated.
316
+
317
+ The generated image will be in PNG format.
318
+
319
+ birdwatcher[top5][statuses/word_cloud]>
320
+
321
+ The `show info` command shows additional information on the module if available. It can also be used to see any configuration options the module might have:
322
+
323
+ birdwatcher[top5][statuses/word_cloud]> show options
324
+
325
+ ------------------------------------------------------------------------------------------------------------------------------------------
326
+ Name Current Setting Required Description
327
+ ------------------------------------------------------------------------------------------------------------------------------------------
328
+ DEST yes Destination file
329
+ USERS no Space-separated list of screen names (all users if empty)
330
+ SINCE no Process statuses posted since specified time (last 7 days if empty)
331
+ BEFORE no Process statuses posted before specified time (from now if empty)
332
+ MIN_WORD_COUNT 3 no Exclude words mentioned fewer times than specified
333
+ MIN_WORD_LENGTH 3 no Exclude words smaller than specified
334
+ EXCLUDE_STOPWORDS true no Exclude english stopwords
335
+ EXCLUDE_COMMON true no Exclude common english words
336
+ EXCLUDE_WORDS no Space-separated list of words to exclude
337
+ EXCLUDE_HASHTAGS false no Exclude Hashtags
338
+ EXCLUDE_MENTIONS true no Exclude @username mentions
339
+ INCLUDE_PAGE_TITLES false no Include web page titles from shared URLs (requires crawling with urls/crawl)
340
+ WORD_CAP 200 no Cap list of words to specified amount
341
+ PALETTE #8F99AB #A3ADC2 #272A2F ... yes Space-separated list of hex color codes to use for word cloud
342
+ IMAGE_WIDTH 1024 yes Image width in pixels
343
+ IMAGE_HEIGHT 1024 yes Image height in pixels
344
+ ------------------------------------------------------------------------------------------------------------------------------------------
345
+
346
+ birdwatcher[top5][statuses/word_cloud]>
347
+
348
+ The `show options` command shows all of the configuration options for the loaded module. The `statuses/word_cloud` module happens to be one of the more configurable modules, but looking at the table we can see that the only required option we need to set is the `DEST` option which tells the module where to write the final word cloud image. Because the workspace doesn't contain a whole lot of users we will also set the `SINCE` option to `6 months ago` in order to make a word cloud of what the Top 5 Twitter users have been talking about through the last 6 months. On a side note, all module options that have something to do with dates and times are processed with the [Chronic](https://github.com/mojombo/chronic) gem for natural language processing; see the [examples](https://github.com/mojombo/chronic#examples) for what is supported.
349
+
350
+ birdwatcher[top5][statuses/word_cloud]> set DEST /tmp/wordcloud.png
351
+ birdwatcher[top5][statuses/word_cloud]> set SINCE 6 months ago
352
+ birdwatcher[top5][statuses/word_cloud]> run
353
+ [+] Processing 1542 statuses... done
354
+ [+] Generating word cloud, patience please... done
355
+ [+] Word cloud written to /tmp/wordcloud.png
356
+ birdwatcher[top5][statuses/word_cloud]>
357
+
358
+ After a couple of seconds the module wrote the word cloud image to `/tmp/wordcloud.png`. The result is:
359
+
360
+ ![Word cloud of Top-5 Twitter users](img/wordcloud.png)
361
+
362
+ ### Power User Features
363
+
364
+ Since everything is stored in a database, it is possible to perform arbitrary SQL queries against the data if you know the SQL language:
365
+
366
+ #### Raw SQL
367
+
368
+ birdwatcher[top5]> query select name, screen_name, followers_count from users where workspace_id = 2 order by followers_count DESC
369
+
370
+ +---------------+---------------+-----------------+
371
+ | name | screen_name | followers_count |
372
+ +---------------+---------------+-----------------+
373
+ | KATY PERRY | katyperry | 93379238 |
374
+ | Justin Bieber | justinbieber | 88715066 |
375
+ | Taylor Swift | taylorswift13 | 81158756 |
376
+ | Barack Obama | BarackObama | 77850900 |
377
+ | Rihanna | rihanna | 66491106 |
378
+ +---------------+---------------+-----------------+
379
+
380
+ The `query` command will execute any SQL query it receives as an argument and output the result in a table. As can be seen in the command example, the `query` command does not know how to scope the data to the currently active workspace, so you will have to take care of that in your queries. Usually rows can be scoped by filtering on `workspace_id = ?` in tables. The current workspace ID can be retrieved by issuing the `workspace` command.
381
+
382
+ #### Outputting in CSV Format
383
+
384
+ If you want to extract data with raw SQL and want it to be easily parsable by other applications or code, you can use the `query_csv` command. It works similarly to `query` but outputs the result in CSV format:
385
+
386
+ birdwatcher[top5]> query_csv select name, screen_name, followers_count from users where workspace_id = 2 order by followers_count DESC
387
+ name,screen_name,followers_count
388
+ KATY PERRY,katyperry,93379238
389
+ Justin Bieber,justinbieber,88715066
390
+ Taylor Swift,taylorswift13,81158756
391
+ Barack Obama,BarackObama,77850900
392
+ Rihanna,rihanna,66491106
393
+
394
+ #### Getting Schema Information
395
+
396
+ Performing raw SQL queries against Birdwatcher's database can be hard if you don't know how the tables are laid out. To get a list of available tables to inspect, you can use the `schema` command:
397
+
398
+ birdwatcher[default]> schema
399
+ [+] Available tables:
400
+
401
+ * hashtags
402
+ * hashtags_statuses
403
+ * influencees
404
+ * influencees_users
405
+ * influencers
406
+ * influencers_users
407
+ * klout_topics
408
+ * klout_topics_users
409
+ * mentions
410
+ * mentions_statuses
411
+ * schema_info
412
+ * statuses
413
+ * statuses_urls
414
+ * urls
415
+ * users
416
+ * workspaces
417
+
418
+ birdwatcher[default]>
419
+
420
+ Giving the `schema` command a table name as an argument will show detailed schema information, indexes and foreign keys for that table:
421
+
422
+ birdwatcher[default]> schema urls
423
+ [+] Schema information for table urls:
424
+
425
+ +--------------+-----------------------------+----------------------------------+------------+-------------+
426
+ | Column Name | Type | Default | Allow NULL | Primary Key |
427
+ +--------------+-----------------------------+----------------------------------+------------+-------------+
428
+ | id | integer | nextval('urls_id_seq'::regclass) | No | Yes |
429
+ | workspace_id | integer | NULL | Yes | No |
430
+ | url | text | NULL | Yes | No |
431
+ | final_url | text | NULL | Yes | No |
432
+ | http_status | integer | NULL | Yes | No |
433
+ | content_type | text | NULL | Yes | No |
434
+ | title | text | NULL | Yes | No |
435
+ | crawled_at | timestamp without time zone | NULL | Yes | No |
436
+ | updated_at | timestamp without time zone | NULL | Yes | No |
437
+ | created_at | timestamp without time zone | NULL | Yes | No |
438
+ +--------------+-----------------------------+----------------------------------+------------+-------------+
439
+
440
+ [+] Indexes on table urls:
441
+
442
+ +-------------------------+--------------+--------+
443
+ | Index Name | Column(s) | Unique |
444
+ +-------------------------+--------------+--------+
445
+ | urls_content_type_index | content_type | No |
446
+ | urls_crawled_at_index | crawled_at | No |
447
+ | urls_created_at_index | created_at | No |
448
+ | urls_final_url_index | final_url | No |
449
+ | urls_http_status_index | http_status | No |
450
+ | urls_title_index | title | No |
451
+ | urls_updated_at_index | updated_at | No |
452
+ | urls_url_index | url | No |
453
+ | urls_workspace_id_index | workspace_id | No |
454
+ +-------------------------+--------------+--------+
455
+
456
+ [+] Foreign keys on table urls:
457
+
458
+ +--------------+------------------+----------------------+
459
+ | Column(s) | Referenced Table | Referenced Column(s) |
460
+ +--------------+------------------+----------------------+
461
+ | workspace_id | workspaces | id |
462
+ +--------------+------------------+----------------------+
463
+
464
+ birdwatcher[default]>
465
+
466
+ If you want a graphical ER diagram, you can [find one here](img/birdwatcher_schema.png)
467
+
468
+ #### Interactive Ruby Shell
469
+
470
+ If you really know what you are doing and know the ins and outs of Birdwatcher's code, you can use the `irb` command to get an interactive Ruby shell ([IRB](http://ruby-doc.org/stdlib-2.0.0/libdoc/irb/rdoc/IRB.html)) where all of Birdwatcher's classes and models are available. You can use this command for debugging or for more complex, one-off data manipulation where building a module isn't suitable.
471
+
472
+ ## Development
473
+
474
+ ### Modules
475
+
476
+ For information on how to create a Birdwatcher module, please see this [Wiki page](https://github.com/michenriksen/birdwatcher/wiki/Creating-a-Birdwatcher-Module).
477
+
478
+ ## License
479
+
480
+ Birdwatcher is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
481
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t| # defines the `test` Rake task
5
+ t.libs << "test" # add test/ to the load path used when running tests
6
+ t.libs << "lib" # add lib/ to the load path used when running tests
7
+ t.test_files = FileList['test/**/*_test.rb'] # every *_test.rb file anywhere under test/
8
+ end
9
+
10
+ task :default => :test # running `rake` with no task name runs the tests
data/bin/console ADDED
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "birdwatcher"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ begin # best-effort Sequel setup: failures are reported below and the script carries on
14
+ Sequel.extension :migration, :core_extensions
15
+ DB = Sequel.connect(Birdwatcher::Configuration.get!(:database_connection_uri)) # connection URI is read from the Birdwatcher configuration
16
+ Sequel::Model.db = DB
17
+ Sequel::Model.plugin :timestamps
18
+ Dir[File.expand_path(File.join(File.dirname(__FILE__), "..", "models", "*.rb"))].each do |file| # every .rb file in ../models relative to this script
19
+ require file
20
+ end
21
+ rescue Birdwatcher::Configuration::UnknownKey # presumably raised by get! when :database_connection_uri is not configured -- confirm in Configuration
22
+ puts "WARNING: Database connection has not been configured"
23
+ puts "Sequel and models will not work."
24
+ rescue => e # any other StandardError raised by the setup above
25
+ puts "ERROR: An exception was raised when setting up Sequel: #{e.class}: #{e.message}"
26
+ puts "Sequel and models will not work."
27
+ end
28
+
29
+ def twitter_client # console helper: builds a Twitter REST client from one configured key pair
30
+ keypair = Birdwatcher::Configuration.get!(:twitter).sample # pick one of the configured key pairs at random
31
+ Twitter::REST::Client.new do |config|
32
+ config.consumer_key = keypair["consumer_key"]
33
+ config.consumer_secret = keypair["consumer_secret"]
34
+ end
35
+ rescue Birdwatcher::Configuration::UnknownKey # no :twitter configuration available; the method then returns nil (the result of puts)
36
+ puts "ERROR: Birdwatcher has not been configured with any Twitter keys"
37
+ end
38
+
39
+ require "irb"
39
+ require "awesome_print"
40
+ AwesomePrint.irb! # have IRB format evaluated results with awesome_print
41
+ IRB.start # hand control over to the interactive session
@@ -0,0 +1,40 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'birdwatcher/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "birdwatcher"
8
+ spec.version = Birdwatcher::VERSION
9
+ spec.authors = ["Michael Henrikesn"]
10
+ spec.email = ["michenriksen@neomailbox.ch"]
11
+
12
+ spec.summary = %q{Data analysis and OSINT framework for Twitter}
13
+ spec.homepage = "https://github.com/michenriksen/birdwatcher"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|doc|img)/}) }
17
+ spec.bindir = "exe"
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "sequel", "~> 4.38"
22
+ spec.add_dependency "pg", "~> 0.19.0"
23
+ spec.add_dependency "twitter", "~> 5.16"
24
+ spec.add_dependency "colorize", "~> 0.8.1"
25
+ spec.add_dependency "thread", "~> 0.2.2"
26
+ spec.add_dependency "httparty", "~> 0.14.0"
27
+ spec.add_dependency "highline", "~> 1.7", ">= 1.7.8"
28
+ spec.add_dependency "terminal-table", "~> 1.7", ">= 1.7.3"
29
+ spec.add_dependency "tty-pager", "~> 0.4.0"
30
+ spec.add_dependency "sentimental", "~> 1.4"
31
+ spec.add_dependency "ruby-graphviz", "~> 1.2", ">= 1.2.2"
32
+ spec.add_dependency "chronic", "~> 0.10.2"
33
+ spec.add_dependency "magic_cloud", "~> 0.0.3"
34
+ spec.add_dependency "cairo", "~> 1.15", ">= 1.15.2"
35
+ spec.add_dependency "awesome_print", "~> 1.7"
36
+
37
+ spec.add_development_dependency "bundler", "~> 1.12"
38
+ spec.add_development_dependency "rake", "~> 10.0"
39
+ spec.add_development_dependency "minitest", "~> 5.0"
40
+ end