birdwatcher 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +42 -0
  9. data/birdwatcher.gemspec +40 -0
  10. data/data/english_stopwords.txt +319 -0
  11. data/data/top100Kenglishwords.txt +100000 -0
  12. data/db/migrations/001_create_workspaces.rb +11 -0
  13. data/db/migrations/002_create_users.rb +29 -0
  14. data/db/migrations/003_create_statuses.rb +28 -0
  15. data/db/migrations/004_create_mentions.rb +13 -0
  16. data/db/migrations/005_create_mentions_statuses.rb +8 -0
  17. data/db/migrations/006_create_hashtags.rb +11 -0
  18. data/db/migrations/007_create_hashtags_statuses.rb +8 -0
  19. data/db/migrations/008_create_urls.rb +16 -0
  20. data/db/migrations/009_create_statuses_urls.rb +8 -0
  21. data/db/migrations/010_create_klout_topics.rb +10 -0
  22. data/db/migrations/011_create_klout_topics_users.rb +8 -0
  23. data/db/migrations/012_create_influencers.rb +10 -0
  24. data/db/migrations/013_create_influencers_users.rb +8 -0
  25. data/db/migrations/014_create_influencees.rb +10 -0
  26. data/db/migrations/015_create_influencees_users.rb +8 -0
  27. data/exe/birdwatcher +12 -0
  28. data/lib/birdwatcher/command.rb +78 -0
  29. data/lib/birdwatcher/commands/back.rb +15 -0
  30. data/lib/birdwatcher/commands/exit.rb +16 -0
  31. data/lib/birdwatcher/commands/help.rb +60 -0
  32. data/lib/birdwatcher/commands/irb.rb +34 -0
  33. data/lib/birdwatcher/commands/module.rb +106 -0
  34. data/lib/birdwatcher/commands/query.rb +58 -0
  35. data/lib/birdwatcher/commands/query_csv.rb +56 -0
  36. data/lib/birdwatcher/commands/resource.rb +45 -0
  37. data/lib/birdwatcher/commands/run.rb +19 -0
  38. data/lib/birdwatcher/commands/schema.rb +116 -0
  39. data/lib/birdwatcher/commands/set.rb +56 -0
  40. data/lib/birdwatcher/commands/shell.rb +21 -0
  41. data/lib/birdwatcher/commands/show.rb +86 -0
  42. data/lib/birdwatcher/commands/status.rb +114 -0
  43. data/lib/birdwatcher/commands/unset.rb +37 -0
  44. data/lib/birdwatcher/commands/use.rb +25 -0
  45. data/lib/birdwatcher/commands/user.rb +155 -0
  46. data/lib/birdwatcher/commands/workspace.rb +176 -0
  47. data/lib/birdwatcher/concerns/concurrency.rb +25 -0
  48. data/lib/birdwatcher/concerns/core.rb +105 -0
  49. data/lib/birdwatcher/concerns/outputting.rb +114 -0
  50. data/lib/birdwatcher/concerns/persistence.rb +101 -0
  51. data/lib/birdwatcher/concerns/presentation.rb +122 -0
  52. data/lib/birdwatcher/concerns/util.rb +138 -0
  53. data/lib/birdwatcher/configuration.rb +63 -0
  54. data/lib/birdwatcher/configuration_wizard.rb +65 -0
  55. data/lib/birdwatcher/console.rb +201 -0
  56. data/lib/birdwatcher/http_client.rb +164 -0
  57. data/lib/birdwatcher/klout_client.rb +83 -0
  58. data/lib/birdwatcher/kml.rb +125 -0
  59. data/lib/birdwatcher/module.rb +253 -0
  60. data/lib/birdwatcher/modules/statuses/kml.rb +106 -0
  61. data/lib/birdwatcher/modules/statuses/sentiment.rb +77 -0
  62. data/lib/birdwatcher/modules/statuses/word_cloud.rb +205 -0
  63. data/lib/birdwatcher/modules/urls/crawl.rb +138 -0
  64. data/lib/birdwatcher/modules/urls/most_shared.rb +98 -0
  65. data/lib/birdwatcher/modules/users/activity_plot.rb +62 -0
  66. data/lib/birdwatcher/modules/users/import.rb +61 -0
  67. data/lib/birdwatcher/modules/users/influence_graph.rb +93 -0
  68. data/lib/birdwatcher/modules/users/klout_id.rb +62 -0
  69. data/lib/birdwatcher/modules/users/klout_influence.rb +83 -0
  70. data/lib/birdwatcher/modules/users/klout_score.rb +64 -0
  71. data/lib/birdwatcher/modules/users/klout_topics.rb +72 -0
  72. data/lib/birdwatcher/modules/users/social_graph.rb +110 -0
  73. data/lib/birdwatcher/punchcard.rb +183 -0
  74. data/lib/birdwatcher/util.rb +83 -0
  75. data/lib/birdwatcher/version.rb +3 -0
  76. data/lib/birdwatcher.rb +43 -0
  77. data/models/hashtag.rb +8 -0
  78. data/models/influencee.rb +8 -0
  79. data/models/influencer.rb +8 -0
  80. data/models/klout_topic.rb +8 -0
  81. data/models/mention.rb +8 -0
  82. data/models/status.rb +11 -0
  83. data/models/url.rb +8 -0
  84. data/models/user.rb +11 -0
  85. data/models/workspace.rb +26 -0
  86. metadata +405 -0
@@ -0,0 +1,83 @@
1
module Birdwatcher
  # HTTP client for the Klout v2 REST API.
  #
  # Every public method performs a single GET request (through +do_get+, which
  # is expected to come from Birdwatcher::HttpClient), passes the API key in
  # the +key+ query parameter and returns +nil+ when the response status is
  # anything other than 200.
  class KloutClient < Birdwatcher::HttpClient
    base_uri "https://api.klout.com/v2"

    # Class initializer
    #
    # @param api_key [String] Klout API key
    # @param options Http client options. They are merged shallowly over the
    #   defaults, so passing +:headers+ replaces the default headers entirely.
    # @see Birdwatcher::HttpClient
    def initialize(api_key, options = {})
      @api_key = api_key
      @options = {
        :headers => {
          "User-Agent" => "Birdwatcher v#{Birdwatcher::VERSION}",
          "Accept"     => "application/json"
        }
      }.merge(options)
    end

    # Get Klout ID of a Twitter user
    #
    # @param screen_name [String] Twitter screen name
    # @return [String] Klout ID or nil
    # @see https://klout.com/s/developers/v2#identities
    def get_id(screen_name)
      body = klout_get("/identity.json/twitter?screenName=#{url_encode(screen_name)}&key=#{url_encode(@api_key)}")
      body["id"] if body
    end

    # Get Klout score of a user
    #
    # @param klout_id [String]
    # @return [Numeric] Klout score or nil
    # @see https://klout.com/s/developers/v2#scores
    def get_score(klout_id)
      body = klout_get(klout_user_path(klout_id, "score"))
      body["score"] if body
    end

    # Get Klout topics of a user
    #
    # @param klout_id [String]
    # @return [Array] Topic display names, or nil if the request failed
    # @see https://klout.com/s/developers/v2#topic
    def get_topics(klout_id)
      body = klout_get(klout_user_path(klout_id, "topics"))
      body.map { |topic| topic["displayName"] } if body
    end

    # Get Klout influence graph of a user
    #
    # @param klout_id [String]
    # @return [Hash] +:influencers+ contains screen names of influencers,
    #   +:influencees+ contains screen names of influencees; nil if the request
    #   failed. A list that is absent from the response yields an empty array.
    # @see https://klout.com/s/developers/v2#influence
    def get_influence(klout_id)
      body = klout_get(klout_user_path(klout_id, "influence"))
      return unless body
      {
        :influencers => klout_nicks(body["myInfluencers"]),
        :influencees => klout_nicks(body["myInfluencees"])
      }
    end

    private

    # Perform a GET request and parse the JSON body
    # @private
    #
    # @param path [String] request path including the query string
    # @return parsed JSON body, or nil unless the response status is 200
    def klout_get(path)
      response = do_get(path)
      JSON.parse(response.body) if response.status == 200
    end

    # Build the path of a per-user resource
    # @private
    #
    # The Klout ID is URL encoded like every other interpolated value so that
    # an unexpected ID cannot alter the request path or query string.
    #
    # @param klout_id [String]
    # @param resource [String] resource name (e.g. +score+)
    # @return [String] request path including the API key
    def klout_user_path(klout_id, resource)
      "/user.json/#{url_encode(klout_id)}/#{resource}?key=#{url_encode(@api_key)}"
    end

    # Extract screen names from a list of influence entries
    # @private
    #
    # @param entries [Array, nil] +myInfluencers+ or +myInfluencees+ list
    # @return [Array] screen names (empty when the list is missing)
    def klout_nicks(entries)
      Array(entries).map { |entry| entry["entity"]["payload"]["nick"] }
    end

    # URL encode a string
    # @private
    #
    # @param string [String]
    # @return [String] URL encoded string
    def url_encode(string)
      CGI.escape(string.to_s)
    end
  end
end
@@ -0,0 +1,125 @@
1
module Birdwatcher
  # KML Document generator
  #
  # KML is a file format used to display geographic data in an Earth browser
  # such as Google Earth. You can create KML files to pinpoint locations, add
  # image overlays, and expose rich data in new ways. KML is an international
  # standard maintained by the Open Geospatial Consortium, Inc. (OGC).
  #
  # This class supports generating basic KML documents with Placemarks and Folders.
  #
  # @note Document and Folder attribute values, as well as Folder and Placemark
  #   IDs, are HTML escaped. Placemark attribute VALUES ARE NOT escaped (they
  #   are written verbatim so that they can carry nested markup); they have to
  #   be given in an escaped fashion if there is a risk that they might contain
  #   unexpected or dangerous HTML.
  # @see https://developers.google.com/kml/
  class KML
    # KML document header
    #
    # The text is flush-left on purpose: +<<-+ keeps the leading whitespace of
    # the content and the XML declaration has to be the very first thing in
    # the document.
    DOCUMENT_HEADER =
      <<-HEAD
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>
      HEAD

    # KML document footer
    DOCUMENT_FOOTER =
      <<-FOOT
</Document>
</kml>
      FOOT

    class Error < StandardError; end
    class UnknownFolderError < Birdwatcher::KML::Error; end

    # Class initializer
    #
    # @param attributes [Hash] Document attributes
    # @see https://developers.google.com/kml/documentation/kmlreference#document
    def initialize(attributes = {})
      @attributes = attributes
      @folders    = {}
      @placemarks = []
    end

    # Add a Placemark
    #
    # @param attributes [Hash] Placemark attributes
    # @see https://developers.google.com/kml/documentation/kmlreference#placemark
    def add_placemark(attributes)
      @placemarks << attributes
    end

    # Add a Folder
    #
    # @param id [String] Folder ID
    # @param attributes [Hash] Folder attributes
    #
    # @see https://developers.google.com/kml/documentation/kmlreference#folder
    def add_folder(id, attributes)
      @folders[id] = {
        :placemarks => []
      }.merge(attributes)
    end

    # Add a Placemark to a Folder
    #
    # @param folder_id [String]
    # @param attributes [Hash] Placemark attributes
    #
    # @raise [Birdwatcher::KML::UnknownFolderError] if folder doesn't exist
    # @see https://developers.google.com/kml/documentation/kmlreference#placemark
    def add_placemark_to_folder(folder_id, attributes)
      fail(UnknownFolderError, "There is no folder with id: #{folder_id}") unless @folders.key?(folder_id)
      @folders[folder_id][:placemarks] << attributes
    end

    # Generate the KML document
    #
    # Generation only reads the collected folders and placemarks, so it can be
    # called repeatedly and always produces the same document.
    #
    # @return [String] the final KML document
    def generate
      folders    = @folders.map { |id, attributes| generate_folder(id, attributes) }.join
      placemarks = @placemarks.map { |placemark| generate_placemark(placemark) }.join
      generate_document_header + folders + placemarks + generate_document_footer
    end

    private

    # Generate document header
    # @private
    def generate_document_header
      DOCUMENT_HEADER + @attributes.map { |k, v| "<#{k}>#{escape(v)}</#{k}>\n" }.join
    end

    # Generate document footer
    # @private
    def generate_document_footer
      DOCUMENT_FOOTER
    end

    # Generate Placemark element
    # @private
    #
    # The +:id+ attribute becomes the element's +id+ and is skipped when
    # writing child elements. The given hash is not modified, so the caller's
    # data stays intact and the ID survives repeated generation.
    #
    # @param attributes [Hash] Placemark attributes
    # @return [String] Placemark element
    def generate_placemark(attributes)
      opening  = attributes.key?(:id) ? "<Placemark id='#{escape(attributes[:id])}'>" : "<Placemark>"
      children = attributes.reject { |k, _| k == :id }
      body     = children.map { |k, v| "<#{k}>#{v}</#{k}>\n" }.join
      "#{opening}#{body}</Placemark>\n"
    end

    # Generate Folder element
    # @private
    #
    # The +:placemarks+ entry holds the folder's placemarks and is rendered
    # after the regular attributes. The stored folder hash is not modified.
    #
    # @param id [String] Folder ID
    # @param attributes [Hash] Folder attributes including +:placemarks+
    # @return [String] Folder element
    def generate_folder(id, attributes)
      children   = attributes.reject { |k, _| k == :placemarks }
      body       = children.map { |k, v| "<#{k}>#{escape(v)}</#{k}>\n" }.join
      placemarks = Array(attributes[:placemarks]).map { |placemark| generate_placemark(placemark) }.join
      "<Folder id='#{escape(id)}'>#{body}#{placemarks}</Folder>\n"
    end

    # HTML escape a string
    # @private
    def escape(string)
      CGI.escapeHTML(string.to_s)
    end
  end
end
@@ -0,0 +1,253 @@
1
module Birdwatcher
  # Base class of all Birdwatcher modules.
  #
  # Subclasses describe themselves through {meta=} and implement {#run}. The
  # class also keeps a registry of all subclasses keyed by their short path
  # (the file path relative to the modules directory, without extension).
  class Module
    class Error < StandardError; end
    class InvalidMetadataError < Error; end
    class MetadataNotSetError < Error; end
    class UnknownOptionError < Error; end

    include Birdwatcher::Concerns::Core
    include Birdwatcher::Concerns::Util
    include Birdwatcher::Concerns::Outputting
    include Birdwatcher::Concerns::Presentation
    include Birdwatcher::Concerns::Persistence
    include Birdwatcher::Concerns::Concurrency

    # Path to modules directory
    # @private
    MODULE_PATH = File.join(File.dirname(__FILE__), "modules").freeze

    # Get the module's file path
    # @private
    def self._file_path
      @_file_path
    end

    # Set the module's file path
    # @private
    #
    # @param path [String] file path
    def self._file_path=(path)
      @_file_path = path
    end

    # Automatically set the module file path
    # @private
    #
    # The path is taken from the first caller frame, i.e. the file in which
    # the subclass is being defined.
    # NOTE(review): the pattern stops at the first colon, so it presumably
    # truncates paths that contain one (e.g. Windows drive letters) - confirm.
    def self.inherited(k)
      super
      k._file_path = caller.first[/^[^:]+/]
    end

    # Get the module's meta data
    # @private
    #
    # @return [Hash] meta data
    # @raise [Birdwatcher::Module::MetadataNotSetError] if meta data has not been set
    def self.meta
      @meta || fail(MetadataNotSetError, "Metadata has not been set")
    end

    # Set the module's meta data
    #
    # @param meta [Hash] meta data
    #
    # The module's meta data is used by Birdwatcher to provide the user with
    # useful information such as name, a short description of what it does as
    # well as the author of the module in case they have any questions, etc.
    #
    # The meta data MUST be a hash and MUST contain at least the following keys:
    # * +:name+: The module's name (e.g. User Importer)
    # * +:description+: A short description of what the module can do
    # * +:author+: Your name and email (e.g. John Doe <john@doe.com>)
    # * +:options+: A hash of options for the module
    #
    # The +:options+ meta data key MUST be a Hash where each key is the option name
    # in UPPERCASE. The value MUST be a Hash and MUST contain at least the following
    # keys:
    # * +:value+: The default value of the option setting (set to +nil+ if none)
    # * +:description+: A short description of the option setting
    # * +:required+: Set to +true+ if the option setting is required to be set
    #
    # If the option setting is a boolean flag, the +:boolean+ key can be set to
    # +true+ to have Birdwatcher automatically parse "truthy" and "falsy" values
    # (e.g. "true", "1", "yes", "no", "0", etc) into boolean true or false
    #
    # If an option setting's +:required+ key is set to +true+, Birdwatcher will
    # automatically prevent running of the module if any of those option settings
    # contain +nil+ (have not been set).
    #
    # @example Example meta data:
    #   self.meta = {
    #     :name        => "User Importer",
    #     :description => "Import users from a file containing screen names",
    #     :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
    #     :options     => {
    #       "FILE" => {
    #         :value       => nil,
    #         :description => "File to read screen names from.",
    #         :required    => true
    #       }
    #     }
    #   }
    #
    # @raise [Birdwatcher::Module::InvalidMetadataError] if meta data is not valid
    def self.meta=(meta)
      validate_metadata(meta)
      @meta = meta
    end

    # Get a module by its path
    # @private
    #
    # @param path [String] Module's short path
    #
    # @return [Birdwatcher::Module] descendant
    def self.module_by_path(path)
      modules[path]
    end

    # Get module short paths
    # @private
    def self.module_paths
      modules.keys
    end

    # Get the module's short path
    # @private
    #
    # Only the leading modules directory and the trailing +.rb+ extension are
    # removed; any other occurrence of ".rb" inside the path is left alone.
    def self.path
      @_file_path.sub("#{MODULE_PATH}/", "").sub(/\.rb\z/, "")
    end

    # The module's detailed information and usage
    #
    # @abstract
    #
    # This method can be overwritten by modules to provide additional information
    # and usage to the user. The method will be called when the user invokes the
    # +show info+ on the module.
    #
    # The method must return a string.
    #
    # @return [String] additional module information
    def self.info; end

    # Get all Birdwatcher::Module descendants
    # @private
    #
    # @return [Array] module classes
    def self.descendants
      ObjectSpace.each_object(Class).select { |klass| klass < self }
    end

    # Get all Birdwatcher modules keyed by their short path
    # @private
    #
    # The registry is built on first use and then cached, so classes defined
    # after the first call are not picked up.
    #
    # @return [Hash] module classes where the key is the module's short path
    def self.modules
      @modules ||= descendants.each_with_object({}) do |descendant, registry|
        registry[descendant.path] = descendant
      end
    end

    # Validate module meta data
    # @private
    #
    # @param meta [Hash] meta data
    #
    # @raise [Birdwatcher::Module::InvalidMetadataError] if meta data is not valid.
    def self.validate_metadata(meta)
      fail InvalidMetadataError, "Metadata is not a hash" unless meta.is_a?(Hash)
      fail InvalidMetadataError, "Metadata is empty" if meta.empty?
      fail InvalidMetadataError, "Metadata is missing key: name" unless meta.key?(:name)
      fail InvalidMetadataError, "Metadata is missing key: description" unless meta.key?(:description)
      fail InvalidMetadataError, "Metadata is missing key: author" unless meta.key?(:author)
      fail InvalidMetadataError, "Metadata is missing key: options" unless meta.key?(:options)
      fail InvalidMetadataError, "Metadata name is not a string" unless meta[:name].is_a?(String)
      fail InvalidMetadataError, "Metadata description is not a string" unless meta[:description].is_a?(String)
      fail InvalidMetadataError, "Metadata author is not a string" unless meta[:author].is_a?(String)
      validate_metadata_options(meta[:options])
    end

    # Validate meta data module options
    # @private
    #
    # @param options [Hash] options
    #
    # Automatically called by {validate_metadata}
    #
    # @raise [Birdwatcher::Module::InvalidMetadataError] if options hash is not valid.
    def self.validate_metadata_options(options)
      fail InvalidMetadataError, "Metadata options is not a hash" unless options.is_a?(Hash)
      options.each_pair do |key, value|
        fail InvalidMetadataError, "Option key #{key} must be all uppercase" unless key == key.upcase
        fail InvalidMetadataError, "Option value for #{key} is not a hash" unless value.is_a?(Hash)
        fail InvalidMetadataError, "Option value for #{key} is missing key: value" unless value.key?(:value)
        fail InvalidMetadataError, "Option value for #{key} is missing key: description" unless value.key?(:description)
        fail InvalidMetadataError, "Option value for #{key} is missing key: required" unless value.key?(:required)
      end
    end

    # Execute a module and catch any exceptions raised
    # @private
    #
    # Calls the module's {run} method if options are valid and catches any
    # exceptions raised to display an error, followed by the backtrace, to the
    # user.
    #
    # @return the result of {run}, +false+ if the options are not valid, or
    #   +nil+ if an exception was rescued
    def execute
      validate_options && run
    rescue => e
      error("#{e.class}".bold + ": #{e.message}")
      puts e.backtrace.join("\n")
    end

    # The module's run method
    #
    # @abstract
    #
    # The run method must be overwritten by modules to perform the actual work.
    # The method is called when the user invokes the +run+ command in the
    # Birdwatcher console.
    #
    # If the module fails to run for whatever reason, e.g. insufficient data, the
    # method should return +false+.
    def run
      fail NotImplementedError, "Modules must implement #run method"
    end

    protected

    # Validate option settings
    # @private
    #
    # Reports the first required option whose value is +nil+.
    #
    # @return [Boolean] true if all required options are set and false otherwise
    def validate_options
      missing = options.find { |_key, value| value[:required] && value[:value].nil? }
      return true unless missing
      error("Setting for required option has not been set: #{missing.first.bold}")
      false
    end

    # Get the module's options hash
    # @private
    #
    # @return [Hash] options meta data hash.
    def options
      self.class.meta[:options]
    end

    # Get an option setting
    #
    # The option name is converted to an uppercase string before lookup.
    #
    # @example getting option settings
    #   option_setting("DEST")
    #   option_setting("USERS")
    #
    # @return option setting
    # @raise [Birdwatcher::Module::UnknownOptionError] if option is unknown
    def option_setting(option)
      option = option.to_s.upcase
      fail UnknownOptionError, "Unknown module option: #{option}" unless options.key?(option)
      options[option][:value]
    end
  end
end
@@ -0,0 +1,106 @@
1
module Birdwatcher
  module Modules
    module Statuses
      # Writes a KML document with one Folder per user and one Placemark per
      # status that has a geo location.
      class Kml < Birdwatcher::Module
        self.meta = {
          :name        => "KML Document",
          :description => "Creates a KML document of statuses with Geo locations",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "DEST" => {
              :value       => nil,
              :description => "Destination file",
              :required    => true
            },
            "USERS" => {
              :value       => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required    => false
            }
          }
        }

        def self.info
          <<-INFO
KML is a file format used to display geographic data in an Earth browser such as
Google Earth. You can create KML files to pinpoint locations, add image overlays,
and expose rich data in new ways. KML is an international standard maintained by
the Open Geospatial Consortium, Inc. (OGC).

This module can generate a KML document containing all statuses with geo information
which can be loaded into an application like Google Earth to browse and analyze
statuses.

The module supports mapping statuses from all or specific users.
          INFO
        end

        # Build the KML document and write it to the DEST file.
        #
        # Users without any geo-tagged status are skipped with a warning.
        #
        # @return +false+ if there are no users to process
        def run
          if option_setting("USERS")
            users = current_workspace.users_dataset
              .where("screen_name IN ?", option_setting("USERS").split(" ").map(&:strip))
              .order(:screen_name)
          else
            users = current_workspace.users_dataset.order(:screen_name)
          end
          if users.empty?
            error("There are no users to process")
            return false
          end
          kml_document = Birdwatcher::KML.new(
            :name => "Statuses with geo locations"
          )
          users.each do |user|
            statuses = user.statuses_dataset.where(:geo => true).order(Sequel.desc(:posted_at)).eager(:user)
            # Count once instead of re-querying for the check and the summary.
            status_count = statuses.count
            if status_count.zero?
              warn("User #{user.screen_name.bold} has no statuses with geo location; skipping")
              next
            end
            kml_document.add_folder(user.screen_name,
              :name        => "#{user.name} (@#{user.screen_name})",
              :description => "Statuses from #{user.screen_name}"
            )
            statuses.each do |status|
              kml_document.add_placemark_to_folder(user.screen_name, make_placemark(user, status))
            end
            info "Added #{pluralize(status_count, 'status', 'statuses')} from #{user.screen_name.bold}"
          end
          File.write(option_setting("DEST"), kml_document.generate)
          info("Wrote KML document to #{option_setting('DEST').bold}")
        end

        private

        # Build the Placemark attributes of a status.
        #
        # Placemark values are written verbatim by Birdwatcher::KML, so every
        # piece of data is escaped here.
        #
        # @param user the status' author
        # @param status the status to map
        # @return [Hash] Placemark attributes
        def make_placemark(user, status)
          {
            :id          => status.twitter_id,
            :name        => "@#{escape_html(user.screen_name)}, #{format_date(status.posted_at)}",
            :description => make_status_description(status),
            :Snippet     => escape_html(excerpt(status.text, 80)),
            # KML nests <Icon> inside <IconStyle>; it is not a direct child of <Style>.
            :Style       => "<IconStyle><Icon><href>#{escape_html(user.profile_image_url)}</href></Icon></IconStyle>",
            # KML coordinate tuples are ordered longitude,latitude.
            :Point       => "<coordinates>#{escape_html(status.longitude)},#{escape_html(status.latitude)}</coordinates>",
            :address     => "#{escape_html(status.place_name)}, #{escape_html(status.place_country)}",
            :TimeStamp   => "<when>#{escape_html(format_timestamp(status.posted_at))}</when>"
          }
        end

        # Build the HTML description shown in the Placemark's balloon.
        #
        # @param status the status to describe
        # @return [String] CDATA section containing the HTML
        def make_status_description(status)
          "<![CDATA[\n" +
          " <table>\n" +
          " <tr>\n" +
          " <td style='vertical-align:top'>\n" +
          " <img src='#{escape_html(status.user.profile_image_url)}' alt='#{escape_html(status.user.screen_name)}' />\n" +
          " </td>\n" +
          " <td>\n" +
          " <strong>#{escape_html(status.user.name)}</strong> <span style='color: #8899a6'>@#{escape_html(status.user.screen_name)} &middot; <a href='https://twitter.com/#{escape_html(status.user.screen_name)}/status/#{escape_html(status.twitter_id)}' style='color: inherit'>#{format_date(status.posted_at)}</a></span><br />\n" +
          " <p style='font-size: 16px'>#{escape_html(status.text)}</p>\n" +
          " <p style='font-style: italic'>#{escape_html(status.place_name)}, #{escape_html(status.place_country)}</p>\n" +
          " </td>\n" +
          " </tr>\n" +
          " </table>\n" +
          "]]>\n"
        end

        # Format a time for display (e.g. "Jan 31, 2016").
        def format_date(time)
          time.strftime("%b %d, %Y")
        end

        # Format a time as an XML dateTime for the KML <when> element:
        # zero-padded 24-hour clock and a UTC offset written as +hh:mm.
        def format_timestamp(time)
          time.strftime("%Y-%m-%dT%H:%M:%S%:z")
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module Birdwatcher
  module Modules
    module Statuses
      # Stores a sentiment label on every status in the workspace that does
      # not have one yet.
      class Sentiment < Birdwatcher::Module
        self.meta = {
          :name        => "Status Sentiment Analysis",
          :description => "Enrich statuses with sentiment score",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "THREADS" => {
              :value       => Birdwatcher::Concerns::Concurrency::DEFAULT_THREAD_POOL_SIZE,
              :description => "Number of concurrent threads",
              :required    => false
            }
          }
        }

        def self.info
          <<-INFO
The Status Sentiment Analysis module can calculate the rough sentiment of statuses
in the workspace. Each status will get a sentiment score of Negative, Positive
or Neutral.

Please note that sentiment analysis is not very accurate and should always be
manually reviewed for serious work.
          INFO
        end

        # Analyze every status whose sentiment is still +nil+.
        #
        # Each status is processed on the thread pool; a failure for a single
        # status is reported and does not stop the remaining ones.
        #
        # @return +false+ if there are no statuses to analyze
        def run
          statuses = current_workspace.statuses_dataset.where(:sentiment => nil)
          if statuses.empty?
            error("There are no statuses to analyze")
            return false
          end
          analyser = Sentimental.new
          threads  = thread_pool(option_setting("THREADS").to_i)
          task("Training the sentiment analyzer...") do
            analyser.load_defaults
          end
          statuses.each do |status|
            threads.process do
              begin
                text      = sanitize_text(status.text)
                sentiment = analyser.sentiment(text)
                case sentiment
                when :positive
                  info("Positive: ".bold.light_green + Birdwatcher::Util.excerpt(status.text, 80))
                when :negative
                  info("Negative: ".bold.light_red + Birdwatcher::Util.excerpt(status.text, 80))
                else
                  info(" Neutral: ".bold + Birdwatcher::Util.excerpt(status.text, 80))
                end
                status.sentiment = sentiment.to_s
                status.save
              rescue => e
                # to_s first: the ID is presumably not a String, and calling
                # the String helper on it would raise inside this handler.
                error("Sentiment analysis for status #{status.id.to_s.bold} failed (#{e.class})")
              end
            end
          end
          threads.shutdown
        end

        private

        # Remove words that carry no sentiment before analysis.
        #
        # Dropped words: mentions ("@name" and ".@name"), hashtags, anything
        # starting with "http" and the words "rt" and "oh" (case-insensitive).
        #
        # @param text [String] status text
        # @return [String] remaining words joined by single spaces
        def sanitize_text(text)
          text.split(" ").reject do |word|
            lowered = word.downcase
            word.start_with?("@", ".@", "#") ||
              lowered.start_with?("http") ||
              %w(rt oh).include?(lowered)
          end.join(" ")
        end
      end
    end
  end
end