birdwatcher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +42 -0
  9. data/birdwatcher.gemspec +40 -0
  10. data/data/english_stopwords.txt +319 -0
  11. data/data/top100Kenglishwords.txt +100000 -0
  12. data/db/migrations/001_create_workspaces.rb +11 -0
  13. data/db/migrations/002_create_users.rb +29 -0
  14. data/db/migrations/003_create_statuses.rb +28 -0
  15. data/db/migrations/004_create_mentions.rb +13 -0
  16. data/db/migrations/005_create_mentions_statuses.rb +8 -0
  17. data/db/migrations/006_create_hashtags.rb +11 -0
  18. data/db/migrations/007_create_hashtags_statuses.rb +8 -0
  19. data/db/migrations/008_create_urls.rb +16 -0
  20. data/db/migrations/009_create_statuses_urls.rb +8 -0
  21. data/db/migrations/010_create_klout_topics.rb +10 -0
  22. data/db/migrations/011_create_klout_topics_users.rb +8 -0
  23. data/db/migrations/012_create_influencers.rb +10 -0
  24. data/db/migrations/013_create_influencers_users.rb +8 -0
  25. data/db/migrations/014_create_influencees.rb +10 -0
  26. data/db/migrations/015_create_influencees_users.rb +8 -0
  27. data/exe/birdwatcher +12 -0
  28. data/lib/birdwatcher/command.rb +78 -0
  29. data/lib/birdwatcher/commands/back.rb +15 -0
  30. data/lib/birdwatcher/commands/exit.rb +16 -0
  31. data/lib/birdwatcher/commands/help.rb +60 -0
  32. data/lib/birdwatcher/commands/irb.rb +34 -0
  33. data/lib/birdwatcher/commands/module.rb +106 -0
  34. data/lib/birdwatcher/commands/query.rb +58 -0
  35. data/lib/birdwatcher/commands/query_csv.rb +56 -0
  36. data/lib/birdwatcher/commands/resource.rb +45 -0
  37. data/lib/birdwatcher/commands/run.rb +19 -0
  38. data/lib/birdwatcher/commands/schema.rb +116 -0
  39. data/lib/birdwatcher/commands/set.rb +56 -0
  40. data/lib/birdwatcher/commands/shell.rb +21 -0
  41. data/lib/birdwatcher/commands/show.rb +86 -0
  42. data/lib/birdwatcher/commands/status.rb +114 -0
  43. data/lib/birdwatcher/commands/unset.rb +37 -0
  44. data/lib/birdwatcher/commands/use.rb +25 -0
  45. data/lib/birdwatcher/commands/user.rb +155 -0
  46. data/lib/birdwatcher/commands/workspace.rb +176 -0
  47. data/lib/birdwatcher/concerns/concurrency.rb +25 -0
  48. data/lib/birdwatcher/concerns/core.rb +105 -0
  49. data/lib/birdwatcher/concerns/outputting.rb +114 -0
  50. data/lib/birdwatcher/concerns/persistence.rb +101 -0
  51. data/lib/birdwatcher/concerns/presentation.rb +122 -0
  52. data/lib/birdwatcher/concerns/util.rb +138 -0
  53. data/lib/birdwatcher/configuration.rb +63 -0
  54. data/lib/birdwatcher/configuration_wizard.rb +65 -0
  55. data/lib/birdwatcher/console.rb +201 -0
  56. data/lib/birdwatcher/http_client.rb +164 -0
  57. data/lib/birdwatcher/klout_client.rb +83 -0
  58. data/lib/birdwatcher/kml.rb +125 -0
  59. data/lib/birdwatcher/module.rb +253 -0
  60. data/lib/birdwatcher/modules/statuses/kml.rb +106 -0
  61. data/lib/birdwatcher/modules/statuses/sentiment.rb +77 -0
  62. data/lib/birdwatcher/modules/statuses/word_cloud.rb +205 -0
  63. data/lib/birdwatcher/modules/urls/crawl.rb +138 -0
  64. data/lib/birdwatcher/modules/urls/most_shared.rb +98 -0
  65. data/lib/birdwatcher/modules/users/activity_plot.rb +62 -0
  66. data/lib/birdwatcher/modules/users/import.rb +61 -0
  67. data/lib/birdwatcher/modules/users/influence_graph.rb +93 -0
  68. data/lib/birdwatcher/modules/users/klout_id.rb +62 -0
  69. data/lib/birdwatcher/modules/users/klout_influence.rb +83 -0
  70. data/lib/birdwatcher/modules/users/klout_score.rb +64 -0
  71. data/lib/birdwatcher/modules/users/klout_topics.rb +72 -0
  72. data/lib/birdwatcher/modules/users/social_graph.rb +110 -0
  73. data/lib/birdwatcher/punchcard.rb +183 -0
  74. data/lib/birdwatcher/util.rb +83 -0
  75. data/lib/birdwatcher/version.rb +3 -0
  76. data/lib/birdwatcher.rb +43 -0
  77. data/models/hashtag.rb +8 -0
  78. data/models/influencee.rb +8 -0
  79. data/models/influencer.rb +8 -0
  80. data/models/klout_topic.rb +8 -0
  81. data/models/mention.rb +8 -0
  82. data/models/status.rb +11 -0
  83. data/models/url.rb +8 -0
  84. data/models/user.rb +11 -0
  85. data/models/workspace.rb +26 -0
  86. metadata +405 -0
@@ -0,0 +1,83 @@
1
module Birdwatcher
  # Client for the Klout v2 REST API.
  #
  # Every lookup issues one GET request through +do_get+ (not defined in this
  # class; presumably provided by Birdwatcher::HttpClient) and returns +nil+
  # when the response status is anything other than 200.
  class KloutClient < Birdwatcher::HttpClient
    base_uri "https://api.klout.com/v2"

    # Class initializer
    #
    # @param api_key [String] Klout API key
    # @param options [Hash] Http client options. They are merged shallowly over
    #   the defaults, so passing +:headers+ replaces the default headers.
    # @see Birdwatcher::HttpClient
    def initialize(api_key, options = {})
      @api_key = api_key
      @options = {
        :headers => {
          "User-Agent" => "Birdwatcher v#{Birdwatcher::VERSION}",
          "Accept"     => "application/json"
        }
      }.merge(options)
    end

    # Get Klout ID of a Twitter user
    #
    # @param screen_name [String] Twitter screen name
    # @return [String] Klout ID or nil
    # @see https://klout.com/s/developers/v2#identities
    def get_id(screen_name)
      response = do_get("/identity.json/twitter?screenName=#{url_encode(screen_name)}&key=#{url_encode(@api_key)}")
      JSON.parse(response.body)["id"] if response.status == 200
    end

    # Get Klout score of a user
    #
    # @param klout_id [String]
    # @return [Numeric] Klout score or nil
    # @see https://klout.com/s/developers/v2#scores
    def get_score(klout_id)
      response = do_get(user_resource_path(klout_id, "score"))
      JSON.parse(response.body)["score"] if response.status == 200
    end

    # Get Klout topics of a user
    #
    # @param klout_id [String]
    # @return [Array] Topic display names, or nil
    # @see https://klout.com/s/developers/v2#topic
    def get_topics(klout_id)
      response = do_get(user_resource_path(klout_id, "topics"))
      JSON.parse(response.body).map { |topic| topic["displayName"] } if response.status == 200
    end

    # Get Klout influence graph of a user
    #
    # @param klout_id [String]
    # @return [Hash] +:influencers+ contains screen names of influencers,
    #   +:influencees+ contains screen names of influencees; nil on failure
    # @see https://klout.com/s/developers/v2#influence
    def get_influence(klout_id)
      response = do_get(user_resource_path(klout_id, "influence"))
      return unless response.status == 200
      body = JSON.parse(response.body)
      {
        :influencers => influence_nicks(body["myInfluencers"]),
        :influencees => influence_nicks(body["myInfluencees"])
      }
    end

    private

    # Build the request path for a per-user resource
    # @private
    #
    # The Klout ID is URL encoded like every other dynamic part of the URL so
    # that an unexpected value cannot alter the request path or query string.
    #
    # @param klout_id [String]
    # @param resource [String] resource name, e.g. "score"
    # @return [String] path including the API key query parameter
    def user_resource_path(klout_id, resource)
      "/user.json/#{url_encode(klout_id)}/#{resource}?key=#{url_encode(@api_key)}"
    end

    # Extract screen names from a list of influence entries
    # @private
    #
    # @param entries [Array<Hash>, nil] influence entries; a missing list is
    #   treated as empty instead of raising NoMethodError
    # @return [Array] screen names
    def influence_nicks(entries)
      (entries || []).map { |entry| entry["entity"]["payload"]["nick"] }
    end

    # URL encode a string
    # @private
    #
    # @param string [String]
    # @return [String] URL encoded string
    def url_encode(string)
      CGI.escape(string.to_s)
    end
  end
end
@@ -0,0 +1,125 @@
1
module Birdwatcher
  # KML Document generator
  #
  # KML is a file format used to display geographic data in an Earth browser
  # such as Google Earth. You can create KML files to pinpoint locations, add
  # image overlays, and expose rich data in new ways. KML is an international
  # standard maintained by the Open Geospatial Consortium, Inc. (OGC).
  #
  # This class supports generating basic KML documents with Placemarks and Folders.
  #
  # @note Document attributes, Folder attributes and all +id+ values are HTML
  #   escaped. Placemark attribute values ARE NOT escaped (so they may carry
  #   nested markup); they have to be given in an escaped fashion if there is a
  #   risk that they might contain unexpected or dangerous HTML.
  # @see https://developers.google.com/kml/
  class KML
    # KML document header
    DOCUMENT_HEADER = [
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<kml xmlns="http://www.opengis.net/kml/2.2">',
      "<Document>",
      ""
    ].join("\n").freeze

    # KML document footer
    DOCUMENT_FOOTER = "</Document>\n</kml>\n".freeze

    class Error < StandardError; end
    class UnknownFolderError < Birdwatcher::KML::Error; end

    # Class initializer
    #
    # @param attributes [Hash] Document attributes
    # @see https://developers.google.com/kml/documentation/kmlreference#document
    def initialize(attributes = {})
      @attributes = attributes
      @folders    = {}
      @placemarks = []
    end

    # Add a Placemark
    #
    # @param attributes [Hash] Placemark attributes
    # @see https://developers.google.com/kml/documentation/kmlreference#placemark
    def add_placemark(attributes)
      @placemarks << attributes
    end

    # Add a Folder
    #
    # @param id [String] Folder ID
    # @param attributes [Hash] Folder attributes
    #
    # @see https://developers.google.com/kml/documentation/kmlreference#folder
    def add_folder(id, attributes)
      @folders[id] = { :placemarks => [] }.merge(attributes)
    end

    # Add a Placemark to a Folder
    #
    # @param folder_id [String]
    # @param attributes [Hash] Placemark attributes
    #
    # @raise [Birdwatcher::KML::UnknownFolderError] if folder doesn't exist
    # @see https://developers.google.com/kml/documentation/kmlreference#placemark
    def add_placemark_to_folder(folder_id, attributes)
      raise(UnknownFolderError, "There is no folder with id: #{folder_id}") unless @folders.key?(folder_id)
      @folders[folder_id][:placemarks] << attributes
    end

    # Generate the KML document
    #
    # Generation does not modify the stored folders or placemarks, so the
    # method can be called repeatedly and yields the same document each time.
    #
    # @return [String] the final KML document
    def generate
      output = generate_document_header
      @folders.each_pair { |id, attributes| output << generate_folder(id, attributes) }
      @placemarks.each { |placemark| output << generate_placemark(placemark) }
      output << generate_document_footer
    end

    private

    # Generate document header
    # @private
    #
    # @return [String] a fresh, mutable string (never the frozen constant)
    def generate_document_header
      @attributes.each_with_object(DOCUMENT_HEADER.dup) do |(key, value), header|
        header << "<#{key}>#{escape(value)}</#{key}>\n"
      end
    end

    # Generate document footer
    # @private
    def generate_document_footer
      DOCUMENT_FOOTER
    end

    # Generate Placemark element
    # @private
    #
    # The +:id+ attribute becomes the element's +id+ XML attribute; every other
    # attribute becomes a child element whose value is emitted verbatim.
    # The given hash is left untouched.
    def generate_placemark(attributes)
      opening  = attributes.key?(:id) ? "<Placemark id='#{escape(attributes[:id])}'>" : "<Placemark>"
      children = attributes.reject { |key, _| key == :id }
                           .map { |key, value| "<#{key}>#{value}</#{key}>\n" }
                           .join
      "#{opening}#{children}</Placemark>\n"
    end

    # Generate Folder element
    # @private
    #
    # The +:placemarks+ entry is rendered as nested Placemark elements after
    # the folder's own (escaped) attributes. The given hash is left untouched.
    def generate_folder(id, attributes)
      children   = attributes.reject { |key, _| key == :placemarks }
                             .map { |key, value| "<#{key}>#{escape(value)}</#{key}>\n" }
                             .join
      placemarks = attributes[:placemarks].map { |placemark| generate_placemark(placemark) }.join
      "<Folder id='#{escape(id)}'>#{children}#{placemarks}</Folder>\n"
    end

    # HTML escape a string
    # @private
    def escape(string)
      CGI.escapeHTML(string.to_s)
    end
  end
end
@@ -0,0 +1,253 @@
1
module Birdwatcher
  # Base class for Birdwatcher modules.
  #
  # Subclasses declare their meta data with {meta=} and implement {run}. The
  # class keeps track of the file each subclass was defined in so that a short
  # path relative to the modules directory can be derived for it.
  class Module
    class Error < StandardError; end
    class InvalidMetadataError < Error; end
    class MetadataNotSetError < Error; end
    class UnknownOptionError < Error; end

    include Birdwatcher::Concerns::Core
    include Birdwatcher::Concerns::Util
    include Birdwatcher::Concerns::Outputting
    include Birdwatcher::Concerns::Presentation
    include Birdwatcher::Concerns::Persistence
    include Birdwatcher::Concerns::Concurrency

    # Path to modules directory
    # @private
    MODULE_PATH = File.join(File.dirname(__FILE__), "modules").freeze

    # Get the module's file path
    # @private
    def self._file_path
      @_file_path
    end

    # Set the module's file path
    # @private
    #
    # @param path [String] file path
    def self._file_path=(path)
      @_file_path = path
    end

    # Automatically set the module file path
    # @private
    #
    # The path is read from the structured caller location instead of being
    # cut out of the backtrace string at the first colon, which would truncate
    # paths that contain a colon themselves (e.g. Windows drive letters).
    def self.inherited(subclass)
      super
      subclass._file_path = caller_locations(1, 1).first.path
    end

    # Get the module's meta data
    # @private
    #
    # @return [Hash] meta data
    # @raise [Birdwatcher::Module::MetadataNotSetError] if meta data has not been set
    def self.meta
      @meta || raise(MetadataNotSetError, "Metadata has not been set")
    end

    # Set the module's meta data
    #
    # @param meta [Hash] meta data
    #
    # The module's meta data is used by Birdwatcher to provide the user with
    # useful information such as name, a short description of what it does as
    # well as the author of the module in case they have any questions, etc.
    #
    # The meta data MUST be a hash and MUST contain at least the following keys:
    # * +:name+: The module's name (e.g. User Importer)
    # * +:description+: A short description of what the module can do
    # * +:author+: Your name and email (e.g. John Doe <john@doe.com>)
    # * +:options+: A hash of options for the module
    #
    # The +:options+ meta data key MUST be a Hash where each key is the option name
    # in UPPERCASE. The value MUST be a Hash and MUST contain at least the following
    # keys:
    # * +:value+: The default value of the option setting (set to +nil+ if none)
    # * +:description+: A short description of the option setting
    # * +:required+: Set to +true+ if the option setting is required to be set
    #
    # If the option setting is a boolean flag, the +:boolean+ key can be set to
    # +true+ to have Birdwatcher automatically parse "truthy" and "falsy" values
    # (e.g. "true", "1", "yes", "no", "0", etc) into boolean true or false
    #
    # If an option setting's +:required+ key is set to +true+, Birdwatcher will
    # automatically prevent running of the module if any of those option settings
    # contain +nil+ (have not been set).
    #
    # @raise [Birdwatcher::Module::InvalidMetadataError] if meta data is not valid
    #
    # @example Example meta data:
    #   self.meta = {
    #     :name        => "User Importer",
    #     :description => "Import users from a file containing screen names",
    #     :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
    #     :options     => {
    #       "FILE" => {
    #         :value       => nil,
    #         :description => "File to read screen names from.",
    #         :required    => true
    #       }
    #     }
    #   }
    def self.meta=(meta)
      validate_metadata(meta)
      @meta = meta
    end

    # Get a module by its path
    # @private
    #
    # @param path [String] Module's short path
    #
    # @return [Birdwatcher::Module] descendant, or nil if the path is unknown
    def self.module_by_path(path)
      modules[path]
    end

    # Get module short paths
    # @private
    def self.module_paths
      modules.keys
    end

    # Get the module's short path
    # @private
    #
    # The short path is the file path with the modules directory prefix and
    # the trailing +.rb+ extension removed. Only a trailing extension is
    # stripped, so a ".rb" occurring elsewhere in the path is preserved.
    def self.path
      @_file_path.sub("#{MODULE_PATH}/", "").sub(/\.rb\z/, "")
    end

    # The module's detailed information and usage
    #
    # @abstract
    #
    # This method can be overwritten by modules to provide additional information
    # and usage to the user. The method will be called when the user invokes the
    # +show info+ on the module.
    #
    # The method must return a string.
    #
    # @return [String] additional module information
    def self.info; end

    # Get all Birdwatcher::Module descendants
    # @private
    #
    # @return [Array] module classes
    def self.descendants
      ObjectSpace.each_object(Class).select { |klass| klass < self }
    end

    # Get all Birdwatcher modules sorted by their short path
    # @private
    #
    # The result is built once and memoized; classes defined afterwards are
    # not picked up.
    #
    # @return [Hash] module classes where the key is the module's short path
    def self.modules
      @modules ||= descendants.sort_by(&:path).each_with_object({}) do |descendant, index|
        index[descendant.path] = descendant
      end
    end

    # Execute a module and catch any exceptions raised
    # @private
    #
    # Calls the module's {run} method if options are valid and catches any
    # exceptions raised to display an error to the user.
    def execute
      validate_options && run
    rescue => e
      error("#{e.class}".bold + ": #{e.message}")
      puts e.backtrace.join("\n")
    end

    # The module's run method
    #
    # @abstract
    #
    # The run method must be overwritten by modules to perform the actual work.
    # The method is called when the user invokes the +run+ command in the
    # Birdwatcher console.
    #
    # If the module fails to run for whatever reason, e.g. insufficient data, the
    # method should return +false+.
    def run
      raise NotImplementedError, "Modules must implement #run method"
    end

    protected

    # Validate option settings
    # @private
    #
    # Reports the first required option whose value is +nil+ through +error+.
    #
    # @return [Boolean] true if all required options are set and false otherwise
    def validate_options
      options.each_pair do |key, value|
        if value[:required] && value[:value].nil?
          error("Setting for required option has not been set: #{key.bold}")
          return false
        end
      end
      true
    end

    # Validate module meta data
    # @private
    #
    # NOTE: +protected+ above does not apply to singleton methods, so this and
    # {validate_metadata_options} remain publicly callable on the class.
    #
    # @param meta [Hash] meta data
    #
    # @raise [Birdwatcher::Module::InvalidMetadataError] if meta data is not valid.
    def self.validate_metadata(meta)
      raise InvalidMetadataError, "Metadata is not a hash" unless meta.is_a?(Hash)
      raise InvalidMetadataError, "Metadata is empty" if meta.empty?
      [:name, :description, :author, :options].each do |key|
        raise InvalidMetadataError, "Metadata is missing key: #{key}" unless meta.key?(key)
      end
      [:name, :description, :author].each do |key|
        raise InvalidMetadataError, "Metadata #{key} is not a string" unless meta[key].is_a?(String)
      end
      validate_metadata_options(meta[:options])
    end

    # Validate meta data module options
    # @private
    #
    # @param options [Hash] options
    #
    # Automatically called by {validate_metadata}
    #
    # @raise [Birdwatcher::Module::InvalidMetadataError] if options hash is not valid.
    def self.validate_metadata_options(options)
      raise InvalidMetadataError, "Metadata options is not a hash" unless options.is_a?(Hash)
      options.each_pair do |key, value|
        raise InvalidMetadataError, "Option key #{key} must be all uppercase" unless key == key.upcase
        raise InvalidMetadataError, "Option value for #{key} is not a hash" unless value.is_a?(Hash)
        [:value, :description, :required].each do |required_key|
          next if value.key?(required_key)
          raise InvalidMetadataError, "Option value for #{key} is missing key: #{required_key}"
        end
      end
    end

    # Get the module's options hash
    # @private
    #
    # @return [Hash] options meta data hash.
    def options
      self.class.meta[:options]
    end

    # Get an option setting
    #
    # The option name is converted to an uppercase string before lookup.
    #
    # @example getting option settings
    #   option_setting("DEST")
    #   option_setting("USERS")
    #
    # @return option setting
    # @raise [Birdwatcher::Module::UnknownOptionError] if option is unknown
    def option_setting(option)
      option = option.to_s.upcase
      raise UnknownOptionError, "Unknown module option: #{option}" unless options.key?(option)
      options[option][:value]
    end
  end
end
@@ -0,0 +1,106 @@
1
module Birdwatcher
  module Modules
    module Statuses
      # Writes a KML document with one Folder per user and one Placemark per
      # status that has a geo location.
      class Kml < Birdwatcher::Module
        self.meta = {
          :name        => "KML Document",
          :description => "Creates a KML document of statuses with Geo locations",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "DEST" => {
              :value       => nil,
              :description => "Destination file",
              :required    => true
            },
            "USERS" => {
              :value       => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required    => false
            }
          }
        }

        # Detailed module information shown to the user
        #
        # @return [String] additional module information
        def self.info
          <<-INFO
KML is a file format used to display geographic data in an Earth browser such as
Google Earth. You can create KML files to pinpoint locations, add image overlays,
and expose rich data in new ways. KML is an international standard maintained by
the Open Geospatial Consortium, Inc. (OGC).

This module can generate a KML document containing all statuses with geo information
which can be loaded into an application like Google Earth to browse and analyze
statuses.

The module supports mapping statuses from all or specific users.
          INFO
        end

        # Build the KML document and write it to the DEST file
        #
        # Users are taken from the current workspace, optionally limited to
        # the screen names given in USERS, and processed in screen name order.
        # Users without any geo-tagged status are skipped with a warning.
        #
        # @return [false] if there are no users to process
        def run
          users = current_workspace.users_dataset
          if option_setting("USERS")
            screen_names = option_setting("USERS").split(" ").map(&:strip)
            users = users.where("screen_name IN ?", screen_names)
          end
          users = users.order(:screen_name)
          if users.empty?
            error("There are no users to process")
            return false
          end
          kml_document = Birdwatcher::KML.new(
            :name => "Statuses with geo locations"
          )
          users.each do |user|
            statuses = user.statuses_dataset.where(:geo => true).order(Sequel.desc(:posted_at)).eager(:user)
            # Count once; the same number is reused for the summary line below.
            status_count = statuses.count
            if status_count.zero?
              warn("User #{user.screen_name.bold} has no statuses with geo location; skipping")
              next
            end
            kml_document.add_folder(user.screen_name,
              :name        => "#{user.name} (@#{user.screen_name})",
              :description => "Statuses from #{user.screen_name}"
            )
            statuses.each do |status|
              kml_document.add_placemark_to_folder(user.screen_name, status_placemark(user, status))
            end
            info "Added #{pluralize(status_count, 'status', 'statuses')} from #{user.screen_name.bold}"
          end
          File.write(option_setting("DEST"), kml_document.generate)
          info("Wrote KML document to #{option_setting('DEST').bold}")
        end

        private

        # Build the Placemark attributes for a single status
        #
        # Placemark values are emitted verbatim by Birdwatcher::KML, so every
        # piece of user-controlled data is HTML escaped here.
        #
        # @param user the status author
        # @param status the status to map
        # @return [Hash] Placemark attributes
        def status_placemark(user, status)
          {
            :id          => status.twitter_id,
            :name        => "@#{escape_html(user.screen_name)}, #{format_date(status.posted_at)}",
            :description => make_status_description(status),
            :Snippet     => escape_html(excerpt(status.text, 80)),
            :Style       => "<Icon><href>#{escape_html(user.profile_image_url)}</href></Icon>",
            # NOTE(review): the KML reference specifies coordinates as
            # longitude,latitude; confirm how latitude/longitude are stored
            # before changing the order used here.
            :Point       => "<coordinates>#{escape_html(status.latitude)},#{escape_html(status.longitude)}</coordinates>",
            :address     => "#{escape_html(status.place_name)}, #{escape_html(status.place_country)}",
            :TimeStamp   => "<when>#{escape_html(kml_timestamp(status.posted_at))}</when>"
          }
        end

        # Format a time as an XML dateTime value for a KML TimeStamp
        #
        # Uses the 24-hour, zero-padded hour (%H) and a UTC offset with a
        # colon (%:z); the 12-hour blank-padded %l does not yield a valid
        # dateTime and loses the AM/PM distinction.
        #
        # @param time [Time]
        # @return [String] e.g. "2016-10-03T14:05:09+02:00"
        def kml_timestamp(time)
          time.strftime("%Y-%m-%dT%H:%M:%S%:z")
        end

        # Build the HTML description balloon for a status
        #
        # @param status the status to describe
        # @return [String] CDATA section containing the HTML markup
        def make_status_description(status)
          author      = status.user
          screen_name = escape_html(author.screen_name)
          place       = "#{escape_html(status.place_name)}, #{escape_html(status.place_country)}"
          [
            "<![CDATA[\n",
            " <table>\n",
            " <tr>\n",
            " <td style='vertical-align:top'>\n",
            " <img src='#{escape_html(author.profile_image_url)}' alt='#{screen_name}' />\n",
            " </td>\n",
            " <td>\n",
            " <strong>#{escape_html(author.name)}</strong> <span style='color: #8899a6'>@#{screen_name} &middot; <a href='https://twitter.com/#{screen_name}/status/#{escape_html(status.twitter_id)}' style='color: inherit'>#{format_date(status.posted_at)}</a></span><br />\n",
            " <p style='font-size: 16px'>#{escape_html(status.text)}</p>\n",
            " <p style='font-style: italic'>#{place}</p>\n",
            " </td>\n",
            " </tr>\n",
            " </table>\n",
            "]]>\n"
          ].join
        end

        # Format a time for display, e.g. "Oct 03, 2016"
        #
        # @param time [Time]
        # @return [String]
        def format_date(time)
          time.strftime("%b %d, %Y")
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ module Birdwatcher
2
+ module Modules
3
+ module Statuses
4
+ class Sentiment < Birdwatcher::Module
5
# Module meta data. THREADS defaults to the thread pool size defined by the
# Concurrency concern and is optional.
self.meta = {
  name: "Status Sentiment Analysis",
  description: "Enrich statuses with sentiment score",
  author: "Michael Henriksen <michenriksen@neomailbox.ch>",
  options: {
    "THREADS" => {
      value: Birdwatcher::Concerns::Concurrency::DEFAULT_THREAD_POOL_SIZE,
      description: "Number of concurrent threads",
      required: false
    }
  }
}
17
+
18
# Detailed module information shown to the user
#
# @return [String] additional module information
def self.info
  [
    "The Status Sentiment Analysis module can calculate the rough sentiment of statuses",
    "in the workspace. Each status will get a sentiment score of Negative, Positive",
    "or Neutral.",
    "",
    "Please note that sentiment analysis is not very accurate and should always be",
    "manually reviewed for serious work.",
    ""
  ].join("\n")
end
28
+
29
+ def run
30
+ statuses = current_workspace.statuses_dataset.where(:sentiment => nil)
31
+ if statuses.empty?
32
+ error("There are no statuses to analyze")
33
+ return false
34
+ end
35
+ analyser = Sentimental.new
36
+ threads = thread_pool(option_setting("THREADS").to_i)
37
+ task("Training the sentiment analyzer...") do
38
+ analyser.load_defaults
39
+ end
40
+ statuses.each do |status|
41
+ threads.process do
42
+ begin
43
+ text = sanitize_text(status.text)
44
+ sentiment = analyser.sentiment(text)
45
+ case sentiment
46
+ when :positive
47
+ info("Positive: ".bold.light_green + Birdwatcher::Util.excerpt(status.text, 80))
48
+ when :negative
49
+ info("Negative: ".bold.light_red + Birdwatcher::Util.excerpt(status.text, 80))
50
+ else
51
+ info(" Neutral: ".bold + Birdwatcher::Util.excerpt(status.text, 80))
52
+ end
53
+ status.sentiment = sentiment.to_s
54
+ status.save
55
+ rescue => e
56
+ error("Sentiment analysis for status #{status.id.bold} failed (#{e.class})")
57
+ end
58
+ end
59
+ end
60
+ threads.shutdown
61
+ end
62
+
63
+ private
64
+
65
+ def sanitize_text(text)
66
+ text.split(" ").map(&:strip).delete_if do |word|
67
+ word.start_with?("@") ||
68
+ word.start_with?(".@")
69
+ word.start_with?("#") ||
70
+ word.downcase.start_with?("http") ||
71
+ %w(rt oh).include?(word.downcase)
72
+ end.join(" ")
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end