birdwatcher 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/Rakefile +10 -0
- data/bin/console +42 -0
- data/birdwatcher.gemspec +40 -0
- data/data/english_stopwords.txt +319 -0
- data/data/top100Kenglishwords.txt +100000 -0
- data/db/migrations/001_create_workspaces.rb +11 -0
- data/db/migrations/002_create_users.rb +29 -0
- data/db/migrations/003_create_statuses.rb +28 -0
- data/db/migrations/004_create_mentions.rb +13 -0
- data/db/migrations/005_create_mentions_statuses.rb +8 -0
- data/db/migrations/006_create_hashtags.rb +11 -0
- data/db/migrations/007_create_hashtags_statuses.rb +8 -0
- data/db/migrations/008_create_urls.rb +16 -0
- data/db/migrations/009_create_statuses_urls.rb +8 -0
- data/db/migrations/010_create_klout_topics.rb +10 -0
- data/db/migrations/011_create_klout_topics_users.rb +8 -0
- data/db/migrations/012_create_influencers.rb +10 -0
- data/db/migrations/013_create_influencers_users.rb +8 -0
- data/db/migrations/014_create_influencees.rb +10 -0
- data/db/migrations/015_create_influencees_users.rb +8 -0
- data/exe/birdwatcher +12 -0
- data/lib/birdwatcher/command.rb +78 -0
- data/lib/birdwatcher/commands/back.rb +15 -0
- data/lib/birdwatcher/commands/exit.rb +16 -0
- data/lib/birdwatcher/commands/help.rb +60 -0
- data/lib/birdwatcher/commands/irb.rb +34 -0
- data/lib/birdwatcher/commands/module.rb +106 -0
- data/lib/birdwatcher/commands/query.rb +58 -0
- data/lib/birdwatcher/commands/query_csv.rb +56 -0
- data/lib/birdwatcher/commands/resource.rb +45 -0
- data/lib/birdwatcher/commands/run.rb +19 -0
- data/lib/birdwatcher/commands/schema.rb +116 -0
- data/lib/birdwatcher/commands/set.rb +56 -0
- data/lib/birdwatcher/commands/shell.rb +21 -0
- data/lib/birdwatcher/commands/show.rb +86 -0
- data/lib/birdwatcher/commands/status.rb +114 -0
- data/lib/birdwatcher/commands/unset.rb +37 -0
- data/lib/birdwatcher/commands/use.rb +25 -0
- data/lib/birdwatcher/commands/user.rb +155 -0
- data/lib/birdwatcher/commands/workspace.rb +176 -0
- data/lib/birdwatcher/concerns/concurrency.rb +25 -0
- data/lib/birdwatcher/concerns/core.rb +105 -0
- data/lib/birdwatcher/concerns/outputting.rb +114 -0
- data/lib/birdwatcher/concerns/persistence.rb +101 -0
- data/lib/birdwatcher/concerns/presentation.rb +122 -0
- data/lib/birdwatcher/concerns/util.rb +138 -0
- data/lib/birdwatcher/configuration.rb +63 -0
- data/lib/birdwatcher/configuration_wizard.rb +65 -0
- data/lib/birdwatcher/console.rb +201 -0
- data/lib/birdwatcher/http_client.rb +164 -0
- data/lib/birdwatcher/klout_client.rb +83 -0
- data/lib/birdwatcher/kml.rb +125 -0
- data/lib/birdwatcher/module.rb +253 -0
- data/lib/birdwatcher/modules/statuses/kml.rb +106 -0
- data/lib/birdwatcher/modules/statuses/sentiment.rb +77 -0
- data/lib/birdwatcher/modules/statuses/word_cloud.rb +205 -0
- data/lib/birdwatcher/modules/urls/crawl.rb +138 -0
- data/lib/birdwatcher/modules/urls/most_shared.rb +98 -0
- data/lib/birdwatcher/modules/users/activity_plot.rb +62 -0
- data/lib/birdwatcher/modules/users/import.rb +61 -0
- data/lib/birdwatcher/modules/users/influence_graph.rb +93 -0
- data/lib/birdwatcher/modules/users/klout_id.rb +62 -0
- data/lib/birdwatcher/modules/users/klout_influence.rb +83 -0
- data/lib/birdwatcher/modules/users/klout_score.rb +64 -0
- data/lib/birdwatcher/modules/users/klout_topics.rb +72 -0
- data/lib/birdwatcher/modules/users/social_graph.rb +110 -0
- data/lib/birdwatcher/punchcard.rb +183 -0
- data/lib/birdwatcher/util.rb +83 -0
- data/lib/birdwatcher/version.rb +3 -0
- data/lib/birdwatcher.rb +43 -0
- data/models/hashtag.rb +8 -0
- data/models/influencee.rb +8 -0
- data/models/influencer.rb +8 -0
- data/models/klout_topic.rb +8 -0
- data/models/mention.rb +8 -0
- data/models/status.rb +11 -0
- data/models/url.rb +8 -0
- data/models/user.rb +11 -0
- data/models/workspace.rb +26 -0
- metadata +405 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
class KloutClient < Birdwatcher::HttpClient
|
3
|
+
base_uri "https://api.klout.com/v2"
|
4
|
+
|
5
|
+
# Build a Klout API client.
#
# @param api_key [String] Klout API key
# @param options [Hash] Http client options; merged over the defaults, so a
#   caller supplied +:headers+ entry replaces the default headers entirely
# @see Birdwatcher::HttpClient
def initialize(api_key, options = {})
  @api_key = api_key
  default_headers = {
    "User-Agent" => "Birdwatcher v#{Birdwatcher::VERSION}",
    "Accept" => "application/json"
  }
  @options = { :headers => default_headers }.merge(options)
end
|
19
|
+
|
20
|
+
# Look up the Klout ID that belongs to a Twitter screen name.
#
# @param screen_name [String] Twitter screen name
# @return [String] Klout ID, or nil when the response status is not 200
# @see https://klout.com/s/developers/v2#identities
def get_id(screen_name)
  query = "screenName=#{url_encode(screen_name)}&key=#{url_encode(@api_key)}"
  response = do_get("/identity.json/twitter?#{query}")
  return unless response.status == 200
  JSON.parse(response.body)["id"]
end
|
31
|
+
|
32
|
+
# Get Klout score of a user
#
# @param klout_id [String]
# @return [Numeric] Klout score, or nil when the response status is not 200
# @see https://klout.com/s/developers/v2#scores
def get_score(klout_id)
  # The ID is URL encoded like every other interpolated value so that an
  # unexpected character cannot alter the request path.
  response = do_get("/user.json/#{url_encode(klout_id)}/score?key=#{url_encode(@api_key)}")
  return unless response.status == 200
  JSON.parse(response.body)["score"]
end
|
43
|
+
|
44
|
+
# Get Klout topics of a user
#
# @param klout_id [String]
# @return [Array] display names of the topics, or nil when the response
#   status is not 200
# @see https://klout.com/s/developers/v2#topic
def get_topics(klout_id)
  # The ID is URL encoded like every other interpolated value so that an
  # unexpected character cannot alter the request path.
  response = do_get("/user.json/#{url_encode(klout_id)}/topics?key=#{url_encode(@api_key)}")
  return unless response.status == 200
  JSON.parse(response.body).map { |topic| topic["displayName"] }
end
|
55
|
+
|
56
|
+
# Get Klout influence graph of a user
#
# @param klout_id [String]
# @return [Hash] +:influencers+ contains screen names of influencers,
#   +:influencees+ contains screen names of influencees; nil when the
#   response status is not 200
# @see https://klout.com/s/developers/v2#influence
def get_influence(klout_id)
  response = do_get("/user.json/#{url_encode(klout_id)}/influence?key=#{url_encode(@api_key)}")
  return unless response.status == 200
  body = JSON.parse(response.body)
  nick_of = lambda { |entry| entry["entity"]["payload"]["nick"] }
  # Array() turns a missing list into an empty one instead of failing on nil.
  {
    :influencers => Array(body["myInfluencers"]).map(&nick_of),
    :influencees => Array(body["myInfluencees"]).map(&nick_of)
  }
end
|
71
|
+
|
72
|
+
private
|
73
|
+
|
74
|
+
# Percent-encode a value for use inside a URL.
# @private
#
# @param string [String] value to encode; converted with +to_s+ first
# @return [String] URL encoded string
def url_encode(string)
  text = string.to_s
  CGI.escape(text)
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
# KML Document generator
#
# KML is a file format used to display geographic data in an Earth browser
# such as Google Earth. You can create KML files to pinpoint locations, add
# image overlays, and expose rich data in new ways. KML is an international
# standard maintained by the Open Geospatial Consortium, Inc. (OGC).
#
# This class supports generating basic KML documents with Placemarks and Folders.
#
# @note Document and Folder attribute values, as well as Folder and Placemark
#   IDs, are HTML escaped automatically. Placemark attribute values ARE NOT
#   escaped, so they can carry nested markup; they have to be given in an
#   HTML escaped fashion if there is a risk that they might contain
#   unexpected or dangerous HTML.
# @see https://developers.google.com/kml/
class KML
  # KML document header
  DOCUMENT_HEADER = <<-HEAD
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>
HEAD

  # KML document footer
  DOCUMENT_FOOTER = <<-FOOT
</Document>
</kml>
FOOT

  class Error < StandardError; end
  class UnknownFolderError < Error; end

  # Class initializer
  #
  # @param attributes [Hash] Document attributes
  # @see https://developers.google.com/kml/documentation/kmlreference#document
  def initialize(attributes = {})
    @attributes = attributes
    @folders = {}
    @placemarks = []
  end

  # Add a Placemark
  #
  # @param attributes [Hash] Placemark attributes; an optional +:id+ key
  #   becomes the element's id attribute
  # @see https://developers.google.com/kml/documentation/kmlreference#placemark
  def add_placemark(attributes)
    @placemarks << attributes
  end

  # Add a Folder
  #
  # @param id [String] Folder ID
  # @param attributes [Hash] Folder attributes
  #
  # @see https://developers.google.com/kml/documentation/kmlreference#folder
  def add_folder(id, attributes)
    @folders[id] = {
      :placemarks => []
    }.merge(attributes)
  end

  # Add a Placemark to a Folder
  #
  # @param folder_id [String]
  # @param attributes [Hash] Placemark attributes
  #
  # @raise [Birdwatcher::KML::UnknownFolderError] if folder doesn't exist
  # @see https://developers.google.com/kml/documentation/kmlreference#placemark
  def add_placemark_to_folder(folder_id, attributes)
    fail(UnknownFolderError, "There is no folder with id: #{folder_id}") unless @folders.key?(folder_id)
    @folders[folder_id][:placemarks] << attributes
  end

  # Generate the KML document
  #
  # The stored folders and placemarks are left untouched, so the method can
  # be called repeatedly and more elements can be added between calls.
  #
  # @return [String] the final KML document
  def generate
    # dup: the header may be the DOCUMENT_HEADER constant itself
    output = generate_document_header.dup
    @folders.each_pair { |id, attributes| output << generate_folder(id, attributes) }
    @placemarks.each { |placemark| output << generate_placemark(placemark) }
    output << generate_document_footer
  end

  private

  # Generate document header
  # @private
  def generate_document_header
    header = DOCUMENT_HEADER
    @attributes.each_pair { |k, v| header += "<#{k}>#{escape(v)}</#{k}>\n" }
    header
  end

  # Generate document footer
  # @private
  def generate_document_footer
    DOCUMENT_FOOTER
  end

  # Generate Placemark element
  # @private
  #
  # The +:id+ key is rendered as the id attribute of the element; every other
  # key becomes a child element whose value is inserted unescaped.
  def generate_placemark(attributes)
    open_tag = attributes.key?(:id) ? "<Placemark id='#{escape(attributes[:id])}'>" : "<Placemark>"
    # Skip :id instead of deleting it so the caller's hash is not modified.
    children = attributes.reject { |k, _| k == :id }.map { |k, v| "<#{k}>#{v}</#{k}>\n" }.join
    "#{open_tag}#{children}</Placemark>\n"
  end

  # Generate Folder element
  # @private
  def generate_folder(id, attributes)
    # Read :placemarks without removing it, so the folder survives this call.
    placemarks = Array(attributes[:placemarks])
    children = attributes.reject { |k, _| k == :placemarks }.map { |k, v| "<#{k}>#{escape(v)}</#{k}>\n" }.join
    "<Folder id='#{escape(id)}'>#{children}#{placemarks.map { |p| generate_placemark(p) }.join}</Folder>\n"
  end

  # HTML escape a string
  # @private
  def escape(string)
    CGI.escapeHTML(string.to_s)
  end
end
|
125
|
+
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
class Module
|
3
|
+
class Error < StandardError; end
|
4
|
+
class InvalidMetadataError < Error; end
|
5
|
+
class MetadataNotSetError < Error; end
|
6
|
+
class UnknownOptionError < Error; end
|
7
|
+
|
8
|
+
include Birdwatcher::Concerns::Core
|
9
|
+
include Birdwatcher::Concerns::Util
|
10
|
+
include Birdwatcher::Concerns::Outputting
|
11
|
+
include Birdwatcher::Concerns::Presentation
|
12
|
+
include Birdwatcher::Concerns::Persistence
|
13
|
+
include Birdwatcher::Concerns::Concurrency
|
14
|
+
|
15
|
+
# Path to modules directory
|
16
|
+
# @private
|
17
|
+
MODULE_PATH = File.join(File.dirname(__FILE__), "modules").freeze
|
18
|
+
|
19
|
+
# Read the file path recorded for this module class
# @private
#
# @return [String, nil] the stored path, or nil when none has been stored
def self._file_path
  return @_file_path if instance_variable_defined?(:@_file_path)
  nil
end
|
24
|
+
|
25
|
+
# Record the file path for this module class
# @private
#
# @param path [String] file path
def self._file_path=(path)
  instance_variable_set(:@_file_path, path)
end
|
32
|
+
|
33
|
+
# Automatically set the module file path
# @private
#
# Stores the path of the file that triggered this hook on the given class.
# The path is taken from the structured call stack instead of being cut out
# of the textual backtrace, so paths containing a colon (e.g. a Windows
# drive letter) are kept intact.
#
# @param k [Class] the class to record the file path on
def self.inherited(k)
  location = Array(caller_locations(1, 1)).first
  k._file_path = location && location.path
end
|
38
|
+
|
39
|
+
# Read the module's meta data
# @private
#
# @return [Hash] meta data
# @raise [Birdwatcher::Module::MetadataNotSetError] if meta data has not been set
def self.meta
  return @meta if @meta
  fail(MetadataNotSetError, "Metadata has not been set")
end
|
47
|
+
|
48
|
+
# Set the module's meta data
|
49
|
+
#
|
50
|
+
# @param meta [Hash] meta data
|
51
|
+
#
|
52
|
+
# The module's meta data is used by Birdwatcher to provide the user with
|
53
|
+
# useful information such as name, a short description of what it does as
|
54
|
+
# well as the author of the module in case they have any questions, etc.
|
55
|
+
#
|
56
|
+
# The meta data MUST be a hash and MUST contain at least the following keys:
|
57
|
+
# * +:name+: The module's name (e.g. User Importer)
|
58
|
+
# * +:description+: A short description of what the module can do
|
59
|
+
# * +:author+: Your name and email (e.g. John Doe <john@doe.com>)
|
60
|
+
# * +:options+: A hash of options for the module
|
61
|
+
#
|
62
|
+
# The +:options+ meta data key MUST be a Hash where each key is the option name
|
63
|
+
# in UPPERCASE. The value MUST be a Hash and MUST contain at least the following
|
64
|
+
# keys:
|
65
|
+
# * +:value+: The default value of the option setting (set to +nil+ if none)
|
66
|
+
# * +:description+: A short description of the option setting
|
67
|
+
# * +:required+: Set to +true+ if the option setting is required to be set
|
68
|
+
#
|
69
|
+
# If the option setting is a boolean flag, the +:boolean+ key can be set to
|
70
|
+
# +true+ to have Birdwatcher automatically parse "truthy" and "falsy" values
|
71
|
+
# (e.g. "true", "1", "yes", "no", "0", etc) into boolean true or false
|
72
|
+
#
|
73
|
+
# If an option setting's +:required+ key is set to +true+, Birdwatcher will
|
74
|
+
# automatically prevent running of the module if any of those option settings
|
75
|
+
# contain +nil+ (have not been set).
|
76
|
+
#
|
77
|
+
# @example Example meta data:
|
78
|
+
# self.meta = {
|
79
|
+
# :name => "User Importer",
|
80
|
+
# :description => "Import users from a file containing screen names",
|
81
|
+
# :author => "Michael Henriksen <michenriksen@neomailbox.ch>",
|
82
|
+
# :options => {
|
83
|
+
# "FILE" => {
|
84
|
+
# :value => nil,
|
85
|
+
# :description => "File to read screen names from.",
|
86
|
+
# :required => true
|
87
|
+
# }
|
88
|
+
# }
|
89
|
+
# }
|
90
|
+
def self.meta=(meta)
  # Validation runs first; if it raises, nothing is stored.
  @meta = meta.tap { |candidate| validate_metadata(candidate) }
end
|
94
|
+
|
95
|
+
# Look up a module by its short path
# @private
#
# @param path [String] Module's short path
#
# @return [Birdwatcher::Module] descendant, or nil when the path is unknown
def self.module_by_path(path)
  registry = modules
  registry[path]
end
|
104
|
+
|
105
|
+
# List the short paths of all known modules
# @private
#
# @return [Array] short paths, in the order of the {modules} hash
def self.module_paths
  modules.map { |short_path, _klass| short_path }
end
|
110
|
+
|
111
|
+
# Get the module's short path
# @private
#
# The short path is the module's file path with the modules directory
# prefix removed and the trailing +.rb+ extension cut off. Only the final
# extension is removed; a ".rb" sequence elsewhere in the path is kept.
#
# @return [String] short path (e.g. "users/import")
def self.path
  @_file_path.gsub("#{MODULE_PATH}/", "").sub(/\.rb\z/, "")
end
|
116
|
+
|
117
|
+
# The module's detailed information and usage
#
# @abstract
#
# This method can be overwritten by modules to provide additional information
# and usage to the user. The method will be called when the user invokes the
# +show info+ command on the module.
#
# An overriding method must return a string; this default returns nil.
#
# @return [String] additional module information
def self.info
  nil
end
|
129
|
+
|
130
|
+
# Collect every loaded class that inherits from this class
# @private
#
# @return [Array] module classes
def self.descendants
  ObjectSpace.each_object(Class).each_with_object([]) do |klass, found|
    found << klass if klass < self
  end
end
|
137
|
+
|
138
|
+
# Get all Birdwatcher modules sorted by their short path
# @private
#
# The hash is built on first use and cached afterwards. Descendants are
# sorted before insertion so the hash iterates in short path order.
#
# @return [Hash] module classes where the key is the module's short path
def self.modules
  @modules ||= descendants.sort_by(&:path).each_with_object({}) do |descendant, registry|
    registry[descendant.path] = descendant
  end
end
|
151
|
+
|
152
|
+
# Run the module, reporting any exception instead of letting it propagate
# @private
#
# Calls the module's {run} method if {validate_options} passes. Any
# StandardError raised is reported through +error+ and its backtrace is
# printed.
def execute
  begin
    validate_options && run
  rescue => exception
    headline = exception.class.to_s.bold + ": #{exception.message}"
    error(headline)
    puts(exception.backtrace.join("\n"))
  end
end
|
163
|
+
|
164
|
+
# The module's run method
#
# @abstract
#
# The run method must be overwritten by modules to perform the actual work.
# The method is called when the user invokes the +run+ command in the
# Birdwatcher console.
#
# If the module fails to run for whatever reason, e.g. insufficient data, the
# method should return +false+.
#
# @raise [NotImplementedError] always, unless overwritten
def run
  raise NotImplementedError, "Modules must implement #run method"
end
|
177
|
+
|
178
|
+
protected
|
179
|
+
|
180
|
+
# Validate option settings
# @private
#
# Reports the first required option whose value is nil through +error+.
#
# @return [Boolean] true if every required option has a value and false otherwise
def validate_options
  missing = options.find { |_key, setting| setting[:required] && setting[:value].nil? }
  return true unless missing
  error("Setting for required option has not been set: #{missing.first.bold}")
  false
end
|
192
|
+
|
193
|
+
# Check that module meta data has the required shape
# @private
#
# @param meta [Hash] meta data
#
# Checks run in this order: hash type, non-empty, presence of the required
# keys, string type of the textual keys, then the options themselves via
# {validate_metadata_options}.
#
# @raise [Birdwatcher::Module::InvalidMetadataError] if meta data is not valid.
def self.validate_metadata(meta)
  fail InvalidMetadataError, "Metadata is not a hash" unless meta.is_a?(Hash)
  fail InvalidMetadataError, "Metadata is empty" if meta.empty?
  [:name, :description, :author, :options].each do |required_key|
    next if meta.key?(required_key)
    fail InvalidMetadataError, "Metadata is missing key: #{required_key}"
  end
  [:name, :description, :author].each do |text_key|
    next if meta[text_key].is_a?(String)
    fail InvalidMetadataError, "Metadata #{text_key} is not a string"
  end
  validate_metadata_options(meta[:options])
end
|
211
|
+
|
212
|
+
# Validate meta data module options
# @private
#
# @param options [Hash] options
#
# Automatically called by {validate_metadata}. Every failure is raised as
# InvalidMetadataError, so callers can rescue a single error class.
#
# @raise [Birdwatcher::Module::InvalidMetadataError] if options hash is not valid.
def self.validate_metadata_options(options)
  fail InvalidMetadataError, "Metadata options is not a hash" unless options.is_a?(Hash)
  options.each_pair do |key, value|
    fail InvalidMetadataError, "Option key #{key} must be all uppercase" unless key == key.upcase
    fail InvalidMetadataError, "Option value for #{key} is not a hash" unless value.is_a?(Hash)
    [:value, :description, :required].each do |setting|
      next if value.key?(setting)
      fail InvalidMetadataError, "Option value for #{key} is missing key: #{setting}"
    end
  end
end
|
230
|
+
|
231
|
+
# Read the options hash from the class meta data
# @private
#
# @return [Hash] options meta data hash.
def options
  metadata = self.class.meta
  metadata[:options]
end
|
238
|
+
|
239
|
+
# Get an option setting
#
# The option name is converted to an uppercase string before the lookup.
#
# @example getting option settings
#   option_setting("DEST")
#   option_setting("USERS")
#
# @param option [String] option name
# @return option setting
# @raise [Birdwatcher::Module::UnknownOptionError] if option is unknown
def option_setting(option)
  name = option.to_s.upcase
  settings = options
  fail UnknownOptionError, "Unknown module option: #{name}" unless settings.key?(name)
  settings[name][:value]
end
|
252
|
+
end
|
253
|
+
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Statuses
|
4
|
+
class Kml < Birdwatcher::Module
|
5
|
+
self.meta = {
|
6
|
+
:name => "KML Document",
|
7
|
+
:description => "Creates a KML document of statuses with Geo locations",
|
8
|
+
:author => "Michael Henriksen <michenriksen@neomailbox.ch>",
|
9
|
+
:options => {
|
10
|
+
"DEST" => {
|
11
|
+
:value => nil,
|
12
|
+
:description => "Destination file",
|
13
|
+
:required => true
|
14
|
+
},
|
15
|
+
"USERS" => {
|
16
|
+
:value => nil,
|
17
|
+
:description => "Space-separated list of screen names (all users if empty)",
|
18
|
+
:required => false
|
19
|
+
}
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
# Long-form description of the module, one entry per line of text.
def self.info
  [
    "KML is a file format used to display geographic data in an Earth browser such as",
    "Google Earth. You can create KML files to pinpoint locations, add image overlays,",
    "and expose rich data in new ways. KML is an international standard maintained by",
    "the Open Geospatial Consortium, Inc. (OGC).",
    "",
    "This module can generate a KML document containing all statuses with geo information",
    "which can be loaded into an application like Google Earth to browse and analyze",
    "statuses.",
    "",
    "The module supports mapping statuses from all or specific users."
  ].map { |line| "#{line}\n" }.join
end
|
37
|
+
|
38
|
+
# Write a KML document with one folder per user and one placemark per
# status that has geo information.
#
# Users are limited to the screen names in the USERS option when it is set;
# otherwise all users of the current workspace are processed. The document
# is written to the file named by the DEST option.
#
# @return [false] when there are no users to process
def run
  if option_setting("USERS")
    users = current_workspace.users_dataset
      .where("screen_name IN ?", option_setting("USERS").split(" ").map(&:strip))
      .order(:screen_name)
  else
    users = current_workspace.users_dataset.order(:screen_name)
  end
  if users.empty?
    error("There are no users to process")
    return false
  end
  kml_document = Birdwatcher::KML.new(
    :name => "Statuses with geo locations"
  )
  users.each do |user|
    statuses = user.statuses_dataset.where(:geo => true).order(Sequel.desc(:posted_at)).eager(:user)
    # Counted once and reused for both the emptiness check and the summary.
    status_count = statuses.count
    if status_count.zero?
      warn("User #{user.screen_name.bold} has no statuses with geo location; skipping")
      next
    end
    kml_document.add_folder(user.screen_name,
      :name => "#{user.name} (@#{user.screen_name})",
      :description => "Statuses from #{user.screen_name}"
    )
    statuses.each do |status|
      kml_document.add_placemark_to_folder(user.screen_name,
        :id => status.twitter_id,
        :name => "@#{escape_html(user.screen_name)}, #{format_date(status.posted_at)}",
        :description => make_status_description(status),
        :Snippet => escape_html(excerpt(status.text, 80)),
        # NOTE(review): the KML reference nests Icon inside IconStyle; left as-is, confirm.
        :Style => "<Icon><href>#{escape_html(user.profile_image_url)}</href></Icon>",
        # KML coordinates are ordered longitude,latitude.
        :Point => "<coordinates>#{escape_html(status.longitude)},#{escape_html(status.latitude)}</coordinates>",
        :address => "#{escape_html(status.place_name)}, #{escape_html(status.place_country)}",
        # TimeStamp carries its value in a <when> child; 24-hour clock (%H) and a
        # colon separated UTC offset (%:z) give a valid dateTime value.
        :TimeStamp => "<when>#{escape_html(status.posted_at.strftime('%Y-%m-%dT%H:%M:%S%:z'))}</when>"
      )
    end
    info "Added #{pluralize(status_count, 'status', 'statuses')} from #{user.screen_name.bold}"
  end
  File.write(option_setting("DEST"), kml_document.generate)
  info("Wrote KML document to #{option_setting('DEST').bold}")
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
# Build the CDATA wrapped HTML snippet used as a placemark description.
#
# @param status status record; its user, text, twitter_id, posted_at,
#   place_name and place_country are read
# @return [String] CDATA section containing an HTML table
def make_status_description(status)
  rows = [
    "<![CDATA[",
    " <table>",
    " <tr>",
    " <td style='vertical-align:top'>",
    " <img src='#{escape_html(status.user.profile_image_url)}' alt='#{escape_html(status.user.screen_name)}' />",
    " </td>",
    " <td>",
    " <strong>#{escape_html(status.user.name)}</strong> <span style='color: #8899a6'>@#{escape_html(status.user.screen_name)} · <a href='https://twitter.com/#{escape_html(status.user.screen_name)}/status/#{escape_html(status.twitter_id)}' style='color: inherit'>#{format_date(status.posted_at)}</a></span><br />",
    " <p style='font-size: 16px'>#{escape_html(status.text)}</p>",
    " <p style='font-style: italic'>#{escape_html(status.place_name)}, #{escape_html(status.place_country)}</p>",
    " </td>",
    " </tr>",
    " </table>",
    "]]>"
  ]
  rows.map { |row| "#{row}\n" }.join
end
|
99
|
+
|
100
|
+
# Format a time as abbreviated month, zero padded day and year.
#
# @param time [Time]
# @return [String] e.g. "Mar 05, 2016"
def format_date(time)
  pattern = "%b %d, %Y"
  time.strftime(pattern)
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Birdwatcher
|
2
|
+
module Modules
|
3
|
+
module Statuses
|
4
|
+
class Sentiment < Birdwatcher::Module
|
5
|
+
self.meta = {
|
6
|
+
:name => "Status Sentiment Analysis",
|
7
|
+
:description => "Enrich statuses with sentiment score",
|
8
|
+
:author => "Michael Henriksen <michenriksen@neomailbox.ch>",
|
9
|
+
:options => {
|
10
|
+
"THREADS" => {
|
11
|
+
:value => Birdwatcher::Concerns::Concurrency::DEFAULT_THREAD_POOL_SIZE,
|
12
|
+
:description => "Number of concurrent threads",
|
13
|
+
:required => false
|
14
|
+
}
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
# Long-form description of the module, one entry per line of text.
def self.info
  [
    "The Status Sentiment Analysis module can calculate the rough sentiment of statuses",
    "in the workspace. Each status will get a sentiment score of Negative, Positive",
    "or Neutral.",
    "",
    "Please note that sentiment analysis is not very accurate and should always be",
    "manually reviewed for serious work."
  ].map { |line| "#{line}\n" }.join
end
|
28
|
+
|
29
|
+
# Score every status in the current workspace whose sentiment is still nil.
#
# Each status is analysed on the thread pool sized by the THREADS option,
# the result is logged, stored on the status and saved.
#
# @return [false] when there are no statuses to analyze
def run
  statuses = current_workspace.statuses_dataset.where(:sentiment => nil)
  if statuses.empty?
    error("There are no statuses to analyze")
    return false
  end
  analyser = Sentimental.new
  threads = thread_pool(option_setting("THREADS").to_i)
  task("Training the sentiment analyzer...") do
    analyser.load_defaults
  end
  statuses.each do |status|
    threads.process do
      begin
        text = sanitize_text(status.text)
        sentiment = analyser.sentiment(text)
        case sentiment
        when :positive
          info("Positive: ".bold.light_green + Birdwatcher::Util.excerpt(status.text, 80))
        when :negative
          info("Negative: ".bold.light_red + Birdwatcher::Util.excerpt(status.text, 80))
        else
          info(" Neutral: ".bold + Birdwatcher::Util.excerpt(status.text, 80))
        end
        status.sentiment = sentiment.to_s
        status.save
      rescue => e
        # The id is presumably not a String (TODO confirm against the schema);
        # convert it first so the styling helper cannot raise inside this handler.
        error("Sentiment analysis for status #{status.id.to_s.bold} failed (#{e.class})")
      end
    end
  end
  threads.shutdown
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
# Strip words that carry no sentiment before analysis.
#
# Removes mentions ("@name", ".@name"), hashtags, words starting with
# "http" (case-insensitive) and the words "rt" and "oh" (case-insensitive).
#
# @param text [String] status text
# @return [String] remaining words joined by single spaces
def sanitize_text(text)
  text.split(" ").map(&:strip).reject do |word|
    lowered = word.downcase
    word.start_with?("@", ".@", "#") ||
      lowered.start_with?("http") ||
      %w(rt oh).include?(lowered)
  end.join(" ")
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|