empyrean 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,260 @@
1
+ # tweetparser.rb - parses tweets
2
+ #
3
+ # This file is part of Empyrean
4
+ # Copyright (C) 2015 nilsding, pixeldesu
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'json'
20
+ require 'empyrean/defaults'
21
+
22
module Empyrean
  # Collects statistics (mentions, hashtags, smileys, clients and posting
  # hours) from tweet hashes.
  #
  # A tweet is read as a Hash with the string keys 'id', 'text', 'source',
  # 'created_at' and, for retweets, 'retweeted_status'.
  class TweetParser
    # Character classes shared by the smiley patterns.
    EYES = "[xX8;:=%]"
    NOSE = "[-oc*^]"

    # Happy faces. Built once here instead of on every parse_one call.
    SMILE_REGEX = /(>?#{EYES}'?#{NOSE}[\)pPD\}\]>]|[\(\{\[<]#{NOSE}'?#{EYES}<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/

    # Sad faces.
    FROWN_REGEX = /(#{EYES}'?#{NOSE}[\(\[\\\/\{|]|[\)\]\\\/\}|]#{NOSE}'?#{EYES}|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/

    # Anything that looks like "scheme://"; the ":/" inside would otherwise
    # be counted as a frown.
    URL_SCHEME_REGEX = /\w+:\/\//

    private_constant :EYES, :NOSE, :SMILE_REGEX, :FROWN_REGEX, :URL_SCHEME_REGEX

    # options - object responding to #verbose (enables progress output)
    # config  - Hash read with the keys :ignored_users, :renamed_users and
    #           :timezone_difference
    def initialize(options, config)
      @options = options
      @config = config
    end

    # Parses an array of tweets.
    #
    # Retweets only contribute to :retweet_count, :clients, :times_of_day and
    # :tweet_count; their text is not scanned.
    #
    # Returns a Hash with the keys :mentions, :hashtags, :clients, :smileys
    # (each a Hash of per-item counters, with collected :examples for all but
    # :clients), :times_of_day (24 counters indexed by hour), :tweet_count,
    # :retweet_count and :selftweet_count.
    def parse(tweets)
      stats = self.class.blank_stats

      tweets.each do |tweet|
        parsed = parse_one(tweet)

        if parsed[:retweet]
          stats[:retweet_count] += 1
        else
          absorb(stats[:mentions], parsed[:mentions], [:name])
          absorb(stats[:hashtags], parsed[:hashtags], [:hashtag])
          absorb(stats[:smileys], parsed[:smileys], [:frown, :smiley])
          stats[:selftweet_count] += 1
        end

        # the client is counted for tweets and retweets alike
        client = parsed[:client]
        entry = (stats[:clients][client[:name]] ||= { count: 0 })
        entry[:count] += 1
        entry[:name] = client[:name]
        entry[:url] = client[:url]

        stats[:times_of_day][parsed[:time_of_day]] += 1
        stats[:tweet_count] += 1
      end

      stats
    end

    # Parses a single tweet object.
    #
    # Returns a Hash with the keys :mentions, :hashtags, :smileys (per-item
    # Hashes holding :count and one :example), :time_of_day (hour 0..23 after
    # applying the configured timezone difference), :retweet (true/false)
    # and :client ({ name:, url: }).
    #
    # Raises NoMethodError if tweet['created_at'] does not start with
    # "YYYY-MM-DD HH".
    def parse_one(tweet)
      puts "==> #{tweet['id']}" if @options.verbose
      result = {
        mentions: {},
        hashtags: {},
        time_of_day: 0,
        retweet: false,
        client: { name: "", url: "" },
        smileys: {}
      }

      # the status text of a retweet is somebody else's, so it is not scanned
      if tweet['retweeted_status'].nil?
        collect_mentions(tweet, result[:mentions])
        collect_hashtags(tweet, result[:hashtags])
        collect_smileys(tweet, result[:smileys])
      else
        result[:retweet] = true
      end

      result[:client] = client_of(tweet)

      hour = tweet['created_at'].match(/^\d{4}-\d{2}-\d{2} (\d{2})/)[1].to_i
      result[:time_of_day] = (hour + @config[:timezone_difference]) % 24

      result
    end

    class << self
      # Returns a fresh, zeroed statistics Hash as used by #parse and
      # merge_parsed.
      def blank_stats
        {
          mentions: {},
          hashtags: {},
          clients: {},
          smileys: {},
          times_of_day: [0] * 24,
          tweet_count: 0,
          retweet_count: 0,
          selftweet_count: 0
        }
      end

      # Merges an array which contains dicts returned by #parse and adds up
      # all counters.
      #
      # In the result every mention/hashtag/smiley keeps a single, randomly
      # picked :example instead of the :examples list, and :mentions,
      # :hashtags, :clients and :smileys are arrays of [key, data] pairs
      # ordered by descending :count.
      def merge_parsed(parsed)
        merged = blank_stats

        parsed.each do |elem|
          [:tweet_count, :retweet_count, :selftweet_count].each do |counter|
            merged[counter] += elem[counter]
          end

          elem[:mentions].each do |user, data|
            entry = (merged[:mentions][user] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:name] = data[:name]
            entry[:examples] = (entry[:examples] || []) + data[:examples]
          end

          elem[:hashtags].each do |hashtag, data|
            entry = (merged[:hashtags][hashtag] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:hashtag] = data[:hashtag]
            entry[:examples] = (entry[:examples] || []) + data[:examples]
          end

          elem[:smileys].each do |smile, data|
            entry = (merged[:smileys][smile] ||= { count: 0 })
            entry[:frown] ||= data[:frown]
            entry[:count] += data[:count]
            entry[:smiley] ||= data[:smiley]
            entry[:examples] = (entry[:examples] || []) + data[:examples]
          end

          elem[:clients].each do |client, data|
            entry = (merged[:clients][client] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:name] = data[:name]
            entry[:url] = data[:url]
          end

          elem[:times_of_day].each_with_index do |count, hour|
            merged[:times_of_day][hour] += count
          end
        end

        # take only one example
        [:mentions, :hashtags, :smileys].each do |section|
          merged[section].each_value do |entry|
            entry[:example] = entry.delete(:examples).sample
          end
        end

        [:mentions, :hashtags, :clients, :smileys].each do |section|
          merged[section] = merged[section].sort_by { |_key, entry| entry[:count] }.reverse
        end

        merged
      end
    end

    private

    # Adds the per-tweet items in +found+ to the running +totals+: sums the
    # counts, keeps the first seen value of each label key and collects the
    # example of every tweet.
    def absorb(totals, found, label_keys)
      found.each do |key, data|
        entry = (totals[key] ||= { count: 0 })
        entry[:count] += data[:count]
        label_keys.each { |label| entry[label] ||= data[label] }
        (entry[:examples] ||= []) << data[:example]
      end
    end

    # Counts one occurrence of +key+ in +bucket+, remembering the first seen
    # spelling under +label_key+ and the tweet as example.
    def record(bucket, key, label_key, label, tweet, initial = {})
      entry = (bucket[key] ||= initial)
      entry[label_key] ||= label
      entry[:count] = entry[:count].to_i.succ
      entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
    end

    # Scans the tweet text for mentions, skipping ignored users and applying
    # the configured renames. Keys are the downcased (or renamed) user names.
    def collect_mentions(tweet, mentions)
      tweet['text'].scan USERNAME_REGEX do |user|
        key = user[0].downcase
        puts "===> mentioned: #{user[0]}" if @options.verbose
        next if @config[:ignored_users].include? key

        renamed = @config[:renamed_users]
        key = renamed[key.to_sym] if renamed.include? key.to_sym
        record(mentions, key, :name, user[0], tweet)
      end
    end

    # Scans the tweet text for hashtags. Keys are the downcased hashtags.
    def collect_hashtags(tweet, hashtags)
      tweet['text'].scan HASHTAG_REGEX do |hashtag|
        puts "===> hashtag: ##{hashtag[0]}" if @options.verbose
        record(hashtags, hashtag[0].downcase, :hashtag, hashtag[0], tweet)
      end
    end

    # Scans the tweet text for smileys and frowns :^)
    def collect_smileys(tweet, smileys)
      # "&amp;" has to be decoded last: decoding it first would turn an
      # escaped literal such as "&amp;lt;" into "&lt;" and then into "<".
      text = tweet['text'].gsub("&lt;", "<").gsub("&gt;", ">").gsub("&amp;", "&")

      text.scan SMILE_REGEX do |match|
        smile = match[0]
        puts "===> smile: #{smile}" if @options.verbose
        record(smileys, smile, :smiley, smile, tweet, { frown: false })
      end

      # tweets containing a URL are not searched for frowns at all, because
      # the ":/" of "http://" looks like one
      return unless text !~ URL_SCHEME_REGEX

      text.scan FROWN_REGEX do |match|
        frown = match[0]
        puts "===> frown: #{frown}" if @options.verbose
        record(smileys, frown, :smiley, frown, tweet, { frown: true })
      end
    end

    # Tweet source (aka. the client the (re)tweet was made with).
    #
    # When the source does not match SOURCE_REGEX (or is missing), the raw
    # source text is used as the client name and the URL is left empty
    # instead of failing on the missing match.
    def client_of(tweet)
      source = tweet['source'].to_s
      found = source.match SOURCE_REGEX
      return { name: source, url: "" } if found.nil?

      { name: found[2], url: found[1] }
    end
  end
end
259
+
260
+ # kate: indent-width 2
data/lib/empyrean.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'empyrean/defaults'
2
+
3
# Root namespace of the Empyrean gem; this entry file only loads empyrean/defaults and declares the module.
+ module Empyrean
4
+ end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: empyrean
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - nilsding
8
+ - pixeldesu
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-06-05 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: With Empyrean, you can generate full stats of your Twitter account using
15
+ your Twitter archive.
16
+ email:
17
+ - nilsding@nilsding.org
18
+ - andy@pixelde.su
19
+ executables:
20
+ - empyrean
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - ".gitignore"
25
+ - Gemfile
26
+ - Gemfile.lock
27
+ - LICENSE
28
+ - README.md
29
+ - bin/empyrean
30
+ - config.yml.example
31
+ - empyrean.gemspec
32
+ - lib/empyrean.rb
33
+ - lib/empyrean/cli.rb
34
+ - lib/empyrean/configloader.rb
35
+ - lib/empyrean/defaults.rb
36
+ - lib/empyrean/optparser.rb
37
+ - lib/empyrean/templatelister.rb
38
+ - lib/empyrean/templaterenderer.rb
39
+ - lib/empyrean/templates/default.html.erb
40
+ - lib/empyrean/templates/pisg.html.erb
41
+ - lib/empyrean/tweetloader.rb
42
+ - lib/empyrean/tweetparser.rb
43
+ homepage: https://github.com/Leafcat/Empyrean
44
+ licenses:
45
+ - GPLv3
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.4.6
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Generates stats using your Twitter archive.
67
+ test_files: []
68
+ has_rdoc: