empyrean 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,260 @@
1
+ # tweetparser.rb - parses tweets
2
+ #
3
+ # This file is part of Empyrean
4
+ # Copyright (C) 2015 nilsding, pixeldesu
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'json'
20
+ require 'empyrean/defaults'
21
+
22
module Empyrean
  # Turns tweet hashes (string keys 'id', 'text', 'source', 'created_at' and
  # optionally 'retweeted_status') into usage statistics: mentions, hashtags,
  # smileys, clients and an hour-of-day histogram.
  #
  # USERNAME_REGEX, HASHTAG_REGEX and SOURCE_REGEX are not defined here; they
  # are resolved at call time from the enclosing namespace.
  class TweetParser
    # Building blocks of the smiley patterns (regex character classes).
    EYES = "[xX8;:=%]".freeze
    NOSE = "[-oc*^]".freeze

    # Happy and sad emoticons; each pattern has exactly one capture group.
    SMILE_REGEX = /(>?#{EYES}'?#{NOSE}[\)pPD\}\]>]|[\(\{\[<]#{NOSE}'?#{EYES}<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/
    FROWN_REGEX = /(#{EYES}'?#{NOSE}[\(\[\\\/\{|]|[\)\]\\\/\}|]#{NOSE}'?#{EYES}|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/

    # "scheme://" marker; tweets containing one are not scanned for frowns
    # because ":/" inside a URL would be counted as a frown.
    URL_SCHEME_REGEX = /\w+:\/\//

    # The three HTML entities that are unescaped before looking for smileys.
    HTML_ENTITY_REGEX = /&(?:amp|lt|gt);/
    HTML_ENTITIES = { "&amp;" => "&", "&lt;" => "<", "&gt;" => ">" }.freeze

    # options - object responding to #verbose (enables progress output)
    # config  - Hash read with the keys :ignored_users, :renamed_users and
    #           :timezone_difference
    def initialize(options, config)
      @options = options
      @config = config
    end

    # Parses an array of tweets.
    #
    # Retweets only contribute to the retweet, client and time-of-day
    # counters; their text is not analysed.
    #
    # Returns a Hash with :mentions, :hashtags, :clients and :smileys (each a
    # Hash keyed by the lower-cased/renamed item), :times_of_day (Array of 24
    # counters) and :tweet_count, :retweet_count, :selftweet_count.
    def parse(tweets)
      stats = self.class.empty_stats

      tweets.each do |tweet|
        parsed = parse_one(tweet)

        if parsed[:retweet]
          stats[:retweet_count] += 1
        else
          parsed[:mentions].each { |user, data| tally(stats[:mentions], user, data, :name) }
          parsed[:hashtags].each { |tag, data| tally(stats[:hashtags], tag, data, :hashtag) }
          parsed[:smileys].each do |smiley, data|
            entry = tally(stats[:smileys], smiley, data, :smiley)
            entry[:frown] ||= data[:frown]
          end
          stats[:selftweet_count] += 1
        end

        # clients are counted for retweets as well
        client = parsed[:client]
        entry = (stats[:clients][client[:name]] ||= { count: 0 })
        entry[:count] += 1
        entry[:name] = client[:name]
        entry[:url] = client[:url]

        stats[:times_of_day][parsed[:time_of_day]] += 1
        stats[:tweet_count] += 1
      end

      stats
    end

    # Parses a single tweet object.
    #
    # Returns a Hash with :mentions, :hashtags and :smileys (each mapping an
    # item to { count, example, ... }), :retweet (true when the tweet carries
    # a 'retweeted_status'), :client ({ name, url }) and :time_of_day (0..23).
    #
    # Raises NoMethodError when 'created_at' does not start with
    # "YYYY-MM-DD HH".
    def parse_one(tweet)
      puts "==> #{tweet['id']}" if @options.verbose
      result = {
        mentions: {},
        hashtags: {},
        time_of_day: 0,
        retweet: false,
        client: { name: "", url: "" },
        smileys: {}
      }

      if tweet['retweeted_status'].nil?
        scan_mentions(tweet, result[:mentions])
        scan_hashtags(tweet, result[:hashtags])
        scan_smileys(tweet, result[:smileys])
      else
        # a retweet: the status text is somebody else's, so ignore it
        result[:retweet] = true
      end

      result[:client] = client_for(tweet['source'])
      result[:time_of_day] = hour_of_day(tweet['created_at'])
      result
    end

    class << self
      # Builds a fresh, empty aggregate as returned by #parse.
      def empty_stats
        {
          mentions: {},
          hashtags: {},
          clients: {},
          smileys: {},
          times_of_day: [0] * 24,
          tweet_count: 0,
          retweet_count: 0,
          selftweet_count: 0,
        }
      end

      # Merges an array which contains dicts returned by #parse, adding up all
      # counters. The input dicts are not modified.
      #
      # In the result every mention/hashtag/smiley keeps a single, randomly
      # picked :example instead of the :examples list, and :mentions,
      # :hashtags, :clients and :smileys are Arrays of [key, data] pairs
      # ordered by descending :count.
      def merge_parsed(parsed)
        merged = empty_stats

        parsed.each do |chunk|
          [:tweet_count, :retweet_count, :selftweet_count].each do |counter|
            merged[counter] += chunk[counter]
          end

          chunk[:mentions].each do |user, data|
            absorb(merged[:mentions], user, data)[:name] = data[:name]
          end

          chunk[:hashtags].each do |hashtag, data|
            absorb(merged[:hashtags], hashtag, data)[:hashtag] = data[:hashtag]
          end

          chunk[:smileys].each do |smiley, data|
            entry = absorb(merged[:smileys], smiley, data)
            entry[:frown] ||= data[:frown]
            entry[:smiley] ||= data[:smiley]
          end

          chunk[:clients].each do |client, data|
            entry = (merged[:clients][client] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:name] = data[:name]
            entry[:url] = data[:url]
          end

          chunk[:times_of_day].each_with_index do |count, hour|
            merged[:times_of_day][hour] += count
          end
        end

        # take only one example
        [:mentions, :hashtags, :smileys].each do |section|
          merged[section].each_value do |entry|
            entry[:example] = entry.delete(:examples).sample
          end
        end

        [:mentions, :hashtags, :clients, :smileys].each do |section|
          merged[section] = merged[section].sort_by { |_key, entry| entry[:count] }.reverse
        end

        merged
      end

      private

      # Adds data's count and examples to target[key]; returns the entry.
      # A new examples array is built so the source dict stays untouched.
      def absorb(target, key, data)
        entry = (target[key] ||= { count: 0 })
        entry[:count] += data[:count]
        entry[:examples] = (entry[:examples] || []) + data[:examples]
        entry
      end
    end

    private

    # Adds one parse_one result (data) to the aggregate bucket under key,
    # keeping the first seen value for label; returns the aggregate entry.
    def tally(bucket, key, data, label)
      entry = (bucket[key] ||= { count: 0 })
      entry[:count] += data[:count]
      entry[label] ||= data[label]
      (entry[:examples] ||= []) << data[:example]
      entry
    end

    # Registers one occurrence of key in bucket: remembers the first spelling
    # under label, bumps the counter and keeps the tweet as the example.
    def record(bucket, key, label, value, tweet, seed = {})
      entry = (bucket[key] ||= seed)
      entry[label] ||= value
      entry[:count] = entry[:count].to_i.succ
      entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
    end

    # Collects @mentions, skipping ignored users and applying renames.
    def scan_mentions(tweet, mentions)
      tweet['text'].scan(USERNAME_REGEX) do |user|
        key = user[0].downcase
        puts "===> mentioned: #{user[0]}" if @options.verbose
        next if @config[:ignored_users].include?(key)

        renamed = @config[:renamed_users]
        key = renamed[key.to_sym] if renamed.include?(key.to_sym)
        record(mentions, key, :name, user[0], tweet)
      end
    end

    # Collects #hashtags, keyed case-insensitively.
    def scan_hashtags(tweet, hashtags)
      tweet['text'].scan(HASHTAG_REGEX) do |hashtag|
        puts "===> hashtag: ##{hashtag[0]}" if @options.verbose
        record(hashtags, hashtag[0].downcase, :hashtag, hashtag[0], tweet)
      end
    end

    # Collects smileys and frowns :^)
    def scan_smileys(tweet, smileys)
      # Unescape in a single pass: chained gsubs would turn "&amp;gt;" into
      # ">" and thereby invent smileys such as ">:-)".
      text = tweet['text'].gsub(HTML_ENTITY_REGEX, HTML_ENTITIES)

      text.scan(SMILE_REGEX) do |match|
        smile = match[0]
        puts "===> smile: #{smile}" if @options.verbose
        record(smileys, smile, :smiley, smile, tweet, { frown: false })
      end

      return if text =~ URL_SCHEME_REGEX # http:// :^)

      text.scan(FROWN_REGEX) do |match|
        frown = match[0]
        puts "===> frown: #{frown}" if @options.verbose
        record(smileys, frown, :smiley, frown, tweet, { frown: true })
      end
    end

    # Tweet source (aka. the client the (re)tweet was made with).
    # A source that SOURCE_REGEX does not match (e.g. a plain-text source) is
    # used verbatim as the client name with an empty URL.
    def client_for(source)
      source = source.to_s
      match = source.match(SOURCE_REGEX)
      return { name: source, url: "" } if match.nil?

      { name: match[2], url: match[1] }
    end

    # Hour (0..23) of the timestamp after applying the configured offset.
    def hour_of_day(created_at)
      hour = created_at.match(/^\d{4}-\d{2}-\d{2} (\d{2})/)[1].to_i
      (hour + @config[:timezone_difference]) % 24
    end
  end
end
259
+
260
+ # kate: indent-width 2
data/lib/empyrean.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'empyrean/defaults'
2
+
3
+ module Empyrean # Top-level namespace of the gem; intentionally empty here, this file only requires 'empyrean/defaults'.
4
+ end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: empyrean
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - nilsding
8
+ - pixeldesu
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-06-05 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: With Empyrean, you can generate full stats of your Twitter account using
15
+ your Twitter archive.
16
+ email:
17
+ - nilsding@nilsding.org
18
+ - andy@pixelde.su
19
+ executables:
20
+ - empyrean
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - ".gitignore"
25
+ - Gemfile
26
+ - Gemfile.lock
27
+ - LICENSE
28
+ - README.md
29
+ - bin/empyrean
30
+ - config.yml.example
31
+ - empyrean.gemspec
32
+ - lib/empyrean.rb
33
+ - lib/empyrean/cli.rb
34
+ - lib/empyrean/configloader.rb
35
+ - lib/empyrean/defaults.rb
36
+ - lib/empyrean/optparser.rb
37
+ - lib/empyrean/templatelister.rb
38
+ - lib/empyrean/templaterenderer.rb
39
+ - lib/empyrean/templates/default.html.erb
40
+ - lib/empyrean/templates/pisg.html.erb
41
+ - lib/empyrean/tweetloader.rb
42
+ - lib/empyrean/tweetparser.rb
43
+ homepage: https://github.com/Leafcat/Empyrean
44
+ licenses:
45
+ - GPLv3
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.4.6
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Generates stats using your Twitter archive.
67
+ test_files: []
68
+ has_rdoc: