empyrean 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +13 -0
- data/LICENSE +674 -0
- data/README.md +25 -0
- data/bin/empyrean +22 -0
- data/config.yml.example +31 -0
- data/empyrean.gemspec +20 -0
- data/lib/empyrean/cli.rb +97 -0
- data/lib/empyrean/configloader.rb +105 -0
- data/lib/empyrean/defaults.rb +43 -0
- data/lib/empyrean/optparser.rb +151 -0
- data/lib/empyrean/templatelister.rb +35 -0
- data/lib/empyrean/templaterenderer.rb +192 -0
- data/lib/empyrean/templates/default.html.erb +237 -0
- data/lib/empyrean/templates/pisg.html.erb +441 -0
- data/lib/empyrean/tweetloader.rb +50 -0
- data/lib/empyrean/tweetparser.rb +260 -0
- data/lib/empyrean.rb +4 -0
- metadata +68 -0
@@ -0,0 +1,260 @@
|
|
1
|
+
# tweetparser.rb - parses tweets
|
2
|
+
#
|
3
|
+
# This file is part of Empyrean
|
4
|
+
# Copyright (C) 2015 nilsding, pixeldesu
|
5
|
+
#
|
6
|
+
# This program is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This program is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'json'
|
20
|
+
require 'empyrean/defaults'
|
21
|
+
|
22
|
+
module Empyrean
  # Collects statistics from tweet hashes: mentioned users, hashtags, smileys,
  # the client each tweet was sent with and the hour of day it was posted.
  #
  # USERNAME_REGEX, HASHTAG_REGEX and SOURCE_REGEX are not defined in this
  # file; they are expected to come from 'empyrean/defaults'.
  class TweetParser
    # Character classes shared by the two smiley regexes below.
    SMILEY_EYES = "[xX8;:=%]".freeze
    SMILEY_NOSE = "[-oc*^]".freeze

    # Happy faces, e.g. ":-)", "(-:", ":D" or "\o/".
    SMILE_REGEX = /(>?#{SMILEY_EYES}'?#{SMILEY_NOSE}[\)pPD\}\]>]|[\(\{\[<]#{SMILEY_NOSE}'?#{SMILEY_EYES}<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/

    # Sad faces, e.g. ":-(", "D:", ";_;", "T_T" or "-_-".
    FROWN_REGEX = /(#{SMILEY_EYES}'?#{SMILEY_NOSE}[\(\[\\\/\{|]|[\)\]\\\/\}|]#{SMILEY_NOSE}'?#{SMILEY_EYES}|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/

    # The HTML entities that are decoded before looking for smileys.
    HTML_ENTITIES = { "&amp;" => "&", "&lt;" => "<", "&gt;" => ">" }.freeze
    HTML_ENTITY_REGEX = /&(?:amp|lt|gt);/

    # Matches the "scheme://" part of a URL.
    URL_SCHEME_REGEX = /\w+:\/\//

    # options - object responding to #verbose; when truthy, progress lines are
    #           written to stdout while parsing.
    # config  - Hash read with the keys :ignored_users, :renamed_users and
    #           :timezone_difference.
    def initialize(options, config)
      @options = options
      @config = config
    end

    # Parses an array of tweets.
    #
    # Mentions, hashtags and smileys are only collected from tweets that are
    # not retweets; clients and times of day are counted for every tweet.
    #
    # Returns a Hash with the keys :mentions, :hashtags, :clients, :smileys
    # (each a Hash of per-item data including :count; all but :clients also
    # carry an :examples Array), :times_of_day (Array of 24 counters) and the
    # counters :tweet_count, :retweet_count and :selftweet_count.
    def parse(tweets)
      retdict = {
        mentions: {},
        hashtags: {},
        clients: {},
        smileys: {},
        times_of_day: [0] * 24,
        tweet_count: 0,
        retweet_count: 0,
        selftweet_count: 0,
      }

      tweets.each do |tweet|
        parsed_tweet = parse_one(tweet)

        if parsed_tweet[:retweet]
          retdict[:retweet_count] += 1
        else
          parsed_tweet[:mentions].each do |user, data|
            entry = (retdict[:mentions][user] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:name] ||= data[:name]
            (entry[:examples] ||= []) << data[:example]
          end

          parsed_tweet[:hashtags].each do |hashtag, data|
            entry = (retdict[:hashtags][hashtag] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:hashtag] ||= data[:hashtag]
            (entry[:examples] ||= []) << data[:example]
          end

          parsed_tweet[:smileys].each do |smile, data|
            entry = (retdict[:smileys][smile] ||= { count: 0 })
            entry[:frown] ||= data[:frown]
            entry[:count] += data[:count]
            entry[:smiley] ||= data[:smiley]
            (entry[:examples] ||= []) << data[:example]
          end

          retdict[:selftweet_count] += 1
        end

        # clients are keyed by their display name
        client = parsed_tweet[:client]
        entry = (retdict[:clients][client[:name]] ||= { count: 0 })
        entry[:count] += 1
        entry[:name] = client[:name]
        entry[:url] = client[:url]

        retdict[:times_of_day][parsed_tweet[:time_of_day]] += 1
        retdict[:tweet_count] += 1
      end

      retdict
    end

    # Parses a single tweet object.
    #
    # tweet - Hash read with the String keys 'id', 'text', 'source',
    #         'created_at' and 'retweeted_status'.
    #
    # Returns a Hash with the keys :mentions, :hashtags, :smileys (empty for
    # retweets), :retweet, :client ({ name:, url: }) and :time_of_day.
    # Raises NoMethodError if 'created_at' does not start with
    # "YYYY-MM-DD HH".
    def parse_one(tweet)
      puts "==> #{tweet['id']}" if @options.verbose
      retdict = {
        mentions: {},
        hashtags: {},
        time_of_day: 0,
        retweet: false,
        client: {
          name: "",
          url: "",
        },
        smileys: {}
      }

      # the status text of a retweet is ignored
      if tweet['retweeted_status'].nil?
        scan_mentions(tweet, retdict[:mentions])
        scan_hashtags(tweet, retdict[:hashtags])
        scan_smileys(tweet, retdict[:smileys])
      else
        retdict[:retweet] = true
      end

      # Tweet source (aka. the client the (re)tweet was made with)
      source = tweet['source'].to_s
      source_matches = source.match(SOURCE_REGEX)
      if source_matches
        retdict[:client][:url] = source_matches[1]
        retdict[:client][:name] = source_matches[2]
      else
        # A source that SOURCE_REGEX does not match is kept under its raw text
        # with an empty URL instead of raising on the missing match.
        # NOTE(review): presumably these are plain-text sources - confirm
        # against SOURCE_REGEX in defaults.rb.
        retdict[:client][:name] = source
      end

      # Time of day: the hour from 'created_at', shifted by the configured
      # timezone difference and wrapped into 0..23.
      created_hour = tweet['created_at'].match(/^\d{4}-\d{2}-\d{2} (\d{2})/)[1].to_i
      retdict[:time_of_day] = (created_hour + @config[:timezone_difference]) % 24

      retdict
    end

    class << self
      # Merges an array which contains dicts returned by #parse.
      # Increases all counters.
      #
      # Returns a Hash shaped like the result of #parse, except that
      # :mentions, :hashtags, :clients and :smileys become Arrays of
      # [key, data] pairs sorted by descending :count, and every :examples
      # Array is replaced by one randomly picked :example.
      def merge_parsed(parsed)
        retdict = {
          mentions: {},
          hashtags: {},
          clients: {},
          smileys: {},
          times_of_day: [0] * 24,
          tweet_count: 0,
          retweet_count: 0,
          selftweet_count: 0,
        }

        parsed.each do |elem|
          retdict[:tweet_count] += elem[:tweet_count]
          retdict[:retweet_count] += elem[:retweet_count]
          retdict[:selftweet_count] += elem[:selftweet_count]

          elem[:mentions].each do |user, data|
            entry = (retdict[:mentions][user] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:name] = data[:name]
            entry[:examples] ||= []
            entry[:examples] += data[:examples]
          end

          elem[:hashtags].each do |hashtag, data|
            entry = (retdict[:hashtags][hashtag] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:hashtag] = data[:hashtag]
            entry[:examples] ||= []
            entry[:examples] += data[:examples]
          end

          elem[:smileys].each do |smile, data|
            entry = (retdict[:smileys][smile] ||= { count: 0 })
            entry[:frown] ||= data[:frown]
            entry[:count] += data[:count]
            entry[:smiley] ||= data[:smiley]
            entry[:examples] ||= []
            entry[:examples] += data[:examples]
          end

          elem[:clients].each do |client, data|
            entry = (retdict[:clients][client] ||= { count: 0 })
            entry[:count] += data[:count]
            entry[:name] = data[:name]
            entry[:url] = data[:url]
          end

          elem[:times_of_day].each_with_index do |count, index|
            retdict[:times_of_day][index] += count
          end
        end

        # take only one example
        [:mentions, :hashtags, :smileys].each do |kind|
          retdict[kind].each_value do |data|
            data[:example] = data.delete(:examples).sample
          end
        end

        # most used first
        [:mentions, :hashtags, :clients, :smileys].each do |kind|
          retdict[kind] = retdict[kind].sort_by { |_key, data| data[:count] }.reverse
        end

        retdict
      end
    end

    private

    # Adds every mentioned user found in the tweet text to +mentions+,
    # skipping ignored users and applying configured renames.
    def scan_mentions(tweet, mentions)
      tweet['text'].scan USERNAME_REGEX do |user|
        hash_user = user[0].downcase
        puts "===> mentioned: #{user[0]}" if @options.verbose
        next if @config[:ignored_users].include? hash_user

        if @config[:renamed_users].include? hash_user.to_sym
          hash_user = @config[:renamed_users][hash_user.to_sym]
        end
        record_occurrence(mentions, hash_user, :name, user[0], tweet)
      end
    end

    # Adds every hashtag found in the tweet text to +hashtags+, keyed by the
    # downcased tag.
    def scan_hashtags(tweet, hashtags)
      tweet['text'].scan HASHTAG_REGEX do |hashtag|
        puts "===> hashtag: ##{hashtag[0]}" if @options.verbose
        record_occurrence(hashtags, hashtag[0].downcase, :hashtag, hashtag[0], tweet)
      end
    end

    # Adds every smiling and frowning face found in the tweet text to
    # +smileys+.
    def scan_smileys(tweet, smileys)
      # Decode "&amp;", "&lt;" and "&gt;" in a single pass so faces such as
      # ">:-)" are seen; one pass also keeps "&amp;lt;" from turning into "<".
      unescaped_tweet = tweet['text'].gsub(HTML_ENTITY_REGEX, HTML_ENTITIES)

      unescaped_tweet.scan SMILE_REGEX do |match|
        smile = match[0]
        puts "===> smile: #{smile}" if @options.verbose
        record_occurrence(smileys, smile, :smiley, smile, tweet, { frown: false })
      end

      # "http://" contains ":/", which looks like a frown, so tweets
      # containing a URL are not searched for frowns at all.
      return if unescaped_tweet =~ URL_SCHEME_REGEX

      unescaped_tweet.scan FROWN_REGEX do |match|
        frown = match[0]
        puts "===> frown: #{frown}" if @options.verbose
        record_occurrence(smileys, frown, :smiley, frown, tweet, { frown: true })
      end
    end

    # Counts one occurrence of +key+ in +bucket+; the first occurrence also
    # stores +label+ under +label_key+ and the tweet as the :example.
    def record_occurrence(bucket, key, label_key, label, tweet, initial = {})
      entry = (bucket[key] ||= initial)
      entry[label_key] ||= label
      entry[:count] = entry[:count].to_i.succ
      entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
    end
  end
end
|
259
|
+
|
260
|
+
# kate: indent-width 2
|
data/lib/empyrean.rb
ADDED
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: empyrean
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- nilsding
|
8
|
+
- pixeldesu
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-06-05 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: With Empyrean, you can generate full stats of your Twitter account using
|
15
|
+
your Twitter archive.
|
16
|
+
email:
|
17
|
+
- nilsding@nilsding.org
|
18
|
+
- andy@pixelde.su
|
19
|
+
executables:
|
20
|
+
- empyrean
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- ".gitignore"
|
25
|
+
- Gemfile
|
26
|
+
- Gemfile.lock
|
27
|
+
- LICENSE
|
28
|
+
- README.md
|
29
|
+
- bin/empyrean
|
30
|
+
- config.yml.example
|
31
|
+
- empyrean.gemspec
|
32
|
+
- lib/empyrean.rb
|
33
|
+
- lib/empyrean/cli.rb
|
34
|
+
- lib/empyrean/configloader.rb
|
35
|
+
- lib/empyrean/defaults.rb
|
36
|
+
- lib/empyrean/optparser.rb
|
37
|
+
- lib/empyrean/templatelister.rb
|
38
|
+
- lib/empyrean/templaterenderer.rb
|
39
|
+
- lib/empyrean/templates/default.html.erb
|
40
|
+
- lib/empyrean/templates/pisg.html.erb
|
41
|
+
- lib/empyrean/tweetloader.rb
|
42
|
+
- lib/empyrean/tweetparser.rb
|
43
|
+
homepage: https://github.com/Leafcat/Empyrean
|
44
|
+
licenses:
|
45
|
+
- GPLv3
|
46
|
+
metadata: {}
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 2.4.6
|
64
|
+
signing_key:
|
65
|
+
specification_version: 4
|
66
|
+
summary: Generates stats using your Twitter archive.
|
67
|
+
test_files: []
|
68
|
+
has_rdoc:
|