empyrean 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +13 -0
- data/LICENSE +674 -0
- data/README.md +25 -0
- data/bin/empyrean +22 -0
- data/config.yml.example +31 -0
- data/empyrean.gemspec +20 -0
- data/lib/empyrean/cli.rb +97 -0
- data/lib/empyrean/configloader.rb +105 -0
- data/lib/empyrean/defaults.rb +43 -0
- data/lib/empyrean/optparser.rb +151 -0
- data/lib/empyrean/templatelister.rb +35 -0
- data/lib/empyrean/templaterenderer.rb +192 -0
- data/lib/empyrean/templates/default.html.erb +237 -0
- data/lib/empyrean/templates/pisg.html.erb +441 -0
- data/lib/empyrean/tweetloader.rb +50 -0
- data/lib/empyrean/tweetparser.rb +260 -0
- data/lib/empyrean.rb +4 -0
- metadata +68 -0
@@ -0,0 +1,260 @@
|
|
1
|
+
# tweetparser.rb - parses tweets
|
2
|
+
#
|
3
|
+
# This file is part of Empyrean
|
4
|
+
# Copyright (C) 2015 nilsding, pixeldesu
|
5
|
+
#
|
6
|
+
# This program is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This program is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'json'
|
20
|
+
require 'empyrean/defaults'
|
21
|
+
|
22
|
+
module Empyrean
|
23
|
+
class TweetParser
  # Character classes shared by the smiley and frown patterns.
  SMILEY_EYES = "[xX8;:=%]"
  SMILEY_NOSE = "[-oc*^]"

  # Happy emoticons, e.g. ":-)", ";D", "(-:", "^_^'" or "\o/".
  SMILE_REGEX = /(>?#{SMILEY_EYES}'?#{SMILEY_NOSE}[\)pPD\}\]>]|[\(\{\[<]#{SMILEY_NOSE}'?#{SMILEY_EYES}<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/

  # Sad emoticons, e.g. ":-(", ":/", "D:", ";_;", "T_T" or "-_-".
  FROWN_REGEX = /(#{SMILEY_EYES}'?#{SMILEY_NOSE}[\(\[\\\/\{|]|[\)\]\\\/\}|]#{SMILEY_NOSE}'?#{SMILEY_EYES}|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/

  # HTML entities that are decoded before looking for emoticons.
  HTML_ENTITIES = { "&amp;" => "&", "&lt;" => "<", "&gt;" => ">" }.freeze

  # options - object responding to `verbose` (truthy enables progress output)
  # config  - Hash read with the keys :ignored_users, :renamed_users and
  #           :timezone_difference
  def initialize(options, config)
    @options = options
    @config = config
  end

  # Parses an array of tweets.
  #
  # tweets - Enumerable of tweet Hashes (string keys, see #parse_one)
  #
  # Returns a Hash with the keys :mentions, :hashtags, :clients and
  # :smileys (Hashes of per-item stats, each carrying an :examples Array
  # where applicable), :times_of_day (24 counters indexed by hour) and the
  # counters :tweet_count, :retweet_count and :selftweet_count.
  def parse(tweets)
    retdict = {
      mentions: {},
      hashtags: {},
      clients: {},
      smileys: {},
      times_of_day: [0] * 24,
      tweet_count: 0,
      retweet_count: 0,
      selftweet_count: 0,
    }

    tweets.each do |tweet|
      parsed_tweet = parse_one(tweet)

      if parsed_tweet[:retweet]
        # retweets only count as such; their text is never analysed
        retdict[:retweet_count] += 1
      else
        absorb(retdict[:mentions], parsed_tweet[:mentions], :name)
        absorb(retdict[:hashtags], parsed_tweet[:hashtags], :hashtag)
        absorb(retdict[:smileys], parsed_tweet[:smileys], :smiley)
        retdict[:selftweet_count] += 1
      end

      # the client is recorded for tweets and retweets alike
      client = parsed_tweet[:client]
      client_entry = (retdict[:clients][client[:name]] ||= { count: 0 })
      client_entry[:count] += 1
      client_entry[:name] = client[:name]
      client_entry[:url] = client[:url]

      retdict[:times_of_day][parsed_tweet[:time_of_day]] += 1
      retdict[:tweet_count] += 1
    end

    retdict
  end

  # Parses a single tweet object.
  #
  # tweet - Hash read with the string keys 'id', 'text', 'source',
  #         'created_at' (must start with "YYYY-MM-DD HH") and
  #         'retweeted_status' (non-nil marks a retweet)
  #
  # Returns a Hash with the keys :mentions, :hashtags, :smileys (each a
  # Hash of per-item stats with :count and one :example), :time_of_day
  # (hour shifted by the configured :timezone_difference, modulo 24),
  # :retweet (Boolean) and :client ({ name:, url: }).
  def parse_one(tweet)
    puts "==> #{tweet['id']}" if @options.verbose
    retdict = {
      mentions: {},
      hashtags: {},
      time_of_day: 0,
      retweet: false,
      client: {
        name: "",
        url: "",
      },
      smileys: {}
    }

    if tweet['retweeted_status'].nil?
      scan_mentions(tweet, retdict[:mentions])
      scan_hashtags(tweet, retdict[:hashtags])
      scan_smileys(tweet, retdict[:smileys])
    else
      # the status text of a retweet is someone else's, so it is ignored
      retdict[:retweet] = true
    end

    # Tweet source (aka. the client the (re)tweet was made with)
    source = tweet['source'].to_s
    source_matches = source.match SOURCE_REGEX
    if source_matches
      retdict[:client][:url] = source_matches[1]
      retdict[:client][:name] = source_matches[2]
    else
      # a source that does not match the pattern (e.g. plain text) is kept
      # verbatim as the client name instead of raising on a nil match
      retdict[:client][:name] = source
    end

    # Time of day
    hour = tweet['created_at'].match(/^\d{4}-\d{2}-\d{2} (\d{2})/)[1].to_i
    retdict[:time_of_day] = (hour + @config[:timezone_difference]) % 24

    retdict
  end

  class << self
    # Merges an array which contains dicts returned by #parse and adds up
    # all counters.
    #
    # parsed - Array of Hashes as returned by #parse
    #
    # Returns a Hash shaped like the result of #parse, except that
    # :mentions, :hashtags, :clients and :smileys are Arrays of
    # [key, data] pairs ordered by descending :count, and every
    # :examples Array is replaced by a single randomly picked :example.
    def merge_parsed(parsed)
      retdict = {
        mentions: {},
        hashtags: {},
        clients: {},
        smileys: {},
        times_of_day: [0] * 24,
        tweet_count: 0,
        retweet_count: 0,
        selftweet_count: 0,
      }

      parsed.each do |elem|
        [:tweet_count, :retweet_count, :selftweet_count].each do |counter|
          retdict[counter] += elem[counter]
        end

        elem[:mentions].each do |user, data|
          entry = (retdict[:mentions][user] ||= { count: 0 })
          entry[:count] += data[:count]
          entry[:name] = data[:name]
          entry[:examples] ||= []
          entry[:examples] += data[:examples]
        end

        elem[:hashtags].each do |hashtag, data|
          entry = (retdict[:hashtags][hashtag] ||= { count: 0 })
          entry[:count] += data[:count]
          entry[:hashtag] = data[:hashtag]
          entry[:examples] ||= []
          entry[:examples] += data[:examples]
        end

        elem[:smileys].each do |smile, data|
          entry = (retdict[:smileys][smile] ||= { count: 0 })
          entry[:frown] ||= data[:frown]
          entry[:count] += data[:count]
          entry[:smiley] ||= data[:smiley]
          entry[:examples] ||= []
          entry[:examples] += data[:examples]
        end

        elem[:clients].each do |client, data|
          entry = (retdict[:clients][client] ||= { count: 0 })
          entry[:count] += data[:count]
          entry[:name] = data[:name]
          entry[:url] = data[:url]
        end

        elem[:times_of_day].each_with_index do |count, hour|
          retdict[:times_of_day][hour] += count
        end
      end

      # take only one example
      [:mentions, :hashtags, :smileys].each do |category|
        retdict[category].each_value do |entry|
          entry[:example] = entry.delete(:examples).sample
        end
      end

      # most frequent items first; this turns each Hash into an Array of pairs
      [:mentions, :hashtags, :clients, :smileys].each do |category|
        retdict[category] = retdict[category].sort_by { |_key, data| data[:count] }.reverse
      end

      retdict
    end
  end

  private

  # Adds the per-tweet stats in `found` to the running `totals`, keeping the
  # first seen label (stored under label_key) and collecting every example.
  def absorb(totals, found, label_key)
    found.each do |key, data|
      entry = (totals[key] ||= { count: 0 })
      entry[:frown] ||= data[:frown] if data.key?(:frown)
      entry[:count] += data[:count]
      entry[label_key] ||= data[label_key]
      (entry[:examples] ||= []) << data[:example]
    end
  end

  # Counts one occurrence of `key` in `bucket`, remembering the label as it
  # was first written and the tweet it was first seen in.
  def record(bucket, key, label_key, label, tweet, initial = {})
    entry = (bucket[key] ||= initial)
    entry[label_key] ||= label
    entry[:count] = entry[:count].to_i.succ
    entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
  end

  # Collects mentioned users, skipping ignored users and applying renames.
  def scan_mentions(tweet, mentions)
    tweet['text'].scan USERNAME_REGEX do |user|
      hash_user = user[0].downcase
      puts "===> mentioned: #{user[0]}" if @options.verbose
      next if @config[:ignored_users].include? hash_user

      if @config[:renamed_users].include? hash_user.to_sym
        hash_user = @config[:renamed_users][hash_user.to_sym]
      end
      record(mentions, hash_user, :name, user[0], tweet)
    end
  end

  # Collects hashtags, keyed case-insensitively.
  def scan_hashtags(tweet, hashtags)
    tweet['text'].scan HASHTAG_REGEX do |hashtag|
      puts "===> hashtag: ##{hashtag[0]}" if @options.verbose
      record(hashtags, hashtag[0].downcase, :hashtag, hashtag[0], tweet)
    end
  end

  # Collects smileys and frowns from the entity-decoded tweet text.
  def scan_smileys(tweet, smileys)
    # decode in a single pass so "&amp;gt;" becomes "&gt;" and not ">"
    unescaped_tweet = tweet['text'].gsub(/&(?:amp|lt|gt);/, HTML_ENTITIES)

    unescaped_tweet.scan SMILE_REGEX do |smile|
      puts "===> smile: #{smile[0]}" if @options.verbose
      record(smileys, smile[0], :smiley, smile[0], tweet, { frown: false })
    end

    # the "://" of a URL looks like a frown, so tweets containing a URL are
    # not searched for frowns at all
    return if unescaped_tweet =~ %r{\w+://}

    unescaped_tweet.scan FROWN_REGEX do |frown|
      puts "===> frown: #{frown[0]}" if @options.verbose
      record(smileys, frown[0], :smiley, frown[0], tweet, { frown: true })
    end
  end
end
|
258
|
+
end
|
259
|
+
|
260
|
+
# kate: indent-width 2
|
data/lib/empyrean.rb
ADDED
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: empyrean
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- nilsding
|
8
|
+
- pixeldesu
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-06-05 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: With Empyrean, you can generate full stats of your Twitter account using
|
15
|
+
your Twitter archive.
|
16
|
+
email:
|
17
|
+
- nilsding@nilsding.org
|
18
|
+
- andy@pixelde.su
|
19
|
+
executables:
|
20
|
+
- empyrean
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- ".gitignore"
|
25
|
+
- Gemfile
|
26
|
+
- Gemfile.lock
|
27
|
+
- LICENSE
|
28
|
+
- README.md
|
29
|
+
- bin/empyrean
|
30
|
+
- config.yml.example
|
31
|
+
- empyrean.gemspec
|
32
|
+
- lib/empyrean.rb
|
33
|
+
- lib/empyrean/cli.rb
|
34
|
+
- lib/empyrean/configloader.rb
|
35
|
+
- lib/empyrean/defaults.rb
|
36
|
+
- lib/empyrean/optparser.rb
|
37
|
+
- lib/empyrean/templatelister.rb
|
38
|
+
- lib/empyrean/templaterenderer.rb
|
39
|
+
- lib/empyrean/templates/default.html.erb
|
40
|
+
- lib/empyrean/templates/pisg.html.erb
|
41
|
+
- lib/empyrean/tweetloader.rb
|
42
|
+
- lib/empyrean/tweetparser.rb
|
43
|
+
homepage: https://github.com/Leafcat/Empyrean
|
44
|
+
licenses:
|
45
|
+
- GPLv3
|
46
|
+
metadata: {}
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 2.4.6
|
64
|
+
signing_key:
|
65
|
+
specification_version: 4
|
66
|
+
summary: Generates stats using your Twitter archive.
|
67
|
+
test_files: []
|
68
|
+
has_rdoc:
|