akane 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +27 -0
- data/Rakefile +6 -0
- data/akane.gemspec +28 -0
- data/bin/akane +6 -0
- data/lib/akane.rb +5 -0
- data/lib/akane/cli.rb +83 -0
- data/lib/akane/config.rb +25 -0
- data/lib/akane/manager.rb +97 -0
- data/lib/akane/receivers/abstract_receiver.rb +47 -0
- data/lib/akane/receivers/stream.rb +63 -0
- data/lib/akane/recorder.rb +104 -0
- data/lib/akane/storages/abstract_storage.rb +26 -0
- data/lib/akane/storages/elasticsearch.rb +242 -0
- data/lib/akane/storages/file.rb +142 -0
- data/lib/akane/storages/mock.rb +51 -0
- data/lib/akane/storages/stdout.rb +23 -0
- data/lib/akane/version.rb +3 -0
- data/spec/config_spec.rb +19 -0
- data/spec/manager_spec.rb +107 -0
- data/spec/receivers/abstract_receiver_spec.rb +48 -0
- data/spec/receivers/stream_spec.rb +105 -0
- data/spec/recorder_spec.rb +86 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/storages/abstract_storage_spec.rb +20 -0
- data/spec/storages/mock_spec.rb +59 -0
- data/spec/support/mock_tweetstream.rb +86 -0
- metadata +169 -0
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
module Akane
|
4
|
+
class Recorder
|
5
|
+
# Sets up a recorder that dispatches queued actions to +storages+.
#
# storages - collection of storage objects; each one is later sent the
#            queued action name (see #perform).
# logger   - object used for error reporting (defaults to Logger.new(nil)).
#
# NOTE(review): Logger is referenced here but the visible requires only load
# 'thread' -- presumably 'logger' is required elsewhere; confirm.
def initialize(storages, logger: Logger.new(nil))
  @queue = Queue.new
  # Remembers up to 1000 recently recorded tweet ids for de-duplication.
  @recently_performed = RoundrobinFlags.new(1000)
  @logger = logger
  @storages = storages
end
|
11
|
+
|
12
|
+
# Number of actions currently waiting in the queue.
def queue_length
  @queue.length
end
|
15
|
+
|
16
|
+
# Enqueues a :record_tweet action for +account+ and +tweet+.
# Returns the receiver so calls can be chained.
def record_tweet(account, tweet)
  @queue.push([:record_tweet, account, tweet])
  self
end
|
20
|
+
|
21
|
+
# Enqueues a :mark_as_deleted action for the given account, user id and
# tweet id. Returns the receiver so calls can be chained.
def mark_as_deleted(account, user_id, tweet_id)
  @queue.push([:mark_as_deleted, account, user_id, tweet_id])
  self
end
|
25
|
+
|
26
|
+
# Enqueues a :record_message action for +account+ and +message+.
# Returns the receiver so calls can be chained.
def record_message(account, message)
  @queue.push([:record_message, account, message])
  self
end
|
30
|
+
|
31
|
+
# Enqueues a :record_event action for +account+ and +event+.
# Returns the receiver so calls can be chained.
def record_event(account, event)
  @queue.push([:record_event, account, event])
  self
end
|
35
|
+
|
36
|
+
# Takes the next queued action (Queue#pop waits while the queue is empty)
# and hands it to #perform.
#
# raise_errors - forwarded to #perform as its raise_errors: option.
def dequeue(raise_errors = false)
  job = @queue.pop
  perform(*job, raise_errors: raise_errors)
end
|
39
|
+
|
40
|
+
# Delivers one action to every configured storage.
#
# action       - Symbol naming the storage method to call.
# account      - account identifier passed through to the storage.
# payload      - remaining arguments for the storage method.
# raise_errors - when true, storage errors propagate instead of being logged.
#
# For :record_tweet the tweet id is checked against the recently-performed
# set so the same tweet is stored only once, and an embedded
# "retweeted_status" is recorded (recursively) before the tweet itself.
#
# Interrupt is always re-raised, whatever raise_errors says.
def perform(action, account, *payload, raise_errors: false)
  if action == :record_tweet
    tweet = payload.last
    return if @recently_performed[tweet["id"]]
    @recently_performed.flag!(tweet["id"])

    if tweet["retweeted_status"]
      perform(:record_tweet, account, tweet["retweeted_status"], raise_errors: raise_errors)
    end
  end

  @storages.each do |storage|
    begin
      storage.__send__(action, account, *payload)
    rescue Exception => e
      # The original `e === Interrupt` compared an instance with a class and
      # was never true, so Ctrl-C ended up logged and swallowed here.
      raise if e.is_a?(Interrupt)
      raise if raise_errors
      @logger.error "Error while recorder performing to #{storage.inspect}: #{e.inspect}"
      @logger.error e.backtrace
    end
  end
end
|
61
|
+
|
62
|
+
# Processes queued actions forever by calling #dequeue in a loop.
#
# raise_errors - when true, any error raised while dequeuing propagates and
#                ends the loop; otherwise it is logged and the loop goes on.
#
# Interrupt is always re-raised so the loop can be stopped with Ctrl-C.
def run(raise_errors = false)
  loop do
    begin
      dequeue(raise_errors)
    rescue Exception => e
      # The original `e === Interrupt` was never true (instance === class),
      # which made this loop impossible to interrupt.
      raise if e.is_a?(Interrupt)
      raise if raise_errors
      @logger.error "Error while recorder dequing: #{e.inspect}"
      @logger.error e.backtrace
    end
  end
end
|
74
|
+
|
75
|
+
# Bounded set of flags: once more than +size+ keys are flagged, the keys
# that were inserted earliest are dropped.
class RoundrobinFlags
  # size - maximum number of keys kept at once.
  def initialize(size)
    @limit_size = size
    @hash = {}
  end

  # Returns true when +k+ is currently flagged, nil otherwise.
  def [](k)
    @hash[k]
  end

  # Flags +k+, then evicts from the front of the hash (insertion order)
  # until the size limit is respected again. Always returns nil.
  def flag!(k)
    @hash[k] = true
    overflow = @hash.size - @limit_size
    overflow.times { @hash.shift } if overflow > 0
    nil
  end

  # Removes the flag for +k+; returns the removed value (true) or nil.
  def unflag!(k)
    @hash.delete(k)
  end

  # All currently flagged keys, oldest first.
  def flags
    @hash.keys
  end
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Akane
|
2
|
+
module Storages
|
3
|
+
# Base class for storage backends. It keeps the configuration and logger and
# declares the four recording methods; every one of them raises
# NotImplementedError here.
class AbstractStorage
  # config - required; omitting it raises ArgumentError ('missing config').
  # logger - defaults to a Logger writing to $stdout.
  def initialize(config: raise(ArgumentError, 'missing config'), logger: Logger.new($stdout))
    @config, @logger = config, logger
  end

  # Stores +tweet+ received on +account+.
  def record_tweet(account, tweet)
    raise NotImplementedError
  end

  # Marks the tweet +tweet_id+ of user +user_id+ as deleted.
  def mark_as_deleted(account, user_id, tweet_id)
    raise NotImplementedError
  end

  # Stores +event+ received on +account+.
  def record_event(account, event)
    raise NotImplementedError
  end

  # Stores +message+ received on +account+.
  def record_message(account, message)
    raise NotImplementedError
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'akane/storages/abstract_storage'
|
2
|
+
require 'elasticsearch'
|
3
|
+
|
4
|
+
module Akane
|
5
|
+
module Storages
|
6
|
+
class Elasticsearch < AbstractStorage
|
7
|
+
# Creates the Elasticsearch client from the storage configuration and makes
# sure the target index exists.
#
# Reads these @config keys: "host", "enable_es_log" (hand @logger to the
# client when truthy) and "index" (index name, 'akane' when absent).
#
# The signature forwards keyword arguments explicitly: with a bare `*` a
# Ruby 3 zsuper would pass them on as one positional hash, which the
# keyword-only parent initializer rejects.
def initialize(*, **)
  super

  es_logger = @config["enable_es_log"] ? @logger : nil
  @es = ::Elasticsearch::Client.new(hosts: [@config["host"]], logger: es_logger)
  @index_name = @config["index"] || 'akane'
  set_elasticsearch_up
end
|
17
|
+
|
18
|
+
# Indexes +tweet+ as a 'tweet' document keyed by its :id_str, with a
# `deleted: false` marker added.
#
# The marker is added to a copy: writing it into tweet.attrs directly would
# modify the hash owned by the tweet object, which is handed to every other
# storage as well.
def record_tweet(account, tweet)
  document = tweet.attrs.merge(deleted: false)
  @es.index(index: @index_name, type: 'tweet', id: document[:id_str], body: document)
end
|
23
|
+
|
24
|
+
# Flags the stored tweet +tweet_id+ as deleted and writes a compact
# 'deleted_tweet' document next to it.
#
# The stored tweet is fetched, re-indexed with 'deleted' => true, and a
# reduced copy (ids, text, user ids and screen name) is indexed together
# with the current time. When Elasticsearch answers NotFound, the deletion
# is skipped with a debug log line.
def mark_as_deleted(account, user_id, tweet_id)
  doc_id = tweet_id.to_s

  stored = @es.get(index: @index_name, type: 'tweet', id: doc_id)['_source']
  stored['deleted'] = true
  @es.index(index: @index_name, type: 'tweet', id: doc_id, body: stored)

  author = stored['user']
  summary = {
    id: stored['id'],
    id_str: stored['id_str'],
    text: stored['text'],
    user: {
      id: author['id'],
      id_str: author['id_str'],
      screen_name: author['screen_name'],
    }
  }
  tombstone = {tweet: summary, deleted_at: Time.now.strftime('%Y-%m-%d %H:%M:%S %z')}
  @es.index(index: @index_name, type: 'deleted_tweet', id: doc_id, body: tombstone)
rescue ::Elasticsearch::Transport::Transport::Errors::NotFound
  @logger.debug "Due to 404, skipping Deletion for #{tweet_id}"
end
|
43
|
+
|
44
|
+
# Placeholder for event recording: the event type is inspected, but no
# branch does anything yet, so nothing is written and nil is returned.
def record_event(account, event)
  case event["event"]
  when 'favorite', 'unfavorite'
    # not persisted yet
  when 'block', 'unblock', 'follow', 'unfollow'
    # not persisted yet
  end
end
|
54
|
+
|
55
|
+
# Indexes +message+ as a 'message' document: the id comes from
# message[:id_str], the body from message.attrs.
def record_message(account, message)
  doc_id = message[:id_str]
  body = message.attrs
  @es.index(index: @index_name, type: 'message', id: doc_id, body: body)
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
# Ensures the index exists: asks for its mapping and, when Elasticsearch
# answers NotFound with an IndexMissingException message, creates the index
# with mappings for tweets, deleted tweets, messages and events. Any other
# NotFound error is re-raised.
#
# When @config["kuromoji"] is truthy the free-text fields use the 'kuromoji'
# analyzer and a kuromoji entry is added to the analysis section.
def set_elasticsearch_up
  @es.indices.get_mapping(index: @index_name)
rescue ::Elasticsearch::Transport::Transport::Errors::NotFound => e
  raise e unless e.message =~ /IndexMissingException/

  @logger.info 'elasticsearch.setup: creating index'

  date_format = "EE MMM d HH:mm:ss Z yyyy"
  # Extra options merged into every analyzed free-text field.
  text_analysis = @config["kuromoji"] ? {analyzer: 'kuromoji'} : {}

  user_properties = {
    notifications: {type: 'boolean', store: 'no', index: 'no'},
    follow_request_sent: {type: 'boolean', store: 'no', index: 'no'},
    following: {type: 'boolean', store: 'no', index: 'no'},
    default_profile_image: {type: 'boolean', store: 'no', index: 'no'},
    default_profile: {type: 'boolean', store: 'no', index: 'no'},
    geo_enabled: {type: 'boolean', store: 'no', index: 'no'},
    time_zone: {type: 'string', index: 'not_analyzed'},
    utc_offset: {type: 'integer', store: 'yes', index: 'no'},
    favourites_count: {type: 'integer', store: 'no', index: 'no'},
    created_at: {type: 'date', format: date_format, store: 'yes', index: 'no'},
    listed_count: {type: 'integer', store: 'no', index: 'no'},
    friends_count: {type: 'integer', store: 'no', index: 'no'},
    followers_count: {type: 'integer', store: 'no', index: 'no'},
    id: {type: 'long'},
    id_str: {type: 'string', index: 'not_analyzed'},
    name: {type: 'string'}.merge(text_analysis),
    screen_name: {type: 'string', index: 'not_analyzed'},
    location: {type: 'string', index: 'no'},
    url: {type: 'string', index: 'no'},
    description: {type: 'string'}.merge(text_analysis),
    protected: {type: 'boolean'},
    verified: {type: 'boolean'},
    statuses_count: {type: 'long', store: 'yes', index: 'no'},
    lang: {type: 'string', index: 'not_analyzed'},
    contributors_enabled: {type: 'boolean', index: 'no'},
    is_translator: {type: 'boolean', index: 'no'},
    profile_background_color: {type: 'string', store: 'no', index: 'no'},
    profile_background_image_url: {type: 'string', store: 'no', index: 'no'},
    profile_background_image_url_https: {type: 'string', store: 'no', index: 'no'},
    profile_background_tile: {type: 'boolean', store: 'no', index: 'no'},
    # The original literal listed `type:` twice here; the duplicate is gone.
    profile_image_url: {type: 'string', index: 'no'},
    profile_image_url_https: {type: 'string', index: 'no'},
    profile_link_color: {type: 'string', store: 'no', index: 'no'},
    profile_sidebar_border_color: {type: 'string', store: 'no', index: 'no'},
    profile_sidebar_fill_color: {type: 'string', store: 'no', index: 'no'},
    profile_use_background_image: {type: 'boolean', store: 'no', index: 'no'},
  }

  # Reduced user mapping: only the ids and screen name keep their full
  # definition, everything else is neither stored nor indexed.
  minimum_user_properties = Hash[
    user_properties.map { |k, v|
      [k, %i(id id_str screen_name).include?(k) ? v : {type: v[:type], format: v[:format], store: 'no', index: 'no'}]
    }
  ]

  tweet_properties = {
    lang: {type: 'string', index: 'not_analyzed'},
    deleted: {type: 'boolean', null_value: false},
    filter_level: {type: 'string', index: 'no'},
    retweeted: {type: 'boolean', store: 'no', index: 'no'},
    favorited: {type: 'boolean', store: 'no', index: 'no'},
    entities: {type: 'boolean', store: 'no', index: 'no'},
    favorite_count: {type: 'integer', store: 'no', index: 'no'},
    retweet_count: {type: 'integer', store: 'no', index: 'no'},
    in_reply_to_status_id_str: {type: 'string', index: 'not_analyzed'},
    in_reply_to_status_id: {type: 'long'},
    truncated: {type: 'boolean', store: 'no', index: 'no'},
    source: {type: 'string'},
    text: {type: 'string', boost: 2.0}.merge(text_analysis),
    id_str: {type: 'string', index: 'not_analyzed'},
    id: {type: 'long'},
    created_at: {type: 'date', format: date_format},
    in_reply_to_user_id_str: {type: 'string', index: 'not_analyzed'},
    in_reply_to_user_id: {type: 'long'},
    user: {
      type: 'object',
      properties: user_properties,
    },
    coordinates: {
      type: 'object',
      properties: {
        coordinates: {type: 'geo_point'},
        type: {type: 'string', index: 'no'},
      },
    },
    place: {
      type: 'object',
      properties: {
        attributes: {type: 'object', store: 'no', index: 'no'},
        bounding_box: {type: 'object', index: 'no'},
        country: {type: 'string', index: 'no'},
        country_code: {type: 'string', index: 'not_analyzed'},
        id: {type: 'string', index: 'not_analyzed'},
        name: {type: 'string'},
        place_type: {type: 'string', index: 'no'},
        url: {type: 'string', index: 'no', store: 'yes'},
      },
    },
    contributors: {type: 'object', store: 'no', index: 'no'},
  }

  # Reduced tweet mapping: ids and text keep their definition, the user is
  # replaced by the reduced user mapping.
  minimum_tweet_properties = Hash[
    tweet_properties.map { |k, v|
      if k == :user
        [k, {type: 'object', properties: minimum_user_properties}]
      else
        [k, %i(id id_str text).include?(k) ? v : {type: v[:type], format: v[:format], store: 'no', index: 'no'}]
      end
    }
  ]
  # Order matters: the full tweet mapping embeds the reduced mapping for
  # retweets, and the reduced mapping (same object) then gains an opaque
  # retweeted_status of its own.
  tweet_properties[:retweeted_status] = {type: 'object', properties: minimum_tweet_properties}
  minimum_tweet_properties[:retweeted_status] = {type: 'object', store: 'no', index: 'no'}

  analysis = {standard: {type: 'standard'}}
  analysis = analysis.merge(kuromoji: {type: "kuromoji_tokenizer", mode: "search"}) if @config["kuromoji"]

  # NOTE(review): `analysis` is sent as a top-level key of the request body,
  # next to an empty `settings` -- confirm this is what the targeted
  # Elasticsearch version expects.
  @es.indices.create(index: @index_name, body: {
    settings: {
    },
    analysis: analysis,
    mappings: {
      tweet: {
        _source: {enabled: true},
        properties: tweet_properties,
      },
      deleted_tweet: {
        _source: {enabled: true},
        properties: {
          tweet: {type: 'object', properties: minimum_tweet_properties},
          deleted_at: {type: 'date', index: 'no'},
        },
      },

      message: {
        _source: {enabled: true},
        properties: {
          created_at: {type: 'date', format: date_format, store: 'yes', index: 'no'},
          text: {type: 'string', boost: 2.0, store: 'yes'}.merge(text_analysis),
          sender_id_str: {type: 'string', store: 'yes', index: 'not_analyzed'},
          sender_screen_name: {type: 'string', store: 'yes', index: 'not_analyzed'},
          sender_id: {type: 'long', store: 'yes'},
          recipient_id_str: {type: 'string', store: 'yes', index: 'not_analyzed'},
          recipient_id: {type: 'long', store: 'yes'},
          recipient_screen_name: {type: 'string', store: 'yes', index: 'not_analyzed'},
          sender: {type: 'object', store: 'yes', properties: minimum_user_properties},
          recipient: {type: 'object', store: 'yes', properties: minimum_user_properties},
        },
      },
      event_favorite: {
        _source: {enabled: true},
        properties: {
          created_at: {type: 'date', format: date_format, store: 'yes', index: 'no'},
          event: {type: 'string', store: 'yes', index: 'not_analyzed'},
          source: {type: 'object', store: 'yes', properties: minimum_user_properties},
          target: {type: 'object', store: 'yes', properties: minimum_user_properties},
          target_object: {type: 'object', store: 'yes', properties: minimum_tweet_properties},
        },
      },
      event_user_interaction: {
        _source: {enabled: true},
        properties: {
          created_at: {type: 'date', format: date_format, store: 'yes', index: 'no'},
          event: {type: 'string', store: 'yes', index: 'not_analyzed'},
          source: {type: 'object', store: 'yes', properties: minimum_user_properties},
          target: {type: 'object', store: 'yes', properties: minimum_user_properties},
        },
      },
    },
  })
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'akane/storages/abstract_storage'
|
2
|
+
require 'date'
|
3
|
+
require 'json'
|
4
|
+
require 'time'
|
5
|
+
require 'pathname'
|
6
|
+
|
7
|
+
module Akane
|
8
|
+
module Storages
|
9
|
+
class File < AbstractStorage
|
10
|
+
# Prepares the on-disk layout under @config["dir"]: the base directory plus
# its 'names', 'users', 'event' and 'timeline' subdirectories are created
# when missing.
#
# The signature forwards keyword arguments explicitly: with a bare `*` a
# Ruby 3 zsuper would pass them on as one positional hash, which the
# keyword-only parent initializer rejects.
def initialize(*, **)
  super
  @screen_name_to_id_cache = {}
  @dir = Pathname.new(@config["dir"])
  [@dir, @dir.join('names'), @dir.join('users'), @dir.join('event'), @dir.join('timeline')].each do |d|
    d.mkdir unless d.exist?
  end
end
|
18
|
+
|
19
|
+
# Appends a one-line summary of +tweet+ to today's timeline log and the
# tweet's raw attributes (as JSON) to the author's per-user log.
#
# Line breaks in the tweet text are replaced by spaces so every timeline
# entry stays on a single line.
def record_tweet(account, tweet)
  timeline = timeline_io
  author = tweet["user"]
  stamp = tweet["created_at"].xmlschema
  flat_text = tweet["text"].gsub(/\r?\n/,' ')
  timeline.puts "[#{stamp}][#{account}] #{author["screen_name"]}: #{flat_text} (#{author["id"]},#{tweet["id"]})"

  tweets_io_for_user(author["id"], author["screen_name"]) do |io|
    io.puts tweet.attrs.to_json
  end
end
|
27
|
+
|
28
|
+
# Logs the deletion of tweet +tweet_id+ by +user_id+ as a
# "time,user_id,tweet_id" line, both in today's timeline deletion log and in
# the user's own deletion log.
#
# The line is built once, so both logs carry the same timestamp; reading
# the clock separately for each write could give the two records different
# times.
def mark_as_deleted(account, user_id, tweet_id)
  timeline = timeline_deletion_io
  record = "#{Time.now.xmlschema},#{user_id},#{tweet_id}"

  timeline.puts record
  tweets_deletion_io_for_user(user_id) do |io|
    io.puts record
  end
end
|
34
|
+
|
35
|
+
# Appends +event+ to today's event log as one JSON line, with the receiving
# account added under the "happened_on" key.
def record_event(account, event)
  io = event_io
  annotated = event.merge("happened_on" => account)
  io.puts annotated.to_json
end
|
38
|
+
|
39
|
+
# Stores a direct message in the sender's directory twice: the raw
# attributes as JSON in the raw log, and a readable one-line summary in the
# message log.
def record_message(account, message)
  sender = message["sender"]

  messages_raw_io_for_user(sender["id"], sender["screen_name"]) do |io|
    io.puts message.attrs.to_json
  end

  messages_io_for_user(sender["id"], sender["screen_name"]) do |io|
    recipient = message["recipient"]
    heading = "[#{message["created_at"].xmlschema}] #{sender["screen_name"]} -> #{recipient["screen_name"]}:"
    details = " #{message["text"]} (#{sender["id"]} -> #{recipient["id"]},#{message["id"]})"
    io.puts heading + details
  end
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# Returns the append-mode handle for today's timeline log
# (timeline/YYYY-MM-DD.txt), opening it on first use and again whenever the
# date changes.
#
# The handle's sync mode is on unless @config["sync_io"] is present and
# falsy. When the date rolls over, the previous day's handle is closed once
# the new file has been opened, instead of being left open.
def timeline_io
  today = Date.today
  return @timeline_io if @timeline_io && @timeline_io_date == today

  fresh = ::File.open(@dir.join('timeline', today.strftime('%Y-%m-%d.txt')), 'a')
  fresh.sync = !@config.key?("sync_io") || @config["sync_io"]

  @timeline_io.close if @timeline_io && !@timeline_io.closed?
  @timeline_io_date = today
  @timeline_io = fresh
end
|
61
|
+
|
62
|
+
# Returns the append-mode handle for today's timeline deletion log
# (timeline/YYYY-MM-DD.deleted.txt), opening it on first use and again
# whenever the date changes.
#
# The handle's sync mode is on unless @config["sync_io"] is present and
# falsy. When the date rolls over, the previous day's handle is closed once
# the new file has been opened, instead of being left open.
def timeline_deletion_io
  today = Date.today
  return @timeline_deletion_io if @timeline_deletion_io && @timeline_deletion_io_date == today

  fresh = ::File.open(@dir.join('timeline', today.strftime('%Y-%m-%d.deleted.txt')), 'a')
  fresh.sync = !@config.key?("sync_io") || @config["sync_io"]

  @timeline_deletion_io.close if @timeline_deletion_io && !@timeline_deletion_io.closed?
  @timeline_deletion_io_date = today
  @timeline_deletion_io = fresh
end
|
72
|
+
|
73
|
+
# Returns the append-mode handle for today's event log
# (event/YYYY-MM-DD.txt), opening it on first use and again whenever the
# date changes.
#
# The handle's sync mode is on unless @config["sync_io"] is present and
# falsy. When the date rolls over, the previous day's handle is closed once
# the new file has been opened, instead of being left open.
def event_io
  today = Date.today
  return @event_io if @event_io && @event_io_date == today

  fresh = ::File.open(@dir.join('event', today.strftime('%Y-%m-%d.txt')), 'a')
  fresh.sync = !@config.key?("sync_io") || @config["sync_io"]

  @event_io.close if @event_io && !@event_io.closed?
  @event_io_date = today
  @event_io = fresh
end
|
83
|
+
|
84
|
+
# Opens this month's tweet log of +user_id+ (users/<id>/tweets.YYYY-MM.txt)
# in append mode and passes the handle to the block.
#
# The user's directory and name link are set up first via
# #symlink_user_dir.
def tweets_io_for_user(user_id, screen_name=nil, &block)
  symlink_user_dir(user_id, screen_name)
  file_name = Date.today.strftime('tweets.%Y-%m.txt')
  log_path = @dir.join('users', user_id.to_s, file_name)
  ::File.open(log_path, 'a', &block)
end
|
89
|
+
|
90
|
+
# Opens this month's deletion log of +user_id+
# (users/<id>/deleted-tweets.YYYY-MM.txt) in append mode and passes the
# handle to the block.
#
# The user's directory and name link are set up first via
# #symlink_user_dir.
def tweets_deletion_io_for_user(user_id, screen_name=nil, &block)
  symlink_user_dir(user_id, screen_name)
  file_name = Date.today.strftime('deleted-tweets.%Y-%m.txt')
  log_path = @dir.join('users', user_id.to_s, file_name)
  ::File.open(log_path, 'a', &block)
end
|
95
|
+
|
96
|
+
# Opens this month's readable message log of +user_id+
# (users/<id>/messages.YYYY-MM.txt) in append mode and passes the handle to
# the block.
#
# The user's directory and name link are set up first via
# #symlink_user_dir.
def messages_io_for_user(user_id, screen_name=nil, &block)
  symlink_user_dir(user_id, screen_name)
  file_name = Date.today.strftime('messages.%Y-%m.txt')
  log_path = @dir.join('users', user_id.to_s, file_name)
  ::File.open(log_path, 'a', &block)
end
|
101
|
+
|
102
|
+
# Opens this month's raw message log of +user_id+
# (users/<id>/messages-raw.YYYY-MM.txt) in append mode and passes the handle
# to the block.
#
# The user's directory and name link are set up first via
# #symlink_user_dir.
def messages_raw_io_for_user(user_id, screen_name=nil, &block)
  symlink_user_dir(user_id, screen_name)
  file_name = Date.today.strftime('messages-raw.%Y-%m.txt')
  log_path = @dir.join('users', user_id.to_s, file_name)
  ::File.open(log_path, 'a', &block)
end
|
107
|
+
|
108
|
+
|
109
|
+
# Makes sure users/<user_id> exists and, when a screen name is given, that
# names/<screen_name> is a symlink pointing at it.
#
# The id a screen name currently links to is cached per name; on first use
# it is read from an existing symlink (nil when there is none). If the name
# is linked to a different user id, the old link is renamed to
# names/<old id>-<screen_name> before the new link is created.
def symlink_user_dir(user_id, screen_name=nil)
  id = user_id.to_s
  user_id_dir = @dir.join('users', id)
  user_id_dir.mkdir unless user_id_dir.exist?
  return unless screen_name

  screen_name_dir = @dir.join('names', screen_name)

  unless @screen_name_to_id_cache.has_key?(screen_name)
    linked_id = screen_name_dir.symlink? ? screen_name_dir.realpath.basename.to_s : nil
    @screen_name_to_id_cache[screen_name] = linked_id
    @logger.debug "Caching dir for #{screen_name} : #{@screen_name_to_id_cache[screen_name].inspect}"
  end

  cached_id = @screen_name_to_id_cache[screen_name]
  # Already linked to this user: nothing to do.
  return if cached_id == id

  if cached_id
    # The name now belongs to another user id: keep the old link around
    # under a name prefixed with the previous id.
    prev_id = screen_name_dir.realpath.basename
    @logger.info "Renaming #{screen_name}(#{prev_id}) dir: #{screen_name} -> #{prev_id}-#{screen_name}"
    screen_name_dir.rename(@dir.join('names',"#{prev_id}-#{screen_name}"))
  else
    @logger.info "Linking #{screen_name}->#{user_id} dir"
  end

  screen_name_dir.make_symlink("../users/#{user_id_dir.basename}")
  @screen_name_to_id_cache[screen_name] = id
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|