grailbird_updater 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/grailbird_updater +15 -3
- data/grailbird_updater.gemspec +1 -0
- data/lib/grailbird_updater.rb +85 -30
- data/lib/grailbird_updater/version.rb +1 -1
- metadata +18 -3
data/bin/grailbird_updater
CHANGED
@@ -2,11 +2,12 @@
|
|
2
2
|
|
3
3
|
require 'uri'
|
4
4
|
require 'net/http'
|
5
|
+
require 'yaml'
|
5
6
|
require 'json'
|
7
|
+
require 'csv'
|
6
8
|
require 'oauth'
|
7
|
-
require 'pp'
|
8
9
|
require 'trollop'
|
9
|
-
require 'colorize'
|
10
|
+
require 'colorize'
|
10
11
|
|
11
12
|
require 'grailbird_updater'
|
12
13
|
|
@@ -17,10 +18,14 @@ Update your Twitter archive (best if used with a cron)
|
|
17
18
|
|
18
19
|
Usage: updater [options] [path to archive]
|
19
20
|
|
21
|
+
Where [options] can be:
|
20
22
|
EOS
|
23
|
+
|
21
24
|
opt :verbose, "Verbose mode"
|
22
25
|
opt :prune, "Prune all but necessary user data from tweets", :default => true
|
26
|
+
opt :csv_write, "Also write to CSV files in archive"
|
23
27
|
opt :directory, "Twitter archive directory", :type => :string
|
28
|
+
opt :key_path, "Alternate path to directory containing a YAML file with the app/user keys for the archive's account. Defaults to the archive directory", :type => :string
|
24
29
|
end
|
25
30
|
|
26
31
|
dir = nil
|
@@ -29,4 +34,11 @@ dir ||= ARGV.first
|
|
29
34
|
dir ||= "."
|
30
35
|
raise ArgumentError, "Must specify a directory" unless File.directory?(dir)
|
31
36
|
|
32
|
-
|
37
|
+
key_path = opts[:key_path]
|
38
|
+
key_path ||= dir
|
39
|
+
|
40
|
+
raise ArgumentError, "#{key_path} is not a directory" unless File.directory?(key_path)
|
41
|
+
raise IOError, "#{key_path} is not readable" unless File.readable?(key_path)
|
42
|
+
raise IOError, "#{key_path} is not writable" unless File.writable?(key_path)
|
43
|
+
|
44
|
+
GrailbirdUpdater.new(dir, opts[:verbose], opts[:prune], key_path, opts[:csv_write]).update_tweets
|
data/grailbird_updater.gemspec
CHANGED
data/lib/grailbird_updater.rb
CHANGED
@@ -4,6 +4,7 @@ class GrailbirdUpdater
|
|
4
4
|
|
5
5
|
KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}
|
6
6
|
MAX_REQUEST_SIZE = 200
|
7
|
+
PLATFORM_IS_OSX = (Object::RUBY_PLATFORM =~ /darwin/i) ? true : false
|
7
8
|
|
8
9
|
class JsFile
|
9
10
|
# Read UTF-8 file and return hash of contents (files being read contain JS arrays)
|
@@ -20,7 +21,7 @@ class GrailbirdUpdater
|
|
20
21
|
# @param file_path [String] path to file being read
|
21
22
|
# @raise [IOError] if the required file isn't found
|
22
23
|
def self.read_required(file_path)
|
23
|
-
raise IOError "#{file_path} must exist" unless File.exists?(file_path)
|
24
|
+
raise IOError, "#{file_path} must exist" unless File.exists?(file_path)
|
24
25
|
read(file_path)
|
25
26
|
end
|
26
27
|
|
@@ -35,12 +36,50 @@ class GrailbirdUpdater
|
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
38
|
-
|
39
|
+
class CsvFile
|
40
|
+
# Write Twitter's archive CSV files
|
41
|
+
# @param tweets [Array] all of the tweets you want to write to the file
|
42
|
+
# @param csv_path [String] path to file being written
|
43
|
+
def self.write_tweets_csv (tweets, csv_path)
|
44
|
+
CSV.open(csv_path, "w") do |csv|
|
45
|
+
csv << ["tweet_id", "in_reply_to_status_id", "in_reply_to_user_id", "retweeted_status_id", "retweeted_status_user_id", "timestamp", "source", "text", "expanded_urls"]
|
46
|
+
tweets.each do |tweet|
|
47
|
+
csv << parse_tweet_into_csv_array(tweet)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Auxiliary function that turns a Tweet hash array (a single tweet from the API,
|
53
|
+
# encoded as a Hash) into the Array to write out to Twitter's CSV
|
54
|
+
#
|
55
|
+
# @param tweet [Hash] single tweet, encoded as a Hash
|
56
|
+
# @return [Array] the tweet as an array
|
57
|
+
def self.parse_tweet_into_csv_array (tweet)
|
58
|
+
csv_tweet_array = [tweet["id"],
|
59
|
+
tweet["in_reply_to_status_id"],
|
60
|
+
tweet["in_reply_to_user_id"],
|
61
|
+
tweet.has_key?("retweeted_status") ? tweet["retweeted_status"]["id"] : '',
|
62
|
+
tweet.has_key?("retweeted_status") ? tweet["retweeted_status"]["user"]["id"] : '',
|
63
|
+
tweet["created_at"],
|
64
|
+
tweet["source"],
|
65
|
+
tweet["text"]];
|
66
|
+
if tweet.has_key?("entities") && tweet["entities"].has_key?("urls")
|
67
|
+
tweet["entities"]["urls"].each do |url|
|
68
|
+
csv_tweet_array << url["expanded_url"]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
return csv_tweet_array
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def initialize(dir, verbose, prune, key_dir, write_csv)
|
39
76
|
@base_dir = dir
|
40
77
|
data_path = dir + "/data"
|
41
78
|
@js_path = data_path + "/js"
|
42
79
|
@csv_path = data_path + "/csv"
|
80
|
+
@key_path = key_dir
|
43
81
|
|
82
|
+
@write_csv = write_csv
|
44
83
|
@verbose = verbose
|
45
84
|
@prune = prune
|
46
85
|
@access_token = nil
|
@@ -94,6 +133,7 @@ class GrailbirdUpdater
|
|
94
133
|
|
95
134
|
# overwrite existing file (or create new if doesn't exist)
|
96
135
|
GrailbirdUpdater::JsFile.write_with_heading(all_month_tweets, "#{@js_path}/tweets/#{year_month}.js", "Grailbird.data.tweets_#{year_month}")
|
136
|
+
GrailbirdUpdater::CsvFile.write_tweets_csv(all_month_tweets, "#{@csv_path}/#{year_month}.csv") if @write_csv
|
97
137
|
tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
|
98
138
|
end
|
99
139
|
|
@@ -107,7 +147,7 @@ class GrailbirdUpdater
|
|
107
147
|
end
|
108
148
|
|
109
149
|
def get_twitter_user_timeline_response(screen_name, user_id, last_tweet_id)
|
110
|
-
twitter_url = "http://api.twitter.com/1/statuses/user_timeline.json"
|
150
|
+
twitter_url = "http://api.twitter.com/1.1/statuses/user_timeline.json"
|
111
151
|
twitter_uri = URI(twitter_url)
|
112
152
|
|
113
153
|
params = {
|
@@ -143,17 +183,13 @@ class GrailbirdUpdater
|
|
143
183
|
if !@access_token.nil?
|
144
184
|
response = @access_token.request(:get, twitter_uri.to_s)
|
145
185
|
else
|
146
|
-
|
147
|
-
|
148
|
-
if response.is_a?(Net::HTTPUnauthorized)
|
149
|
-
@access_token = do_oauth_dance(screen_name)
|
150
|
-
response = @access_token.request(:get, twitter_uri.to_s)
|
151
|
-
end
|
186
|
+
@access_token = do_oauth_dance(screen_name)
|
187
|
+
response = @access_token.request(:get, twitter_uri.to_s)
|
152
188
|
end
|
153
189
|
|
154
190
|
if response.is_a?(Net::HTTPUnauthorized)
|
155
191
|
puts "\nSomething went wrong trying to authorize grailbird_updater with the account: " + "@#{screen_name}".blue
|
156
|
-
puts "Please delete #{@
|
192
|
+
puts "Please delete #{@key_path}/#{screen_name}_keys.yaml and follow the authorize steps again."
|
157
193
|
exit
|
158
194
|
end
|
159
195
|
|
@@ -161,8 +197,8 @@ class GrailbirdUpdater
|
|
161
197
|
end
|
162
198
|
|
163
199
|
def do_oauth_dance(screen_name)
|
164
|
-
|
165
|
-
|
200
|
+
key_file_path = "#{@key_path}/#{screen_name}_keys.yaml"
|
201
|
+
|
166
202
|
if File.exists?(key_file_path)
|
167
203
|
keys = YAML.load_file(key_file_path)
|
168
204
|
consumer_key = keys['consumer_key']
|
@@ -171,25 +207,31 @@ class GrailbirdUpdater
|
|
171
207
|
token_secret = keys['secret']
|
172
208
|
else
|
173
209
|
puts <<-EOS
|
174
|
-
|
210
|
+
\nTo be able to retrieve your protected tweets, you will need a consumer key/secret
|
211
|
+
|
212
|
+
Please follow these steps to authorize grailbird_updater to download tweets:
|
213
|
+
1. Go to https://dev.twitter.com/apps/new
|
214
|
+
2. Give it a name (I recommend #{screen_name}_grailbird), description and URL
|
215
|
+
3. Create application
|
216
|
+
4. Go to your application page, you should see a "Consumer key" and a "Consumer secret"
|
217
|
+
5. Enter these here when prompted, go to the URL provided then enter the PIN you receive
|
175
218
|
|
176
|
-
|
177
|
-
1. Go to https://dev.twitter.com/apps/new
|
178
|
-
2. Give it a name (I recommend #{screen_name}_grailbird), description and URL
|
179
|
-
3. Create application
|
180
|
-
4. Go to your application page, you should see a "Consumer key" and a "Consumer secret"
|
219
|
+
#{"Note".underline}: you will only need to create this application once!
|
181
220
|
|
182
|
-
|
221
|
+
So you don't have to enter these again, we'll save a copy of your keys to:
|
222
|
+
#{key_file_path}
|
183
223
|
|
184
|
-
|
224
|
+
You can always change the directory these are saved to by using the -k or --key-path option
|
185
225
|
|
186
|
-
|
187
|
-
|
188
|
-
|
226
|
+
#{"WARNING".red.underline} Do NOT store the folder of your tweets on a public server.
|
227
|
+
If someone gets access to #{screen_name}_keys.yaml they can access your entire account!
|
228
|
+
If you want to share your archived tweets, either control the read access to the key file
|
229
|
+
OR use the --key-path option to store them somewhere else.
|
230
|
+
EOS
|
189
231
|
|
190
|
-
|
232
|
+
print_flush "\nEnter your 'Consumer key': "
|
191
233
|
consumer_key = STDIN.gets.chomp
|
192
|
-
|
234
|
+
print_flush "Enter your 'Consumer secret': "
|
193
235
|
consumer_secret = STDIN.gets.chomp
|
194
236
|
consumer = OAuth::Consumer.new(
|
195
237
|
consumer_key,
|
@@ -200,9 +242,16 @@ class GrailbirdUpdater
|
|
200
242
|
:authorize_path => '/oauth/authorize' }
|
201
243
|
)
|
202
244
|
request_token = consumer.get_request_token
|
203
|
-
|
245
|
+
authorize_url = request_token.authorize_url()
|
246
|
+
puts "\nGo to this URL: #{authorize_url}"
|
204
247
|
puts "Authorize the application and you will receive a PIN"
|
205
|
-
|
248
|
+
# open default browser if on OS X
|
249
|
+
if PLATFORM_IS_OSX
|
250
|
+
sleep(2)
|
251
|
+
`open "#{authorize_url}"`
|
252
|
+
end
|
253
|
+
|
254
|
+
print_flush "Enter the PIN here: "
|
206
255
|
pin = STDIN.gets.chomp
|
207
256
|
access_token = request_token.get_access_token(:oauth_verifier => pin)
|
208
257
|
|
@@ -226,7 +275,7 @@ class GrailbirdUpdater
|
|
226
275
|
:scheme => :header
|
227
276
|
})
|
228
277
|
# now create the access token object from passed values
|
229
|
-
token_hash = {
|
278
|
+
token_hash = {:oauth_token => oauth_token,
|
230
279
|
:oauth_token_secret => oauth_token_secret
|
231
280
|
}
|
232
281
|
access_token = OAuth::AccessToken.from_hash(consumer, token_hash )
|
@@ -241,13 +290,14 @@ class GrailbirdUpdater
|
|
241
290
|
end
|
242
291
|
|
243
292
|
def display_tweet(tweet)
|
293
|
+
tweet = tweet["retweeted_status"] if tweet.has_key?("retweeted_status")
|
244
294
|
tweet_text = tweet['text']
|
245
295
|
if tweet['entities'] && tweet['entities']['urls']
|
246
296
|
tweet['entities']['urls'].each { |url_entity|
|
247
297
|
tweet_text = tweet['text'].gsub("#{url_entity['url']}", "#{url_entity['expanded_url']}")
|
248
298
|
}
|
249
299
|
end
|
250
|
-
tweet = "@#{tweet['user']['screen_name']}".blue + ":
|
300
|
+
tweet = "@#{tweet['user']['screen_name']}".blue + ": #{tweet_text}\n"
|
251
301
|
end
|
252
302
|
|
253
303
|
def update_tweet_index(tweet_index, year_month, count)
|
@@ -262,7 +312,7 @@ class GrailbirdUpdater
|
|
262
312
|
end
|
263
313
|
|
264
314
|
new_month = {"file_name" => "data/js/tweets/#{year_month}.js",
|
265
|
-
|
315
|
+
"year" => year,
|
266
316
|
"var_name" => "tweets_#{year_month}",
|
267
317
|
"tweet_count" => count,
|
268
318
|
"month" => month
|
@@ -276,5 +326,10 @@ class GrailbirdUpdater
|
|
276
326
|
def vputs(str)
|
277
327
|
puts str if @verbose
|
278
328
|
end
|
329
|
+
|
330
|
+
def print_flush(str)
|
331
|
+
print str
|
332
|
+
$stdout.flush
|
333
|
+
end
|
279
334
|
end
|
280
335
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grailbird_updater
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: oauth
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: minitest
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
description: Twitter now allows you to download your tweets. This tool lets you keep
|
63
79
|
that archive up to date.
|
64
80
|
email:
|
@@ -106,4 +122,3 @@ summary: A way to keep an updated archive of Twitter tweets.
|
|
106
122
|
test_files:
|
107
123
|
- test/grailbird_updater_test.rb
|
108
124
|
- test/test_helper.rb
|
109
|
-
has_rdoc:
|