grailbird_updater 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/grailbird_updater +15 -3
- data/grailbird_updater.gemspec +1 -0
- data/lib/grailbird_updater.rb +85 -30
- data/lib/grailbird_updater/version.rb +1 -1
- metadata +18 -3
data/bin/grailbird_updater
CHANGED
@@ -2,11 +2,12 @@
|
|
2
2
|
|
3
3
|
require 'uri'
|
4
4
|
require 'net/http'
|
5
|
+
require 'yaml'
|
5
6
|
require 'json'
|
7
|
+
require 'csv'
|
6
8
|
require 'oauth'
|
7
|
-
require 'pp'
|
8
9
|
require 'trollop'
|
9
|
-
require 'colorize'
|
10
|
+
require 'colorize'
|
10
11
|
|
11
12
|
require 'grailbird_updater'
|
12
13
|
|
@@ -17,10 +18,14 @@ Update your Twitter archive (best if used with a cron)
|
|
17
18
|
|
18
19
|
Usage: updater [options] [path to archive]
|
19
20
|
|
21
|
+
Where [options] can be:
|
20
22
|
EOS
|
23
|
+
|
21
24
|
opt :verbose, "Verbose mode"
|
22
25
|
opt :prune, "Prune all but necessary user data from tweets", :default => true
|
26
|
+
opt :csv_write, "Also write to CSV files in archive"
|
23
27
|
opt :directory, "Twitter archive directory", :type => :string
|
28
|
+
opt :key_path, "Alternate path to directory containing a YAML file with the app/user keys for the archive's account. Defaults to the archive directory", :type => :string
|
24
29
|
end
|
25
30
|
|
26
31
|
dir = nil
|
@@ -29,4 +34,11 @@ dir ||= ARGV.first
|
|
29
34
|
dir ||= "."
|
30
35
|
raise ArgumentError, "Must specify a directory" unless File.directory?(dir)
|
31
36
|
|
32
|
-
|
37
|
+
key_path = opts[:key_path]
|
38
|
+
key_path ||= dir
|
39
|
+
|
40
|
+
raise ArgumentError, "#{key_path} is not a directory" unless File.directory?(key_path)
|
41
|
+
raise IOError, "#{key_path} is not readable" unless File.readable?(key_path)
|
42
|
+
raise IOError, "#{key_path} is not writable" unless File.writable?(key_path)
|
43
|
+
|
44
|
+
GrailbirdUpdater.new(dir, opts[:verbose], opts[:prune], key_path, opts[:csv_write]).update_tweets
|
data/grailbird_updater.gemspec
CHANGED
data/lib/grailbird_updater.rb
CHANGED
@@ -4,6 +4,7 @@ class GrailbirdUpdater
|
|
4
4
|
|
5
5
|
KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}
|
6
6
|
MAX_REQUEST_SIZE = 200
|
7
|
+
PLATFORM_IS_OSX = (Object::RUBY_PLATFORM =~ /darwin/i) ? true : false
|
7
8
|
|
8
9
|
class JsFile
|
9
10
|
# Read UTF-8 file and return hash of contents (files being read contain JS arrays)
|
@@ -20,7 +21,7 @@ class GrailbirdUpdater
|
|
20
21
|
# @param file_path [String] path to file being read
|
21
22
|
# @raise [IOError] if the required file isn't found
|
22
23
|
def self.read_required(file_path)
|
23
|
-
raise IOError "#{file_path} must exist" unless File.exists?(file_path)
|
24
|
+
raise IOError, "#{file_path} must exist" unless File.exists?(file_path)
|
24
25
|
read(file_path)
|
25
26
|
end
|
26
27
|
|
@@ -35,12 +36,50 @@ class GrailbirdUpdater
|
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
38
|
-
|
39
|
+
class CsvFile
|
40
|
+
# Write Twitter's archive CSV files
|
41
|
+
# @param tweets [Array] all of the tweets you want to write to the file
|
42
|
+
# @param csv_path [String] path to the CSV file being written
|
43
|
+
def self.write_tweets_csv (tweets, csv_path)
|
44
|
+
CSV.open(csv_path, "w") do |csv|
|
45
|
+
csv << ["tweet_id", "in_reply_to_status_id", "in_reply_to_user_id", "retweeted_status_id", "retweeted_status_user_id", "timestamp", "source", "text", "expanded_urls"]
|
46
|
+
tweets.each do |tweet|
|
47
|
+
csv << parse_tweet_into_csv_array(tweet)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Auxiliary function that turns a Tweet hash array (a single tweet from the API,
|
53
|
+
# encoded as a Hash) into the Array to write out to Twitter's CSV
|
54
|
+
#
|
55
|
+
# @param tweet [Hash] single tweet, encoded as a Hash
|
56
|
+
# @return [Array] the tweet as an array
|
57
|
+
def self.parse_tweet_into_csv_array (tweet)
|
58
|
+
csv_tweet_array = [tweet["id"],
|
59
|
+
tweet["in_reply_to_status_id"],
|
60
|
+
tweet["in_reply_to_user_id"],
|
61
|
+
tweet.has_key?("retweeted_status") ? tweet["retweeted_status"]["id"] : '',
|
62
|
+
tweet.has_key?("retweeted_status") ? tweet["retweeted_status"]["user"]["id"] : '',
|
63
|
+
tweet["created_at"],
|
64
|
+
tweet["source"],
|
65
|
+
tweet["text"]];
|
66
|
+
if tweet.has_key?("entities") && tweet["entities"].has_key?("urls")
|
67
|
+
tweet["entities"]["urls"].each do |url|
|
68
|
+
csv_tweet_array << url["expanded_url"]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
return csv_tweet_array
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def initialize(dir, verbose, prune, key_dir, write_csv)
|
39
76
|
@base_dir = dir
|
40
77
|
data_path = dir + "/data"
|
41
78
|
@js_path = data_path + "/js"
|
42
79
|
@csv_path = data_path + "/csv"
|
80
|
+
@key_path = key_dir
|
43
81
|
|
82
|
+
@write_csv = write_csv
|
44
83
|
@verbose = verbose
|
45
84
|
@prune = prune
|
46
85
|
@access_token = nil
|
@@ -94,6 +133,7 @@ class GrailbirdUpdater
|
|
94
133
|
|
95
134
|
# overwrite existing file (or create new if doesn't exist)
|
96
135
|
GrailbirdUpdater::JsFile.write_with_heading(all_month_tweets, "#{@js_path}/tweets/#{year_month}.js", "Grailbird.data.tweets_#{year_month}")
|
136
|
+
GrailbirdUpdater::CsvFile.write_tweets_csv(all_month_tweets, "#{@csv_path}/#{year_month}.csv") if @write_csv
|
97
137
|
tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
|
98
138
|
end
|
99
139
|
|
@@ -107,7 +147,7 @@ class GrailbirdUpdater
|
|
107
147
|
end
|
108
148
|
|
109
149
|
def get_twitter_user_timeline_response(screen_name, user_id, last_tweet_id)
|
110
|
-
twitter_url = "http://api.twitter.com/1/statuses/user_timeline.json"
|
150
|
+
twitter_url = "http://api.twitter.com/1.1/statuses/user_timeline.json"
|
111
151
|
twitter_uri = URI(twitter_url)
|
112
152
|
|
113
153
|
params = {
|
@@ -143,17 +183,13 @@ class GrailbirdUpdater
|
|
143
183
|
if !@access_token.nil?
|
144
184
|
response = @access_token.request(:get, twitter_uri.to_s)
|
145
185
|
else
|
146
|
-
|
147
|
-
|
148
|
-
if response.is_a?(Net::HTTPUnauthorized)
|
149
|
-
@access_token = do_oauth_dance(screen_name)
|
150
|
-
response = @access_token.request(:get, twitter_uri.to_s)
|
151
|
-
end
|
186
|
+
@access_token = do_oauth_dance(screen_name)
|
187
|
+
response = @access_token.request(:get, twitter_uri.to_s)
|
152
188
|
end
|
153
189
|
|
154
190
|
if response.is_a?(Net::HTTPUnauthorized)
|
155
191
|
puts "\nSomething went wrong trying to authorize grailbird_updater with the account: " + "@#{screen_name}".blue
|
156
|
-
puts "Please delete #{@
|
192
|
+
puts "Please delete #{@key_path}/#{screen_name}_keys.yaml and follow the authorize steps again."
|
157
193
|
exit
|
158
194
|
end
|
159
195
|
|
@@ -161,8 +197,8 @@ class GrailbirdUpdater
|
|
161
197
|
end
|
162
198
|
|
163
199
|
def do_oauth_dance(screen_name)
|
164
|
-
|
165
|
-
|
200
|
+
key_file_path = "#{@key_path}/#{screen_name}_keys.yaml"
|
201
|
+
|
166
202
|
if File.exists?(key_file_path)
|
167
203
|
keys = YAML.load_file(key_file_path)
|
168
204
|
consumer_key = keys['consumer_key']
|
@@ -171,25 +207,31 @@ class GrailbirdUpdater
|
|
171
207
|
token_secret = keys['secret']
|
172
208
|
else
|
173
209
|
puts <<-EOS
|
174
|
-
|
210
|
+
\nTo be able to retrieve your protected tweets, you will need a consumer key/secret
|
211
|
+
|
212
|
+
Please follow these steps to authorize grailbird_updater to download tweets:
|
213
|
+
1. Go to https://dev.twitter.com/apps/new
|
214
|
+
2. Give it a name (I recommend #{screen_name}_grailbird), description and URL
|
215
|
+
3. Create application
|
216
|
+
4. Go to your application page, you should see a "Consumer key" and a "Consumer secret"
|
217
|
+
5. Enter these here when prompted, go to the URL provided then enter the PIN you receive
|
175
218
|
|
176
|
-
|
177
|
-
1. Go to https://dev.twitter.com/apps/new
|
178
|
-
2. Give it a name (I recommend #{screen_name}_grailbird), description and URL
|
179
|
-
3. Create application
|
180
|
-
4. Go to your application page, you should see a "Consumer key" and a "Consumer secret"
|
219
|
+
#{"Note".underline}: you will only need to create this application once!
|
181
220
|
|
182
|
-
|
221
|
+
So you don't have to enter these again, we'll save a copy of your keys to:
|
222
|
+
#{key_file_path}
|
183
223
|
|
184
|
-
|
224
|
+
You can always change the directory these are saved to by using the -k or --key-path option
|
185
225
|
|
186
|
-
|
187
|
-
|
188
|
-
|
226
|
+
#{"WARNING".red.underline} Do NOT store the folder of your tweets on a public server.
|
227
|
+
If someone gets access to #{screen_name}_keys.yaml they can access your entire account!
|
228
|
+
If you want to share your archived tweets, either control the read access to the key file
|
229
|
+
OR use the --key-path option to store them somewhere else.
|
230
|
+
EOS
|
189
231
|
|
190
|
-
|
232
|
+
print_flush "\nEnter your 'Consumer key': "
|
191
233
|
consumer_key = STDIN.gets.chomp
|
192
|
-
|
234
|
+
print_flush "Enter your 'Consumer secret': "
|
193
235
|
consumer_secret = STDIN.gets.chomp
|
194
236
|
consumer = OAuth::Consumer.new(
|
195
237
|
consumer_key,
|
@@ -200,9 +242,16 @@ class GrailbirdUpdater
|
|
200
242
|
:authorize_path => '/oauth/authorize' }
|
201
243
|
)
|
202
244
|
request_token = consumer.get_request_token
|
203
|
-
|
245
|
+
authorize_url = request_token.authorize_url()
|
246
|
+
puts "\nGo to this URL: #{authorize_url}"
|
204
247
|
puts "Authorize the application and you will receive a PIN"
|
205
|
-
|
248
|
+
# open default browser if on OS X
|
249
|
+
if PLATFORM_IS_OSX
|
250
|
+
sleep(2)
|
251
|
+
`open "#{authorize_url}"`
|
252
|
+
end
|
253
|
+
|
254
|
+
print_flush "Enter the PIN here: "
|
206
255
|
pin = STDIN.gets.chomp
|
207
256
|
access_token = request_token.get_access_token(:oauth_verifier => pin)
|
208
257
|
|
@@ -226,7 +275,7 @@ class GrailbirdUpdater
|
|
226
275
|
:scheme => :header
|
227
276
|
})
|
228
277
|
# now create the access token object from passed values
|
229
|
-
token_hash = {
|
278
|
+
token_hash = {:oauth_token => oauth_token,
|
230
279
|
:oauth_token_secret => oauth_token_secret
|
231
280
|
}
|
232
281
|
access_token = OAuth::AccessToken.from_hash(consumer, token_hash )
|
@@ -241,13 +290,14 @@ class GrailbirdUpdater
|
|
241
290
|
end
|
242
291
|
|
243
292
|
def display_tweet(tweet)
|
293
|
+
tweet = tweet["retweeted_status"] if tweet.has_key?("retweeted_status")
|
244
294
|
tweet_text = tweet['text']
|
245
295
|
if tweet['entities'] && tweet['entities']['urls']
|
246
296
|
tweet['entities']['urls'].each { |url_entity|
|
247
297
|
tweet_text = tweet['text'].gsub("#{url_entity['url']}", "#{url_entity['expanded_url']}")
|
248
298
|
}
|
249
299
|
end
|
250
|
-
tweet = "@#{tweet['user']['screen_name']}".blue + ":
|
300
|
+
tweet = "@#{tweet['user']['screen_name']}".blue + ": #{tweet_text}\n"
|
251
301
|
end
|
252
302
|
|
253
303
|
def update_tweet_index(tweet_index, year_month, count)
|
@@ -262,7 +312,7 @@ class GrailbirdUpdater
|
|
262
312
|
end
|
263
313
|
|
264
314
|
new_month = {"file_name" => "data/js/tweets/#{year_month}.js",
|
265
|
-
|
315
|
+
"year" => year,
|
266
316
|
"var_name" => "tweets_#{year_month}",
|
267
317
|
"tweet_count" => count,
|
268
318
|
"month" => month
|
@@ -276,5 +326,10 @@ class GrailbirdUpdater
|
|
276
326
|
def vputs(str)
|
277
327
|
puts str if @verbose
|
278
328
|
end
|
329
|
+
|
330
|
+
def print_flush(str)
|
331
|
+
print str
|
332
|
+
$stdout.flush
|
333
|
+
end
|
279
334
|
end
|
280
335
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grailbird_updater
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: oauth
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: minitest
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
description: Twitter now allows you to download your tweets. This tool lets you keep
|
63
79
|
that archive up to date.
|
64
80
|
email:
|
@@ -106,4 +122,3 @@ summary: A way to keep an updated archive of Twitter tweets.
|
|
106
122
|
test_files:
|
107
123
|
- test/grailbird_updater_test.rb
|
108
124
|
- test/test_helper.rb
|
109
|
-
has_rdoc:
|