grailbird_updater 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/grailbird_updater.rb +40 -28
- data/lib/grailbird_updater/version.rb +1 -1
- metadata +3 -2
data/lib/grailbird_updater.rb
CHANGED
@@ -4,6 +4,36 @@ class GrailbirdUpdater
|
|
4
4
|
|
5
5
|
KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}
|
6
6
|
|
7
|
+
class JsFile
  # Read a UTF-8 file and return its parsed contents. The files being read
  # hold a JS assignment ("var name = <json>" or "Some.path = <json>"), so
  # the leading assignment is stripped before the rest is parsed as JSON.
  #
  # @param file_path [String] path to file being read
  # @return [Object] the parsed JSON value (Hash or Array)
  # @raise [JSON::ParserError] if what follows the heading isn't valid JSON
  def self.read(file_path)
    # File.read closes the handle for us and, unlike Kernel#open, never
    # treats a path starting with "|" as a command to run.
    raw = File.read(file_path).force_encoding("UTF-8")
    # Collapse onto one line so the heading pattern only ever matches at
    # the very start of the contents.
    file_contents = raw.split("\n").join(" ")
    json_file_contents = file_contents.gsub(/^((var)?\s*(.+?)\s+=\s+)/m, '')
    JSON.parse(json_file_contents)
  end

  # Checks that the file being read exists, stops everything if it doesn't
  #
  # @param file_path [String] path to file being read
  # @return [Object] the parsed JSON value (see {read})
  # @raise [IOError] if the required file isn't found
  def self.read_required(file_path)
    # The comma matters: without it Ruby parses `IOError "..."` as a method
    # call and raises NoMethodError instead of the intended IOError.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    raise IOError, "#{file_path} must exist" unless File.exist?(file_path)
    read(file_path)
  end

  # Write files Twitter's Archive app likes with specific headings
  #
  # @param contents [Object] object whose contents are to be written to the file
  # @param file_path [String] path to file being written
  # @param heading [String] heading for file, usually "var Something"
  def self.write_with_heading(contents, file_path, heading)
    json_pretty_contents = JSON.pretty_generate(contents)
    File.open(file_path, 'w') { |f| f.write("#{heading} = #{json_pretty_contents}") }
  end
end
|
36
|
+
|
7
37
|
def initialize(dir, count, verbose, prune)
|
8
38
|
@base_dir = dir
|
9
39
|
data_path = dir + "/data"
|
@@ -17,23 +47,22 @@ class GrailbirdUpdater
|
|
17
47
|
|
18
48
|
def update_tweets
|
19
49
|
# find user_id in data/js/user_details.js
|
20
|
-
user_details =
|
50
|
+
user_details = GrailbirdUpdater::JsFile.read_required("#{@js_path}/user_details.js")
|
21
51
|
user_id = user_details["id"]
|
22
52
|
screen_name = user_details["screen_name"]
|
23
53
|
vputs "Twitter Archive for " + "@#{screen_name}".light_blue + " (##{user_id}) found"
|
24
54
|
|
25
55
|
# find archive details
|
26
|
-
archive_details =
|
56
|
+
archive_details = GrailbirdUpdater::JsFile.read_required("#{@js_path}/payload_details.js")
|
27
57
|
vputs "Found archive payload containing #{archive_details['tweets']} tweets, created at #{archive_details['created_at']}"
|
28
58
|
|
29
59
|
# find latest month file (should be last when sorted alphanumerically)
|
30
60
|
twitter_js_files = Dir.glob("#{@js_path}/tweets/*.js")
|
31
|
-
latest_month =
|
61
|
+
latest_month = GrailbirdUpdater::JsFile.read_required(twitter_js_files.sort.last)
|
32
62
|
|
33
63
|
# find last_tweet_id in latest_month (should be first, because Twitter)
|
34
64
|
last_tweet = latest_month.first
|
35
65
|
last_tweet_id = last_tweet["id_str"]
|
36
|
-
last_tweet_date = Date.parse(last_tweet["created_at"])
|
37
66
|
|
38
67
|
vputs "Last tweet in archive is\n\t" + display_tweet(last_tweet)
|
39
68
|
|
@@ -53,38 +82,27 @@ class GrailbirdUpdater
|
|
53
82
|
end
|
54
83
|
|
55
84
|
# add tweets to json data file
|
56
|
-
tweet_index =
|
85
|
+
tweet_index = GrailbirdUpdater::JsFile.read_required("#{@js_path}/tweet_index.js")
|
57
86
|
collected_months.each do |year_month, month_tweets|
|
58
87
|
month_path = "#{@js_path}/tweets/#{year_month}.js"
|
59
88
|
|
60
|
-
existing_month_tweets = (File.exists?(month_path)) ?
|
89
|
+
existing_month_tweets = (File.exists?(month_path)) ? GrailbirdUpdater::JsFile.read(month_path) : []
|
61
90
|
all_month_tweets = month_tweets | existing_month_tweets
|
62
91
|
# sort new collection of tweets for this month by reverse date
|
63
92
|
all_month_tweets.sort_by {|t| -Date.parse(t['created_at']).strftime("%s").to_i }
|
64
93
|
|
65
94
|
# overwrite existing file (or create new if doesn't exist)
|
66
|
-
|
95
|
+
GrailbirdUpdater::JsFile.write_with_heading(all_month_tweets, "#{@js_path}/tweets/#{year_month}.js", "Grailbird.data.tweets_#{year_month}")
|
67
96
|
tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
|
68
97
|
end
|
69
98
|
|
70
99
|
# write new tweet_index.js once
|
71
|
-
|
100
|
+
GrailbirdUpdater::JsFile.write_with_heading(tweet_index, "#{@js_path}/tweet_index.js", "var tweet_index")
|
72
101
|
|
73
102
|
# add count to payload_details.js
|
74
103
|
archive_details['tweets'] += tweets.length
|
75
104
|
archive_details['updated_at'] = Time.now.getgm.strftime("%a %b %d %T %z %Y")
|
76
|
-
|
77
|
-
end
|
78
|
-
|
79
|
-
def read_required_twitter_js_file(file_path)
|
80
|
-
raise "#{file_path} must exist" unless File.exists?(file_path)
|
81
|
-
read_twitter_js_file(file_path)
|
82
|
-
end
|
83
|
-
|
84
|
-
def read_twitter_js_file(file_path)
|
85
|
-
file_contents = open(file_path).read.force_encoding("UTF-8").split("\n").join(" ")
|
86
|
-
json_file_contents = file_contents.gsub(/^((var)?\s*(.+?)\s+=\s+)/m, '')
|
87
|
-
return JSON.parse(json_file_contents)
|
105
|
+
GrailbirdUpdater::JsFile.write_with_heading(archive_details, "#{@js_path}/payload_details.js", "var payload_details")
|
88
106
|
end
|
89
107
|
|
90
108
|
def get_twitter_user_timeline_response(screen_name, user_id, last_tweet_id, count)
|
@@ -98,7 +116,7 @@ class GrailbirdUpdater
|
|
98
116
|
:include_entities => true}
|
99
117
|
twitter_uri.query = URI.encode_www_form(params)
|
100
118
|
|
101
|
-
vputs "\nMaking request to #{twitter_uri}"
|
119
|
+
vputs "\nMaking request to #{twitter_uri}\n"
|
102
120
|
response = Net::HTTP.get_response(twitter_uri)
|
103
121
|
|
104
122
|
if response.is_a?(Net::HTTPUnauthorized)
|
@@ -137,7 +155,7 @@ class GrailbirdUpdater
|
|
137
155
|
|
138
156
|
So you don't have to enter these again, we'll save a copy of your keys in a file called #{screen_name}_keys.yaml
|
139
157
|
|
140
|
-
#{"IMPORTANT".red.blink} Do NOT store the folder of your tweets on a public server.
|
158
|
+
#{"IMPORTANT".red.blink} Do NOT store the folder of your tweets on a public server.
|
141
159
|
If someone gets access to #{screen_name}_keys.yaml they can access your entire account!
|
142
160
|
EOS
|
143
161
|
|
@@ -187,7 +205,6 @@ class GrailbirdUpdater
|
|
187
205
|
return access_token
|
188
206
|
end
|
189
207
|
|
190
|
-
|
191
208
|
def prune_tweet(tweet)
|
192
209
|
KEEP_FIELDS.each do |parent_field, field_names|
|
193
210
|
tweet[parent_field].delete_if { |key, value| !field_names.include?(key) }
|
@@ -225,11 +242,6 @@ class GrailbirdUpdater
|
|
225
242
|
return tweet_index.unshift(new_month).sort_by {|m| [-m['year'], -m['month']]}
|
226
243
|
end
|
227
244
|
|
228
|
-
def write_twitter_js_to_path_with_heading(contents, path, heading)
|
229
|
-
json_pretty_contents = JSON.pretty_generate(contents)
|
230
|
-
File.open(path, 'w') {|f| f.write("#{heading} = #{json_pretty_contents}")}
|
231
|
-
end
|
232
|
-
|
233
245
|
private
|
234
246
|
|
235
247
|
# only puts if we're verbose
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grailbird_updater
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: oauth
|
@@ -106,3 +106,4 @@ summary: A way to keep an updated archive of Twitter tweets.
|
|
106
106
|
test_files:
|
107
107
|
- test/grailbird_updater_test.rb
|
108
108
|
- test/test_helper.rb
|
109
|
+
has_rdoc:
|