grailbird_updater 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/grailbird_updater.rb +40 -28
- data/lib/grailbird_updater/version.rb +1 -1
- metadata +3 -2
data/lib/grailbird_updater.rb
CHANGED
@@ -4,6 +4,36 @@ class GrailbirdUpdater
|
|
4
4
|
|
5
5
|
KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}
|
6
6
|
|
7
|
+
class JsFile
  # Helpers for the JavaScript data files of a Twitter archive. Each file is
  # expected to hold a JSON document, optionally preceded by an assignment
  # heading such as "var payload_details = ".

  # Read a UTF-8 file and return its parsed contents (the files being read
  # contain JSON, optionally prefixed by "<heading> = ").
  #
  # @param file_path [String] path to file being read
  # @return [Object] whatever JSON.parse yields for the file's JSON payload
  # @raise [JSON::ParserError] if what remains after the heading is not valid JSON
  def self.read(file_path)
    # File.read closes the handle once the contents are read, and unlike
    # Kernel#open it never interprets the path as a command to run.
    file_contents = File.read(file_path).force_encoding("UTF-8").split("\n").join(" ")

    # Drop the leading "var name = " / "Some.name = " heading. The contents
    # are a single line at this point, so the ^ anchor can match only once.
    json_file_contents = file_contents.sub(/^((var)?\s*(.+?)\s+=\s+)/m, '')
    JSON.parse(json_file_contents)
  end

  # Checks that the file being read exists before reading it, and raises if
  # it doesn't.
  #
  # @param file_path [String] path to file being read
  # @return [Object] parsed contents, as returned by {read}
  # @raise [IOError] if the required file isn't found
  def self.read_required(file_path)
    # The comma matters: without it Ruby parses `IOError "..."` as a call to
    # a method named IOError and raises NoMethodError instead.
    raise IOError, "#{file_path} must exist" unless File.exist?(file_path)
    read(file_path)
  end

  # Write files Twitter's Archive app likes with specific headings
  #
  # @param contents [Object] object whose contents are to be written to the file
  # @param file_path [String] path to file being written
  # @param heading [String] heading for file, usually "var Something"
  def self.write_with_heading(contents, file_path, heading)
    json_pretty_contents = JSON.pretty_generate(contents)
    File.open(file_path, 'w') { |f| f.write("#{heading} = #{json_pretty_contents}") }
  end
end
|
36
|
+
|
7
37
|
def initialize(dir, count, verbose, prune)
|
8
38
|
@base_dir = dir
|
9
39
|
data_path = dir + "/data"
|
@@ -17,23 +47,22 @@ class GrailbirdUpdater
|
|
17
47
|
|
18
48
|
def update_tweets
|
19
49
|
# find user_id in data/js/user_details.js
|
20
|
-
user_details =
|
50
|
+
user_details = GrailbirdUpdater::JsFile.read_required("#{@js_path}/user_details.js")
|
21
51
|
user_id = user_details["id"]
|
22
52
|
screen_name = user_details["screen_name"]
|
23
53
|
vputs "Twitter Archive for " + "@#{screen_name}".light_blue + " (##{user_id}) found"
|
24
54
|
|
25
55
|
# find archive details
|
26
|
-
archive_details =
|
56
|
+
archive_details = GrailbirdUpdater::JsFile.read_required("#{@js_path}/payload_details.js")
|
27
57
|
vputs "Found archive payload containing #{archive_details['tweets']} tweets, created at #{archive_details['created_at']}"
|
28
58
|
|
29
59
|
# find latest month file (should be last when sorted alphanumerically)
|
30
60
|
twitter_js_files = Dir.glob("#{@js_path}/tweets/*.js")
|
31
|
-
latest_month =
|
61
|
+
latest_month = GrailbirdUpdater::JsFile.read_required(twitter_js_files.sort.last)
|
32
62
|
|
33
63
|
# find last_tweet_id in latest_month (should be first, because Twitter)
|
34
64
|
last_tweet = latest_month.first
|
35
65
|
last_tweet_id = last_tweet["id_str"]
|
36
|
-
last_tweet_date = Date.parse(last_tweet["created_at"])
|
37
66
|
|
38
67
|
vputs "Last tweet in archive is\n\t" + display_tweet(last_tweet)
|
39
68
|
|
@@ -53,38 +82,27 @@ class GrailbirdUpdater
|
|
53
82
|
end
|
54
83
|
|
55
84
|
# add tweets to json data file
|
56
|
-
tweet_index =
|
85
|
+
tweet_index = GrailbirdUpdater::JsFile.read_required("#{@js_path}/tweet_index.js")
|
57
86
|
collected_months.each do |year_month, month_tweets|
|
58
87
|
month_path = "#{@js_path}/tweets/#{year_month}.js"
|
59
88
|
|
60
|
-
existing_month_tweets = (File.exists?(month_path)) ?
|
89
|
+
existing_month_tweets = (File.exists?(month_path)) ? GrailbirdUpdater::JsFile.read(month_path) : []
|
61
90
|
all_month_tweets = month_tweets | existing_month_tweets
|
62
91
|
# sort new collection of tweets for this month by reverse date
|
63
92
|
all_month_tweets.sort_by {|t| -Date.parse(t['created_at']).strftime("%s").to_i }
|
64
93
|
|
65
94
|
# overwrite existing file (or create new if doesn't exist)
|
66
|
-
|
95
|
+
GrailbirdUpdater::JsFile.write_with_heading(all_month_tweets, "#{@js_path}/tweets/#{year_month}.js", "Grailbird.data.tweets_#{year_month}")
|
67
96
|
tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
|
68
97
|
end
|
69
98
|
|
70
99
|
# write new tweet_index.js once
|
71
|
-
|
100
|
+
GrailbirdUpdater::JsFile.write_with_heading(tweet_index, "#{@js_path}/tweet_index.js", "var tweet_index")
|
72
101
|
|
73
102
|
# add count to payload_details.js
|
74
103
|
archive_details['tweets'] += tweets.length
|
75
104
|
archive_details['updated_at'] = Time.now.getgm.strftime("%a %b %d %T %z %Y")
|
76
|
-
|
77
|
-
end
|
78
|
-
|
79
|
-
def read_required_twitter_js_file(file_path)
|
80
|
-
raise "#{file_path} must exist" unless File.exists?(file_path)
|
81
|
-
read_twitter_js_file(file_path)
|
82
|
-
end
|
83
|
-
|
84
|
-
def read_twitter_js_file(file_path)
|
85
|
-
file_contents = open(file_path).read.force_encoding("UTF-8").split("\n").join(" ")
|
86
|
-
json_file_contents = file_contents.gsub(/^((var)?\s*(.+?)\s+=\s+)/m, '')
|
87
|
-
return JSON.parse(json_file_contents)
|
105
|
+
GrailbirdUpdater::JsFile.write_with_heading(archive_details, "#{@js_path}/payload_details.js", "var payload_details")
|
88
106
|
end
|
89
107
|
|
90
108
|
def get_twitter_user_timeline_response(screen_name, user_id, last_tweet_id, count)
|
@@ -98,7 +116,7 @@ class GrailbirdUpdater
|
|
98
116
|
:include_entities => true}
|
99
117
|
twitter_uri.query = URI.encode_www_form(params)
|
100
118
|
|
101
|
-
vputs "\nMaking request to #{twitter_uri}"
|
119
|
+
vputs "\nMaking request to #{twitter_uri}\n"
|
102
120
|
response = Net::HTTP.get_response(twitter_uri)
|
103
121
|
|
104
122
|
if response.is_a?(Net::HTTPUnauthorized)
|
@@ -137,7 +155,7 @@ class GrailbirdUpdater
|
|
137
155
|
|
138
156
|
So you don't have to enter these again, we'll save a copy of your keys in a file called #{screen_name}_keys.yaml
|
139
157
|
|
140
|
-
#{"IMPORTANT".red.blink} Do NOT store the folder of your tweets on a public server.
|
158
|
+
#{"IMPORTANT".red.blink} Do NOT store the folder of your tweets on a public server.
|
141
159
|
If someone gets access to #{screen_name}_keys.yaml they can access your entire account!
|
142
160
|
EOS
|
143
161
|
|
@@ -187,7 +205,6 @@ class GrailbirdUpdater
|
|
187
205
|
return access_token
|
188
206
|
end
|
189
207
|
|
190
|
-
|
191
208
|
def prune_tweet(tweet)
|
192
209
|
KEEP_FIELDS.each do |parent_field, field_names|
|
193
210
|
tweet[parent_field].delete_if { |key, value| !field_names.include?(key) }
|
@@ -225,11 +242,6 @@ class GrailbirdUpdater
|
|
225
242
|
return tweet_index.unshift(new_month).sort_by {|m| [-m['year'], -m['month']]}
|
226
243
|
end
|
227
244
|
|
228
|
-
def write_twitter_js_to_path_with_heading(contents, path, heading)
|
229
|
-
json_pretty_contents = JSON.pretty_generate(contents)
|
230
|
-
File.open(path, 'w') {|f| f.write("#{heading} = #{json_pretty_contents}")}
|
231
|
-
end
|
232
|
-
|
233
245
|
private
|
234
246
|
|
235
247
|
# only puts if we're verbose
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grailbird_updater
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: oauth
|
@@ -106,3 +106,4 @@ summary: A way to keep an updated archive of Twitter tweets.
|
|
106
106
|
test_files:
|
107
107
|
- test/grailbird_updater_test.rb
|
108
108
|
- test/test_helper.rb
|
109
|
+
has_rdoc:
|