grailbird_updater 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +21 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +37 -0
- data/Rakefile +12 -0
- data/bin/grailbird_updater +32 -0
- data/grailbird_updater.gemspec +22 -0
- data/lib/grailbird_updater/version.rb +3 -0
- data/lib/grailbird_updater.rb +142 -0
- data/test/grailbird_updater_test.rb +12 -0
- data/test/test_helper.rb +0 -0
- metadata +92 -0
data/.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
coverage
|
6
|
+
InstalledFiles
|
7
|
+
lib/bundler/man
|
8
|
+
pkg
|
9
|
+
rdoc
|
10
|
+
spec/reports
|
11
|
+
test/tmp
|
12
|
+
test/version_tmp
|
13
|
+
tmp
|
14
|
+
|
15
|
+
# YARD artifacts
|
16
|
+
.yardoc
|
17
|
+
_yardoc
|
18
|
+
doc/
|
19
|
+
|
20
|
+
# gems need this ignored
|
21
|
+
Gemfile.lock
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Dannel Jurado
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# GrailbirdUpdater
|
2
|
+
|
3
|
+
As most of the people who know me online are aware, I've been dying to get a copy of
|
4
|
+
my Twitter archive from Twitter for forever. I was finally given one and
|
5
|
+
decided to write a quick script to keep my own archive up-to-date.
|
6
|
+
|
7
|
+
Turns out the contents in the archive are partial/trimmed API responses from
|
8
|
+
the Twitter API, so it is actually possible to drop a whole API response in
|
9
|
+
there, do some sorting and update the archive.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
gem 'grailbird_updater'
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install grailbird_updater
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
```
|
28
|
+
grailbird_updater /path/to/twitter/archive
|
29
|
+
```
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
1. Fork it
|
34
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
35
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
36
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
37
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'open-uri'
|
4
|
+
require 'json'
|
5
|
+
require 'trollop'
|
6
|
+
require 'pp'
|
7
|
+
require 'colorize' # if verbose
|
8
|
+
|
9
|
+
require 'grailbird_updater'
|
10
|
+
|
11
|
+
# Command-line entry point: parse the options, validate them, then run the
# archive update. The banner and version string use the name of the installed
# executable (grailbird_updater).
opts = Trollop::options do
  version "grailbird_updater #{GrailbirdUpdater::VERSION}"
  banner <<-EOS
Update your Twitter archive (best if used with a cron)

Usage: grailbird_updater [options] [path to archive]

EOS
  opt :verbose, "Verbose mode"
  opt :prune, "Prune all but necessary user data from tweets", :default => true
  opt :limit, "How many tweets to look back at (max: 3200)", :default => 1000
  opt :directory, "Twitter archive directory", :type => :string
end

# Archive location precedence: --directory flag, then the first positional
# argument, then the current working directory.
dir = opts[:directory] || ARGV.first || "."
limit = opts[:limit]

# dir always has a value at this point, so the only possible failure is that
# the chosen path is not a directory; say so and name the path.
raise ArgumentError, "Not a directory: #{dir}" unless File.directory?(dir)
raise ArgumentError, "Cannot look back further than 3200 tweets" if limit > 3200
raise ArgumentError, "Must look back at least 1 tweet" if limit < 1

GrailbirdUpdater.new(dir, limit, opts[:verbose], opts[:prune]).update_tweets
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'grailbird_updater/version'
|
5
|
+
|
6
|
+
# Gem specification for grailbird_updater. The file list comes from git, so
# the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "grailbird_updater"
  gem.version       = GrailbirdUpdater::VERSION
  gem.authors       = ["Dannel Jurado"]
  gem.email         = ["dannelj@gmail.com"]
  gem.description   = %q{Twitter now allows you to download your tweets. This tool lets you keep that archive up to date.}
  gem.summary       = %q{A way to keep an updated archive of Twitter tweets.}
  gem.homepage      = "https://github.com/DeMarko/grailbird_updater"
  # Matches the MIT text shipped in LICENSE.txt, so the published metadata
  # no longer reports an empty license list.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "trollop"
  gem.add_dependency "colorize"
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'date'
require 'json'
require 'open-uri'
require 'time'

require "grailbird_updater/version"
|
2
|
+
|
3
|
+
# Keeps a downloaded Twitter archive up to date: it reads the archive's
# JavaScript data files, fetches the tweets posted since the newest archived
# tweet, merges them into the per-month tweet files and refreshes the tweet
# index and payload details.
class GrailbirdUpdater

  # Nested tweet hashes that get trimmed when pruning, mapped to the keys
  # that are kept inside each of them.
  KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}

  # dir     - root directory of the archive (the one containing data/)
  # count   - maximum number of tweets to request from the API
  # verbose - when truthy, progress messages are printed
  # prune   - when truthy, fetched tweets are trimmed per KEEP_FIELDS
  def initialize(dir, count, verbose, prune)
    data_path = dir + "/data"

    @js_path = data_path + "/js"
    @csv_path = data_path + "/csv"

    @count = count
    @verbose = verbose
    @prune = prune
  end

  # Fetches the tweets newer than the newest archived tweet and merges them
  # into the archive, rewriting the affected month files, tweet_index.js and
  # payload_details.js.
  #
  # Raises RuntimeError when a required archive file is missing or when the
  # archive holds no tweets to use as a starting point.
  def update_tweets
    # find user_id in data/js/user_details.js
    user_details = read_required_twitter_js_file("#{@js_path}/user_details.js")
    user_id = user_details["id"]
    screen_name = user_details["screen_name"]
    vputs "Twitter Archive for " + paint("@#{screen_name}", :light_blue) + " (##{user_id}) found"

    # find archive details
    archive_details = read_required_twitter_js_file("#{@js_path}/payload_details.js")
    vputs "Found archive payload containing #{archive_details['tweets']} tweets, created at #{archive_details['created_at']}"

    # find latest month file (file names are YYYY_MM.js, so the greatest name is the latest month)
    latest_month_path = Dir.glob("#{@js_path}/tweets/*.js").max
    raise "#{@js_path}/tweets must contain at least one tweet file" unless latest_month_path
    latest_month = read_required_twitter_js_file(latest_month_path)

    # find last_tweet_id in latest_month (should be first, because Twitter)
    last_tweet = latest_month.first
    raise "#{latest_month_path} contains no tweets" unless last_tweet
    last_tweet_id = last_tweet["id_str"]

    vputs "Last tweet in archive is\n\t" + display_tweet(last_tweet)

    # get response from API
    tweets = fetch_tweets_since(user_id, last_tweet_id)

    vputs "There have been #{tweets.length} tweets since the archive" + (archive_details.has_key?('updated_at') ? " was last updated on #{archive_details['updated_at']}" : " was created")

    # collect tweets by year_month
    collected_months = Hash.new { |months, key| months[key] = [] }
    tweets.each do |tweet|
      tweet = prune_tweet(tweet) if @prune
      vputs "\t" + display_tweet(tweet)
      hash_index = Date.parse(tweet["created_at"]).strftime('%Y_%m')
      collected_months[hash_index] << tweet
    end

    # add tweets to json data file
    tweet_index = read_required_twitter_js_file("#{@js_path}/tweet_index.js")
    collected_months.each do |year_month, month_tweets|
      month_path = "#{@js_path}/tweets/#{year_month}.js"

      existing_month_tweets = File.exist?(month_path) ? read_twitter_js_file(month_path) : []
      # sort the merged collection for this month newest first; the sorted
      # result is what gets written
      all_month_tweets = sort_newest_first(month_tweets | existing_month_tweets)

      # overwrite existing file (or create new if doesn't exist)
      write_twitter_js_to_path_with_heading(all_month_tweets, month_path, "Grailbird.data.tweets_#{year_month}")
      tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
    end

    # write new tweet_index.js once
    write_twitter_js_to_path_with_heading(tweet_index, "#{@js_path}/tweet_index.js", "var tweet_index")

    # add count to payload_details.js
    archive_details['tweets'] += tweets.length
    archive_details['updated_at'] = Time.now.getgm.strftime("%a %b %d %T %z %Y")
    write_twitter_js_to_path_with_heading(archive_details, "#{@js_path}/payload_details.js", "var payload_details")
  end

  # Like read_twitter_js_file, but raises RuntimeError naming the path when
  # the file does not exist.
  def read_required_twitter_js_file(file_path)
    raise "#{file_path} must exist" unless File.exist?(file_path)
    read_twitter_js_file(file_path)
  end

  # Reads an archive data file of the form "<heading> = <JSON>" and returns
  # the parsed JSON value. Line breaks are flattened to spaces and the leading
  # "<heading> = " assignment is stripped before parsing.
  def read_twitter_js_file(file_path)
    # File.read closes the handle, unlike open(...).read
    file_contents = File.read(file_path).force_encoding("UTF-8").split("\n").join(" ")
    json_file_contents = file_contents.sub(/\A((var)?\s*(.+?)\s+=\s+)/m, '')
    JSON.parse(json_file_contents)
  end

  # Removes, in place, every key not listed in KEEP_FIELDS from the
  # corresponding nested hashes of the tweet. Returns the same tweet hash.
  # Tweets lacking one of those nested hashes are left untouched.
  def prune_tweet(tweet)
    KEEP_FIELDS.each do |parent_field, field_names|
      parent = tweet[parent_field]
      next unless parent.is_a?(Hash)
      parent.delete_if { |key, _value| !field_names.include?(key) }
    end
    tweet
  end

  # Builds a one-line, newline-terminated description of a tweet:
  #   @screen_name: "text"
  # with every shortened URL listed in the tweet's URL entities replaced by
  # its expanded form.
  def display_tweet(tweet)
    tweet_text = tweet['text']
    url_entities = tweet['entities'] && tweet['entities']['urls']
    Array(url_entities).each do |url_entity|
      short_url = url_entity['url'].to_s
      expanded_url = url_entity['expanded_url'].to_s
      # nothing sensible to substitute when either side is missing
      next if short_url.empty? || expanded_url.empty?
      # accumulate on tweet_text so every entity is expanded, not only the last one
      tweet_text = tweet_text.gsub(short_url, expanded_url)
    end
    paint("@#{tweet['user']['screen_name']}", :blue) + ": \"#{tweet_text}\"\n"
  end

  # Adds count tweets to the index entry for year_month ("YYYY_MM"). An
  # existing entry is incremented in place; otherwise a new entry is added and
  # the index is returned sorted newest month first. Callers must use the
  # return value.
  def update_tweet_index(tweet_index, year_month, count)
    year, month = year_month.split('_').map { |part| part.to_i }

    existing_month = tweet_index.find do |index_month|
      index_month['year'] == year && index_month['month'] == month
    end
    if existing_month
      existing_month['tweet_count'] += count
      return tweet_index
    end

    new_month = {"file_name" => "data/js/tweets/#{year_month}.js",
                 "year" => year,
                 "var_name" => "tweets_#{year_month}",
                 "tweet_count" => count,
                 "month" => month
    }
    tweet_index.unshift(new_month).sort_by { |m| [-m['year'], -m['month']] }
  end

  # Writes contents to path as "<heading> = <pretty-printed JSON>",
  # replacing any existing file.
  def write_twitter_js_to_path_with_heading(contents, path, heading)
    json_pretty_contents = JSON.pretty_generate(contents)
    File.open(path, 'w') { |f| f.write("#{heading} = #{json_pretty_contents}") }
  end

  private

  # only puts if we're verbose
  def vputs(str)
    puts str if @verbose
  end

  # Applies a colorize-style String method (e.g. :blue) when one is available
  # and returns the string unchanged otherwise, so the library also works when
  # the colorize gem has not been loaded.
  def paint(str, color)
    str.respond_to?(color) ? str.public_send(color) : str
  end

  # Requests the user's timeline from the Twitter API, restricted to tweets
  # newer than since_id, and returns the parsed JSON response.
  def fetch_tweets_since(user_id, since_id)
    twitter_url = "http://api.twitter.com/1/statuses/user_timeline.json?count=#{@count}&user_id=#{user_id}&since_id=#{since_id}&include_rts=true&include_entities=true"
    vputs "Making request to #{twitter_url}"
    # URI.open is the open-uri entry point; the block form closes the response
    URI.open(twitter_url) { |response| JSON.parse(response.read) }
  end

  # Returns the tweets ordered by creation time, newest first. The timestamp
  # is parsed with full time-of-day precision, and the original position
  # breaks ties so tweets with equal timestamps keep their relative order.
  def sort_newest_first(tweets)
    tweets.each_with_index.sort_by { |tweet, position|
      [-Time.parse(tweet['created_at']).to_i, position]
    }.map { |tweet, _position| tweet }
  end
end
|
142
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
|
3
|
+
require 'grailbird_updater'
|
4
|
+
|
5
|
+
# Smoke test: checks that the library loads and that an updater can be
# constructed.
class GrailbirdUpdaterTest < Minitest::Unit::TestCase

  # this test is stupid, just there to demonstrate infrastructure
  def test_creation
    # GrailbirdUpdater#initialize takes four arguments: dir, count, verbose, prune
    assert_kind_of GrailbirdUpdater, GrailbirdUpdater.new(".", 10, true, true)
  end
end
|
12
|
+
|
data/test/test_helper.rb
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: grailbird_updater
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dannel Jurado
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: trollop
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: colorize
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: Twitter now allows you to download your tweets. This tool lets you keep
|
47
|
+
that archive up to date.
|
48
|
+
email:
|
49
|
+
- dannelj@gmail.com
|
50
|
+
executables:
|
51
|
+
- grailbird_updater
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files: []
|
54
|
+
files:
|
55
|
+
- .gitignore
|
56
|
+
- Gemfile
|
57
|
+
- LICENSE.txt
|
58
|
+
- README.md
|
59
|
+
- Rakefile
|
60
|
+
- bin/grailbird_updater
|
61
|
+
- grailbird_updater.gemspec
|
62
|
+
- lib/grailbird_updater.rb
|
63
|
+
- lib/grailbird_updater/version.rb
|
64
|
+
- test/grailbird_updater_test.rb
|
65
|
+
- test/test_helper.rb
|
66
|
+
homepage: https://github.com/DeMarko/grailbird_updater
|
67
|
+
licenses: []
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
requirements: []
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 1.8.23
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: A way to keep an updated archive of Twitter tweets.
|
90
|
+
test_files:
|
91
|
+
- test/grailbird_updater_test.rb
|
92
|
+
- test/test_helper.rb
|