grailbird_updater 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
19
+
20
+ # gems need this ignored
21
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in grailbird_updater.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Dannel Jurado
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # GrailbirdUpdater
2
+
3
+ As most of the people who know me online are aware, I've been dying to get a copy of
4
+ my Twitter archive from Twitter for forever. I was finally given one and
5
+ decided to write a quick script to keep my own archive up-to-date.
6
+
7
+ Turns out the contents in the archive are partial/trimmed API responses from
8
+ the Twitter API, so it is actually possible to drop a whole API response in
9
+ there, do some sorting and update the archive.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ gem 'grailbird_updater'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install grailbird_updater
24
+
25
+ ## Usage
26
+
27
+ ```
28
+ grailbird_updater /path/to/twitter/archive
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
# Rake tasks for the gem: the tasks shipped in bundler/gem_tasks plus a test
# task, which is also what a bare `rake` runs.
require "bundler/gem_tasks"
require 'rake/testtask'

Rake::TestTask.new do |test_task|
  test_task.libs << "lib"
  test_task.test_files = FileList['test/*_test.rb']
  # -r makes Ruby require the shared helper when the test process starts.
  test_task.ruby_opts = ['-r./test/test_helper.rb']
  test_task.verbose = true
end

task :default => :test
@@ -0,0 +1,32 @@
1
#!/usr/bin/env ruby

# Command-line entry point: parses the options, validates them and updates the
# Twitter archive found in the chosen directory.

require 'open-uri'
require 'json'
require 'trollop'
require 'pp'
require 'colorize' # the library colours the strings it builds for verbose output

require 'grailbird_updater'

opts = Trollop::options do
  # Use the name of the installed executable so --version/--help match what
  # the user actually typed.
  version "grailbird_updater #{GrailbirdUpdater::VERSION}"
  banner <<-EOS
Update your Twitter archive (best if used with a cron)

Usage: grailbird_updater [options] [path to archive]

  EOS
  opt :verbose, "Verbose mode"
  opt :prune, "Prune all but necessary user data from tweets", :default => true
  opt :limit, "How many tweets to look back at (max: 3200)", :default => 1000
  opt :directory, "Twitter archive directory", :type => :string
end

# Precedence: --directory, then the first positional argument, then the
# current directory.
dir = opts[:directory] || ARGV.first || "."
raise ArgumentError, "Must specify a directory" unless File.directory?(dir)
raise ArgumentError, "Must look back at least 1 tweet" if opts[:limit] < 1
raise ArgumentError, "Cannot look back further than 3200 tweets" if opts[:limit] > 3200

GrailbirdUpdater.new(dir, opts[:limit], opts[:verbose], opts[:prune]).update_tweets
@@ -0,0 +1,22 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for grailbird_updater.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'grailbird_updater/version'

Gem::Specification.new do |gem|
  gem.name          = "grailbird_updater"
  gem.version       = GrailbirdUpdater::VERSION
  gem.authors       = ["Dannel Jurado"]
  gem.email         = ["dannelj@gmail.com"]
  gem.description   = %q{Twitter now allows you to download your tweets. This tool lets you keep that archive up to date.}
  gem.summary       = %q{A way to keep an updated archive of Twitter tweets.}
  gem.homepage      = "https://github.com/DeMarko/grailbird_updater"
  # Declared so the packaged metadata reflects the MIT terms in LICENSE.txt.
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and tests are derived from it.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "trollop"
  gem.add_dependency "colorize"
end
@@ -0,0 +1,3 @@
1
# Holds the gem's version number.
class GrailbirdUpdater
  # Frozen so the version string cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,142 @@
1
require "json"
require "open-uri"
require "time"

require "grailbird_updater/version"
2
+
3
# Brings a downloaded Twitter archive up to date: it requests the tweets
# posted after the newest archived one and merges them into the archive's
# JavaScript data files under <dir>/data/js.
class GrailbirdUpdater

  # For each nested tweet object (keyed by its field name), the only keys
  # that survive pruning.
  KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}.freeze

  # dir     - archive root, i.e. the directory that contains "data"
  # count   - value sent as the `count` parameter of the timeline request
  # verbose - when truthy, progress messages are printed
  # prune   - when truthy, fetched tweets are trimmed according to KEEP_FIELDS
  def initialize(dir, count, verbose, prune)
    data_path = File.join(dir, "data")

    @js_path = File.join(data_path, "js")
    @csv_path = File.join(data_path, "csv")

    @count = count
    @verbose = verbose
    @prune = prune
  end

  # Fetches the tweets newer than the latest archived one and rewrites the
  # affected month files, tweet_index.js and payload_details.js.
  #
  # Raises RuntimeError when a required archive file is missing or when the
  # archive holds no tweets to start from.
  def update_tweets
    # find user_id in data/js/user_details.js
    user_details = read_required_twitter_js_file("#{@js_path}/user_details.js")
    user_id = user_details["id"]
    screen_name = user_details["screen_name"]
    vputs "Twitter Archive for " + "@#{screen_name}".light_blue + " (##{user_id}) found"

    # find archive details
    archive_details = read_required_twitter_js_file("#{@js_path}/payload_details.js")
    vputs "Found archive payload containing #{archive_details['tweets']} tweets, created at #{archive_details['created_at']}"

    last_tweet = latest_archived_tweet
    vputs "Last tweet in archive is\n\t" + display_tweet(last_tweet)

    # get response from API
    # NOTE(review): this is the v1 REST endpoint; confirm it is still served.
    twitter_url = "http://api.twitter.com/1/statuses/user_timeline.json?count=#{@count}&user_id=#{user_id}&since_id=#{last_tweet['id_str']}&include_rts=true&include_entities=true"
    vputs "Making request to #{twitter_url}"
    tweets = fetch_json(twitter_url)

    vputs "There have been #{tweets.length} tweets since the archive" + (archive_details.has_key?('updated_at') ? " was last updated on #{archive_details['updated_at']}" : " was created")

    # collect tweets by year_month; the block default gives every month its
    # own array
    collected_months = Hash.new { |months, key| months[key] = [] }
    tweets.each do |tweet|
      tweet = prune_tweet(tweet) if @prune
      vputs "\t" + display_tweet(tweet)
      collected_months[Date.parse(tweet["created_at"]).strftime('%Y_%m')] << tweet
    end

    # add tweets to json data file
    tweet_index = read_required_twitter_js_file("#{@js_path}/tweet_index.js")
    collected_months.each do |year_month, month_tweets|
      month_path = "#{@js_path}/tweets/#{year_month}.js"

      existing_month_tweets = File.exist?(month_path) ? read_twitter_js_file(month_path) : []
      # Newest first. The sorted array must be kept (sort_by does not sort in
      # place), and Time is used because Date only has day resolution.
      all_month_tweets = (month_tweets | existing_month_tweets).sort_by do |t|
        -Time.parse(t['created_at']).to_i
      end

      # overwrite existing file (or create new if doesn't exist)
      write_twitter_js_to_path_with_heading(all_month_tweets, month_path, "Grailbird.data.tweets_#{year_month}")
      tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
    end

    # write new tweet_index.js once
    write_twitter_js_to_path_with_heading(tweet_index, "#{@js_path}/tweet_index.js", "var tweet_index")

    # add count to payload_details.js
    archive_details['tweets'] += tweets.length
    archive_details['updated_at'] = Time.now.getgm.strftime("%a %b %d %T %z %Y")
    write_twitter_js_to_path_with_heading(archive_details, "#{@js_path}/payload_details.js", "var payload_details")
  end

  # Same as read_twitter_js_file, but fails when the file is absent.
  #
  # Raises RuntimeError ("<path> must exist") if file_path does not exist.
  def read_required_twitter_js_file(file_path)
    raise "#{file_path} must exist" unless File.exist?(file_path)
    read_twitter_js_file(file_path)
  end

  # Parses an archive file of the form "<heading> = <JSON>".
  #
  # Returns the parsed JSON value.
  def read_twitter_js_file(file_path)
    # File.read closes the handle, unlike open(...).read.
    file_contents = File.read(file_path).force_encoding("UTF-8").split("\n").join(" ")
    # Drop the leading "var name = " / "Name.space = " assignment.
    json_file_contents = file_contents.sub(/\A((var)?\s*(.+?)\s+=\s+)/m, '')
    JSON.parse(json_file_contents)
  end

  # Removes, in place, every key not listed in KEEP_FIELDS from the nested
  # objects named there. Tweets lacking such an object are left untouched.
  #
  # Returns the same tweet hash.
  def prune_tweet(tweet)
    KEEP_FIELDS.each do |parent_field, field_names|
      nested = tweet[parent_field]
      next unless nested.is_a?(Hash)
      nested.delete_if { |key, _value| !field_names.include?(key) }
    end
    tweet
  end

  # Builds the one-line representation used in verbose output, with every
  # shortened URL replaced by its expanded form.
  #
  # Returns a String ending in a newline.
  def display_tweet(tweet)
    tweet_text = tweet['text']
    url_entities = tweet['entities'] && tweet['entities']['urls']
    Array(url_entities).each do |url_entity|
      short_url = url_entity['url'].to_s
      # An empty pattern would match between every character.
      next if short_url.empty?
      # Accumulate on tweet_text so earlier replacements are not lost; the
      # block form keeps backslashes in the URL literal.
      tweet_text = tweet_text.gsub(short_url) { url_entity['expanded_url'].to_s }
    end
    "@#{tweet['user']['screen_name']}".blue + ": \"#{tweet_text}\"\n"
  end

  # Adds count to the index entry for year_month ("YYYY_MM"), creating the
  # entry when the month is not indexed yet.
  #
  # Returns the index; when an entry was created it is a new array sorted
  # newest month first.
  def update_tweet_index(tweet_index, year_month, count)
    year, month = year_month.split('_').map(&:to_i)

    indexed = tweet_index.find { |entry| entry['year'] == year && entry['month'] == month }
    if indexed
      indexed['tweet_count'] += count
      return tweet_index
    end

    new_month = {"file_name" => "data/js/tweets/#{year_month}.js",
                 "year" => year,
                 "var_name" => "tweets_#{year_month}",
                 "tweet_count" => count,
                 "month" => month
    }
    tweet_index.unshift(new_month).sort_by { |m| [-m['year'], -m['month']] }
  end

  # Writes "<heading> = <pretty-printed JSON of contents>" to path,
  # replacing any existing file.
  def write_twitter_js_to_path_with_heading(contents, path, heading)
    json_pretty_contents = JSON.pretty_generate(contents)
    File.open(path, 'w') {|f| f.write("#{heading} = #{json_pretty_contents}")}
  end

  private

  # Returns the first tweet of the alphabetically last month file.
  # Assumes month files sort chronologically by name and list the newest
  # tweet first -- confirm against a real archive.
  #
  # Raises RuntimeError when there is no month file or it holds no tweets.
  def latest_archived_tweet
    newest_file = Dir.glob("#{@js_path}/tweets/*.js").sort.last
    raise "No tweet files found in #{@js_path}/tweets" unless newest_file

    newest_tweet = read_required_twitter_js_file(newest_file).first
    raise "#{newest_file} contains no tweets" unless newest_tweet
    newest_tweet
  end

  # Downloads url and parses the body as JSON.
  def fetch_json(url)
    # Kernel#open stopped handling URLs in Ruby 3.0, so use URI.open when
    # open-uri provides it and fall back to open on older Rubies.
    body = if defined?(URI) && URI.respond_to?(:open)
             URI.open(url, &:read)
           else
             open(url, &:read)
           end
    JSON.parse(body)
  end

  # only puts if we're verbose
  def vputs(str)
    puts str if @verbose
  end
end
142
+
@@ -0,0 +1,12 @@
1
+ require 'minitest/autorun'
2
+
3
+ require 'grailbird_updater'
4
+
5
# Smoke test for GrailbirdUpdater.
class GrailbirdUpdaterTest < Minitest::Unit::TestCase

  # Only checks that an updater can be constructed; it exists to demonstrate
  # that the test infrastructure works.
  def test_creation
    # initialize takes four required arguments: dir, count, verbose, prune.
    assert_kind_of GrailbirdUpdater, GrailbirdUpdater.new(".", 10, true, true)
  end
end
12
+
File without changes
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: grailbird_updater
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dannel Jurado
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: trollop
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: colorize
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Twitter now allows you to download your tweets. This tool lets you keep
47
+ that archive up to date.
48
+ email:
49
+ - dannelj@gmail.com
50
+ executables:
51
+ - grailbird_updater
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.md
59
+ - Rakefile
60
+ - bin/grailbird_updater
61
+ - grailbird_updater.gemspec
62
+ - lib/grailbird_updater.rb
63
+ - lib/grailbird_updater/version.rb
64
+ - test/grailbird_updater_test.rb
65
+ - test/test_helper.rb
66
+ homepage: https://github.com/DeMarko/grailbird_updater
67
+ licenses: []
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 1.8.23
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: A way to keep an updated archive of Twitter tweets.
90
+ test_files:
91
+ - test/grailbird_updater_test.rb
92
+ - test/test_helper.rb