grailbird_updater 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
19
+
20
+ # gems need this ignored
21
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in grailbird_updater.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Dannel Jurado
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # GrailbirdUpdater
2
+
3
+ As most of the people who know me online are aware, I've been dying to get a copy of
4
+ my Twitter archive from Twitter forever. I was finally given one and
5
+ decided to write a quick script to keep my own archive up-to-date.
6
+
7
+ Turns out the contents in the archive are partial/trimmed API responses from
8
+ the Twitter API, so it is actually possible to drop a whole API response in
9
+ there, do some sorting and update the archive.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ gem 'grailbird_updater'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install grailbird_updater
24
+
25
+ ## Usage
26
+
27
+ ```
28
+ grailbird_updater /path/to/twitter/archive
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Registers the test task: runs every test/*_test.rb file with "lib" added to
# the load path and test/test_helper.rb required up front.
Rake::TestTask.new do |test_task|
  test_task.libs.push("lib")
  test_task.test_files = FileList['test/*_test.rb']
  test_task.ruby_opts  = ['-r./test/test_helper.rb']
  test_task.verbose    = true
end

# A bare `rake` invocation runs the test task.
task :default => [:test]
@@ -0,0 +1,32 @@
1
#!/usr/bin/env ruby

# Command-line entry point: parses options, validates them, and updates the
# Twitter archive found in the chosen directory.

require 'open-uri'
require 'json'
require 'trollop'
require 'pp'
require 'colorize' # if verbose

require 'grailbird_updater'

# The name shown in --version and --help matches the installed executable.
opts = Trollop::options do
  version "grailbird_updater #{GrailbirdUpdater::VERSION}"
  banner <<-EOS
Update your Twitter archive (best if used with a cron)

Usage: grailbird_updater [options] [path to archive]

  EOS
  opt :verbose, "Verbose mode"
  opt :prune, "Prune all but necessary user data from tweets", :default => true
  opt :limit, "How many tweets to look back at (max: 3200)", :default => 1000
  opt :directory, "Twitter archive directory", :type => :string
end

# Directory precedence: --directory, then the first positional argument,
# then the current directory.
dir = opts[:directory] || ARGV.first || "."
raise ArgumentError, "Must specify a directory" unless File.directory?(dir)

# Reject limits the updater cannot meaningfully request.
raise ArgumentError, "Must look back at least 1 tweet" if opts[:limit] < 1
raise ArgumentError, "Cannot look back further than 3200 tweets" if opts[:limit] > 3200

GrailbirdUpdater.new(dir, opts[:limit], opts[:verbose], opts[:prune]).update_tweets
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'grailbird_updater/version'
5
+
6
# Packaging metadata for the grailbird_updater gem.
Gem::Specification.new do |gem|
  gem.name          = "grailbird_updater"
  gem.version       = GrailbirdUpdater::VERSION
  gem.authors       = ["Dannel Jurado"]
  gem.email         = ["dannelj@gmail.com"]
  gem.description   = %q{Twitter now allows you to download your tweets. This tool lets you keep that archive up to date.}
  gem.summary       = %q{A way to keep an updated archive of Twitter tweets.}
  gem.homepage      = "https://github.com/DeMarko/grailbird_updater"
  # The package ships an MIT LICENSE.txt; declare it so the built gem's
  # license list is not empty.
  gem.license       = "MIT"

  # Package exactly the files tracked by git; executables are whatever lives
  # under bin/, test files whatever lives under test/, spec/ or features/.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies: option parsing and coloured terminal output.
  gem.add_dependency "trollop"
  gem.add_dependency "colorize"
end
@@ -0,0 +1,3 @@
1
class GrailbirdUpdater
  # Release version of the gem; frozen so it cannot be mutated by accident.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,142 @@
1
require "date"
require "json"
require "open-uri"
require "time"

require "colorize"

require "grailbird_updater/version"
2
+
3
# Brings a downloaded Twitter archive up to date: fetches the tweets posted
# since the newest archived tweet and merges them into the archive's
# data/js files (monthly tweet files, tweet_index.js and payload_details.js).
#
# Relies on JSON, Date, Time, open-uri and the colorize String extensions
# (#blue / #light_blue) being loaded.
class GrailbirdUpdater

  # Nested tweet hashes that get pruned, mapped to the keys that are kept.
  KEEP_FIELDS = {'user' => ['name', 'screen_name', 'protected', 'id_str', 'profile_image_url_https', 'id', 'verified']}

  # dir     - path of the unpacked archive (the directory containing data/)
  # count   - how many tweets to ask the API for
  # verbose - print progress messages when truthy (default: false)
  # prune   - reduce nested user data to KEEP_FIELDS when truthy (default: true)
  def initialize(dir, count, verbose = false, prune = true)
    data_path = dir + "/data"

    @js_path = data_path + "/js"
    @csv_path = data_path + "/csv"

    @count = count
    @verbose = verbose
    @prune = prune
  end

  # Fetches the tweets newer than the latest archived one, merges them into
  # the per-month files, and rewrites tweet_index.js and payload_details.js.
  #
  # Raises RuntimeError when a required archive file is missing, when the
  # archive contains no tweets, or when the API response is not a list.
  def update_tweets
    # find user_id in data/js/user_details.js
    user_details = read_required_twitter_js_file("#{@js_path}/user_details.js")
    user_id = user_details["id"]
    screen_name = user_details["screen_name"]
    vputs "Twitter Archive for " + "@#{screen_name}".light_blue + " (##{user_id}) found"

    # find archive details
    archive_details = read_required_twitter_js_file("#{@js_path}/payload_details.js")
    vputs "Found archive payload containing #{archive_details['tweets']} tweets, created at #{archive_details['created_at']}"

    last_tweet = latest_archived_tweet
    vputs "Last tweet in archive is\n\t" + display_tweet(last_tweet)

    tweets = fetch_tweets_since(user_id, last_tweet["id_str"])
    vputs "There have been #{tweets.length} tweets since the archive" + (archive_details.has_key?('updated_at') ? " was last updated on #{archive_details['updated_at']}" : " was created")

    collected_months = collect_by_month(tweets)

    # add tweets to json data file
    tweet_index = read_required_twitter_js_file("#{@js_path}/tweet_index.js")
    collected_months.each do |year_month, month_tweets|
      month_path = "#{@js_path}/tweets/#{year_month}.js"

      existing_month_tweets = File.exist?(month_path) ? read_twitter_js_file(month_path) : []
      # the sorted result must be kept: sort_by returns a new array
      all_month_tweets = sort_newest_first(month_tweets | existing_month_tweets)

      # overwrite existing file (or create new if doesn't exist)
      write_twitter_js_to_path_with_heading(all_month_tweets, month_path, "Grailbird.data.tweets_#{year_month}")
      tweet_index = update_tweet_index(tweet_index, year_month, month_tweets.length)
    end

    # write new tweet_index.js once
    write_twitter_js_to_path_with_heading(tweet_index, "#{@js_path}/tweet_index.js", "var tweet_index")

    # add count to payload_details.js
    archive_details['tweets'] += tweets.length
    archive_details['updated_at'] = Time.now.getgm.strftime("%a %b %d %T %z %Y")
    write_twitter_js_to_path_with_heading(archive_details, "#{@js_path}/payload_details.js", "var payload_details")
  end

  # Like read_twitter_js_file, but raises RuntimeError ("<path> must exist")
  # when the file is absent.
  def read_required_twitter_js_file(file_path)
    raise "#{file_path} must exist" unless File.exist?(file_path)
    read_twitter_js_file(file_path)
  end

  # Reads an archive .js file of the form "<heading> = <json>" and returns the
  # parsed JSON value.
  def read_twitter_js_file(file_path)
    # File.read closes the handle; newlines are flattened so the heading
    # pattern sees a single line.
    file_contents = File.read(file_path).force_encoding("UTF-8").split("\n").join(" ")
    # strip the leading "var name = " / "Name.space = " assignment
    json_file_contents = file_contents.sub(/\A(?:var)?\s*.+?\s+=\s+/m, '')
    JSON.parse(json_file_contents)
  end

  # Removes, in place, every key not listed in KEEP_FIELDS from the matching
  # nested hashes of +tweet+. Returns the tweet.
  def prune_tweet(tweet)
    KEEP_FIELDS.each do |parent_field, field_names|
      nested = tweet[parent_field]
      # a tweet without this nested hash has nothing to prune
      next unless nested.respond_to?(:delete_if)
      nested.delete_if { |key, _value| !field_names.include?(key) }
    end
    tweet
  end

  # Returns a one-line, newline-terminated rendering of +tweet+:
  # the coloured @screen_name followed by the quoted text, with every
  # shortened URL replaced by its expanded form.
  def display_tweet(tweet)
    tweet_text = tweet['text']
    url_entities = tweet['entities'] && tweet['entities']['urls']
    if url_entities
      url_entities.each do |url_entity|
        short_url = url_entity['url'].to_s
        expanded_url = url_entity['expanded_url'].to_s
        # an empty pattern would match between every character
        next if short_url.empty? || expanded_url.empty?
        # build on the running text so every URL is expanded, and use the
        # block form so backslashes in the replacement stay literal
        tweet_text = tweet_text.gsub(short_url) { expanded_url }
      end
    end
    "@#{tweet['user']['screen_name']}".blue + ": \"#{tweet_text}\"\n"
  end

  # Adds +count+ tweets to the entry for +year_month+ ("YYYY_MM") in
  # +tweet_index+. An existing entry is incremented in place and the index is
  # returned as is; otherwise a new entry is added and the index is returned
  # sorted newest month first.
  def update_tweet_index(tweet_index, year_month, count)
    year, month = year_month.split('_')
    year = year.to_i
    month = month.to_i
    tweet_index.each do |index_month|
      if index_month['year'] == year && index_month['month'] == month
        index_month['tweet_count'] += count
        return tweet_index
      end
    end

    new_month = {"file_name" => "data/js/tweets/#{year_month}.js",
                 "year" => year,
                 "var_name" => "tweets_#{year_month}",
                 "tweet_count" => count,
                 "month" => month
    }
    tweet_index.unshift(new_month).sort_by {|m| [-m['year'], -m['month']]}
  end

  # Writes "<heading> = <pretty-printed JSON of contents>" to +path+,
  # replacing any existing file.
  def write_twitter_js_to_path_with_heading(contents, path, heading)
    json_pretty_contents = JSON.pretty_generate(contents)
    File.open(path, 'w') {|f| f.write("#{heading} = #{json_pretty_contents}")}
  end

  private

  # Returns the first tweet of the alphanumerically last month file
  # (the archive keeps each month file newest tweet first).
  def latest_archived_tweet
    twitter_js_files = Dir.glob("#{@js_path}/tweets/*.js")
    raise "No tweet files found in #{@js_path}/tweets" if twitter_js_files.empty?

    latest_file = twitter_js_files.sort.last
    last_tweet = read_required_twitter_js_file(latest_file).first
    raise "#{latest_file} contains no tweets" unless last_tweet
    last_tweet
  end

  # Requests the user's timeline entries newer than +since_id+ and returns
  # them as an array of tweet hashes.
  def fetch_tweets_since(user_id, since_id)
    # NOTE(review): this is the unauthenticated v1 endpoint; confirm it is
    # still served before relying on it.
    twitter_url = "http://api.twitter.com/1/statuses/user_timeline.json?count=#{@count}&user_id=#{user_id}&since_id=#{since_id}&include_rts=true&include_entities=true"
    vputs "Making request to #{twitter_url}"
    # URI#read comes from open-uri and works where Kernel#open no longer
    # accepts URLs
    tweets = JSON.parse(URI.parse(twitter_url).read)
    raise "Unexpected response from #{twitter_url}" unless tweets.is_a?(Array)
    tweets
  end

  # Groups +tweets+ into a hash keyed by "YYYY_MM", pruning each tweet first
  # when pruning is enabled.
  def collect_by_month(tweets)
    collected_months = {}
    tweets.each do |tweet|
      tweet = prune_tweet(tweet) if @prune
      vputs "\t" + display_tweet(tweet)
      hash_index = Date.parse(tweet["created_at"]).strftime('%Y_%m')
      (collected_months[hash_index] ||= []) << tweet
    end
    collected_months
  end

  # Returns +tweets+ ordered newest first, using the full timestamp so that
  # tweets from the same day keep their chronological order.
  def sort_newest_first(tweets)
    tweets.sort_by { |t| -Time.parse(t['created_at']).to_i }
  end

  # only puts if we're verbose
  def vputs(str)
    puts str if @verbose
  end
end
142
+
@@ -0,0 +1,12 @@
1
+ require 'minitest/autorun'
2
+
3
+ require 'grailbird_updater'
4
+
5
class GrailbirdUpdaterTest < Minitest::Unit::TestCase

  # Smoke test that only demonstrates the test infrastructure: the updater
  # can be constructed. All four constructor arguments (dir, count, verbose,
  # prune) are passed explicitly.
  def test_creation
    assert_kind_of GrailbirdUpdater, GrailbirdUpdater.new(".", 10, true, true)
  end
end
12
+
File without changes
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: grailbird_updater
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dannel Jurado
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: trollop
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: colorize
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Twitter now allows you to download your tweets. This tool lets you keep
47
+ that archive up to date.
48
+ email:
49
+ - dannelj@gmail.com
50
+ executables:
51
+ - grailbird_updater
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.md
59
+ - Rakefile
60
+ - bin/grailbird_updater
61
+ - grailbird_updater.gemspec
62
+ - lib/grailbird_updater.rb
63
+ - lib/grailbird_updater/version.rb
64
+ - test/grailbird_updater_test.rb
65
+ - test/test_helper.rb
66
+ homepage: https://github.com/DeMarko/grailbird_updater
67
+ licenses: []
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 1.8.23
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: A way to keep an updated archive of Twitter tweets.
90
+ test_files:
91
+ - test/grailbird_updater_test.rb
92
+ - test/test_helper.rb