tweetdump 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Logan Koester
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,22 @@
1
+ = tweetdump
2
+
3
+ Initially I created this because I wanted to compare the Streaming and Search APIs.
4
+ It turns out that, completely by accident, it can also do the neat trick of connecting indefinitely to either API
5
+ and feeding fresh tweets to whatever program you pipe the output to.
6
+
7
+ % tweetdump -h # Usage instructions
8
+
9
+
10
+ == Note on Patches/Pull Requests
11
+
12
+ * Fork the project.
13
+ * Make your feature addition or bug fix.
14
+ * Add tests for it. This is important so I don't break it in a
15
+ future version unintentionally.
16
+ * Commit, do not mess with rakefile, version, or history.
17
+ (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
18
+ * Send me a pull request. Bonus points for topic branches.
19
+
20
+ == Copyright
21
+
22
+ Copyright (c) 2010 Logan Koester. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,65 @@
1
require 'rubygems'
require 'rake'

# Gem packaging and release tasks. Jeweler is optional: when it cannot be
# loaded we only print a hint, and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "tweetdump"
    gem.summary = %Q{Simple CLI program to dump X number of tweets as raw JSON from Twitter's Search or Streaming APIs}
    gem.description = %Q{
    Initially I created this because I wanted to compare the Streaming and Search APIs.
    It turns out that completely by accident it can also do the neat trick of connecting indefinitely to either API,
    and feeding fresh tweets to whatever program you pipe the output to.

    % tweetdump -h # Usage instructions
    }
    gem.email = "logan@logankoester.com"
    gem.homepage = "http://github.com/logankoester/tweetdump"
    gem.authors = ["Logan Koester"]
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    gem.add_dependency "optiflag", ">= 0.7"
    gem.add_dependency "tweetstream"
    gem.add_dependency "eventmachine"
    gem.add_dependency "hashie"
    gem.add_dependency "twitter"
    gem.executables = ['tweetdump']
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage report. RCov is optional; without it the :rcov task only explains
# how to install it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# is provided by Jeweler's tasks). Only depend on it when it exists, so that
# `rake test` still runs when Jeweler could not be loaded above.
task :test => :check_dependencies if Rake::Task.task_defined?('check_dependencies')

task :default => :test

# API documentation. Newer toolchains ship the task as RDoc::Task in
# 'rdoc/task'; fall back to the legacy Rake::RDocTask when that is missing.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline so it does not end up inside the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "tweetdump #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
data/bin/tweetdump ADDED
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
3
+ require 'rubygems'
4
+ require 'optiflag'
5
+ require 'tweetdump'
6
+
7
# Declares the tweetdump command-line flags with OptiFlag and parses ARGV
# when the module body is evaluated (and_process!).
module TweetDumpOptions
  extend OptiFlagSet

  # Required flags.
  flag("user") do
    alternate_forms("u", "screen_name")
    description("Your Twitter screen name")
  end

  flag("password") do
    alternate_forms("p")
    description("Your Twitter password")
  end

  flag("api") do
    description("Which Twitter API to use (streaming or search)")
  end

  flag("keyword") do
    alternate_forms("k")
    description("Your search query")
  end

  # Optional flags.
  optional_flag("log") do
    description("A file to log output to, defaults to /dev/null")
  end

  optional_flag("limit") do
    description("How many tweets to fetch, defaults to infinity")
  end

  usage_flag("h", "help", "?")

  and_process!
end
32
+
33
# Configure a TweetDump from the parsed command-line flags and run it.
#
# NOTE(review): the credential flag is declared as "user" with "screen_name"
# only as an alternate form -- confirm that ARGV.flags.screen_name resolves,
# otherwise this should read ARGV.flags.user.
requested_limit = ARGV.flags.limit

t = TweetDump.new
t.screen_name = ARGV.flags.screen_name
t.password = ARGV.flags.password
t.keyword = ARGV.flags.keyword
t.logfile = ARGV.flags.log || '/dev/null'
t.api = ARGV.flags.api
# Command-line values arrive as Strings, but TweetDump compares the limit
# against an Integer counter, so convert it here (a non-numeric value fails
# fast with ArgumentError). Without --limit, run without bound, as the flag's
# help text promises ("defaults to infinity").
t.limit = requested_limit ? Integer(requested_limit, 10) : Float::INFINITY
t.run!
data/lib/tweetdump.rb ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env ruby
2
+ require 'logger'
3
+ require 'rubygems'
4
+ require 'eventmachine'
5
+ require 'tweetstream'
6
+ require 'hashie'
7
+ require 'twitter'
8
+ require 'json'
9
+
10
# Prints tweets matching a keyword to stdout as JSON, one per line, using
# either Twitter's Streaming API or its Search API, and logs progress to
# +logfile+.
#
#   dump = TweetDump.new(:keyword => 'ruby', :api => 'search', :limit => 10)
#   dump.run!
class TweetDump
  attr_accessor :screen_name, :password, :keyword, :logfile, :limit, :api

  # opts - Hash of initial settings, all optional:
  #        :screen_name, :password - credentials used for the Streaming API
  #        :keyword                - term to track / search for
  #        :limit                  - maximum number of tweets to print; an
  #                                  Integer, a numeric String, or nil for
  #                                  no limit
  #        :logfile                - log destination (default '/dev/null')
  #        :api                    - 'streaming' (default) or 'search'
  def initialize(opts={})
    @screen_name = opts[:screen_name]
    @password = opts[:password]
    @limit = opts[:limit]
    @logfile = opts[:logfile] || '/dev/null'
    @api = opts[:api] || 'streaming'
    @keyword = opts[:keyword]
  end

  # Fetches and prints tweets with the configured API until the limit is
  # reached (or, for the Search API, until a page comes back empty).
  #
  # Raises RuntimeError when +api+ is neither 'streaming' nor 'search', and
  # ArgumentError when +limit+ is a String that is not an integer.
  def run!
    @count = 1
    @page = 0
    @finished = false
    @max_tweets = normalized_limit

    case @api
    when 'streaming'
      fetch_streaming
    when 'search'
      fetch_search
    else
      logger.error "API is invalid or unsupported"
      raise "Unsupported API"
    end
  end

  private

  # Returns the limit as a number, or nil when no limit was configured.
  # The command line supplies the limit as a String, which cannot be compared
  # with the Integer counter directly.
  def normalized_limit
    return nil if @limit.nil?
    return @limit if @limit.is_a?(Numeric)
    Integer(@limit.to_s, 10)
  end

  # Logs progress for a tweet that was just printed and advances the counter.
  # Returns true once the configured limit has been reached.
  def tally_tweet(tweet_id, api_name)
    logger.info "Fetched tweet ##{tweet_id} via #{api_name} API"
    logger.info "Job Status: #{@count}/#{@limit}"
    if @max_tweets && @count >= @max_tweets
      logger.info "Limit reached (#{@count} out of #{@limit})"
      true
    else
      @count += 1
      false
    end
  end

  # Opens a filtered stream for the keyword and prints each status as it
  # arrives. Blocks inside the EventMachine reactor until the limit is
  # reached or the stream reports an error.
  #
  # NOTE(review): the keyword is placed in the POST body unescaped; keywords
  # containing spaces or '&' probably need URL-encoding -- confirm.
  def fetch_streaming
    logger.info "Connecting to Twitter..."
    EventMachine.run do
      stream = Twitter::JSONStream.connect(
        :path    => '/1/statuses/filter.json',
        :auth    => "#{@screen_name}:#{@password}",
        :content => "track=#{@keyword}",
        :method  => 'POST'
      )

      stream.each_item { |status| handle_stream_item(status) }

      stream.on_error do |message|
        # Log the message for future reference, then give up on the stream.
        logger.error message
        puts message
        logger.info "Disconnecting from Twitter"
        stop_streaming
      end

      stream.on_max_reconnects do |timeout, retries|
        # Something is wrong on your side.
        msg = "Hit max reconnects to Twitter: timeout=#{timeout}, retries=#{retries}"
        logger.error msg
        puts msg
        stop_streaming
      end
    end
  end

  # Prints one raw status from the stream and stops the reactor once the
  # limit is reached. A status that fails to parse is logged and skipped.
  def handle_stream_item(status)
    # Items may still be delivered between asking the reactor to stop and it
    # actually stopping; ignore them so the limit is not overshot.
    return if @finished

    puts status
    tweet = Hashie::Mash.new(JSON.parse(status))
    return unless tally_tweet(tweet.id, 'Streaming')

    logger.info "Disconnecting from Twitter"
    stop_streaming
  rescue StandardError => e
    # StandardError only: interrupts and exit requests must not be swallowed.
    logger.error e
  end

  # Asks the reactor to shut down so that EventMachine.run (and therefore
  # fetch_streaming) returns normally, instead of jumping out of a callback
  # with a non-local return.
  def stop_streaming
    return if @finished
    @finished = true
    EventMachine.stop
  end

  # Pages through Search API results, printing each tweet, until the limit is
  # reached or a page comes back empty.
  #
  # Note that due to programmer laziness tweets from the Search API are
  # parsed into a Ruby object and then dumped back to JSON again.
  #
  # NOTE(review): paging starts at 0 and the page is passed as a constructor
  # option -- verify against the twitter gem that this selects the page and
  # that pages 0 and 1 do not return the same results.
  def fetch_search
    loop do
      tweets = Twitter::Search.new(@keyword, :page => @page).fetch
      results = tweets.results
      if results.nil? || results.empty?
        # Without this check an exhausted search would be re-queried forever.
        logger.info "No more results from Search API"
        return
      end

      results.each do |tweet|
        puts JSON.dump(tweet)
        return if tally_tweet(tweet.id, 'Search')
      end
      @page += 1
    end
  end

  # Lazily creates the logger for this instance's logfile. Kept per instance
  # so that two dumpers with different log files do not share one logger.
  def logger
    @logger ||= Logger.new(logfile)
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
1
require 'rubygems'
require 'test/unit'
require 'shoulda'

# Prepend lib/ and then this test directory to the load path so that
# `require 'tweetdump'` and `require 'helper'` resolve from a source checkout.
test_root = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_root, '..', 'lib'))
$LOAD_PATH.unshift(test_root)
require 'tweetdump'

# Reopened as a place for helpers shared by all test cases; currently empty.
class Test::Unit::TestCase; end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
# Smoke tests for TweetDump that need no network access. They replace the
# generated placeholder, which failed unconditionally and therefore broke the
# default rake task.
class TestTweetdump < Test::Unit::TestCase
  should "default to the streaming API and a /dev/null log" do
    dump = TweetDump.new
    assert_equal 'streaming', dump.api
    assert_equal '/dev/null', dump.logfile
  end

  should "keep the options passed to the constructor" do
    dump = TweetDump.new(:screen_name => 'someone', :password => 'secret',
                         :keyword => 'ruby', :limit => 5, :api => 'search')
    assert_equal 'someone', dump.screen_name
    assert_equal 'secret', dump.password
    assert_equal 'ruby', dump.keyword
    assert_equal 5, dump.limit
    assert_equal 'search', dump.api
  end

  should "raise when asked to use an unsupported API" do
    dump = TweetDump.new(:api => 'carrier-pigeon')
    assert_raise(RuntimeError) { dump.run! }
  end
end
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tweetdump
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 1
9
+ version: 0.1.1
10
+ platform: ruby
11
+ authors:
12
+ - Logan Koester
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-13 00:00:00 -04:00
18
+ default_executable: tweetdump
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: thoughtbot-shoulda
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :development
31
+ version_requirements: *id001
32
+ - !ruby/object:Gem::Dependency
33
+ name: optiflag
34
+ prerelease: false
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ segments:
40
+ - 0
41
+ - 7
42
+ version: "0.7"
43
+ type: :runtime
44
+ version_requirements: *id002
45
+ - !ruby/object:Gem::Dependency
46
+ name: tweetstream
47
+ prerelease: false
48
+ requirement: &id003 !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ segments:
53
+ - 0
54
+ version: "0"
55
+ type: :runtime
56
+ version_requirements: *id003
57
+ - !ruby/object:Gem::Dependency
58
+ name: eventmachine
59
+ prerelease: false
60
+ requirement: &id004 !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ type: :runtime
68
+ version_requirements: *id004
69
+ - !ruby/object:Gem::Dependency
70
+ name: hashie
71
+ prerelease: false
72
+ requirement: &id005 !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id005
81
+ - !ruby/object:Gem::Dependency
82
+ name: twitter
83
+ prerelease: false
84
+ requirement: &id006 !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ segments:
89
+ - 0
90
+ version: "0"
91
+ type: :runtime
92
+ version_requirements: *id006
93
+ description: "\n Initially I created this because I wanted to compare the Streaming and Search APIs.\n It turns out that completely by accident it can also do the neat trick of connecting indefinitely to either API,\n and feeding fresh tweets to whatever program you pipe the output to.\n\n % tweetdump -h # Usage instructions\n "
94
+ email: logan@logankoester.com
95
+ executables:
96
+ - tweetdump
97
+ extensions: []
98
+
99
+ extra_rdoc_files:
100
+ - LICENSE
101
+ - README.rdoc
102
+ files:
103
+ - .document
104
+ - .gitignore
105
+ - LICENSE
106
+ - README.rdoc
107
+ - Rakefile
108
+ - VERSION
109
+ - bin/tweetdump
110
+ - lib/tweetdump.rb
111
+ - test/helper.rb
112
+ - test/test_tweetdump.rb
113
+ has_rdoc: true
114
+ homepage: http://github.com/logankoester/tweetdump
115
+ licenses: []
116
+
117
+ post_install_message:
118
+ rdoc_options:
119
+ - --charset=UTF-8
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ segments:
127
+ - 0
128
+ version: "0"
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ segments:
134
+ - 0
135
+ version: "0"
136
+ requirements: []
137
+
138
+ rubyforge_project:
139
+ rubygems_version: 1.3.6
140
+ signing_key:
141
+ specification_version: 3
142
+ summary: Simple CLI program to dump X number of tweets as raw JSON from Twitter's Search or Streaming APIs
143
+ test_files:
144
+ - test/test_tweetdump.rb
145
+ - test/helper.rb