tweetdump 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +22 -0
- data/Rakefile +65 -0
- data/VERSION +1 -0
- data/bin/tweetdump +40 -0
- data/lib/tweetdump.rb +104 -0
- data/test/helper.rb +10 -0
- data/test/test_tweetdump.rb +7 -0
- metadata +145 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Logan Koester
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
= tweetdump
|
2
|
+
|
3
|
+
Initially I created this because I wanted to compare the Streaming and Search APIs.
|
4
|
+
It turns out that completely by accident it can also do the neat trick of connecting indefinitely to either API,
|
5
|
+
and feeding fresh tweets to whatever program you pipe the output to.
|
6
|
+
|
7
|
+
% tweetdump -h # Usage instructions
|
8
|
+
|
9
|
+
|
10
|
+
== Note on Patches/Pull Requests
|
11
|
+
|
12
|
+
* Fork the project.
|
13
|
+
* Make your feature addition or bug fix.
|
14
|
+
* Add tests for it. This is important so I don't break it in a
|
15
|
+
future version unintentionally.
|
16
|
+
* Commit, do not mess with rakefile, version, or history.
|
17
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
18
|
+
* Send me a pull request. Bonus points for topic branches.
|
19
|
+
|
20
|
+
== Copyright
|
21
|
+
|
22
|
+
Copyright (c) 2010 Logan Koester. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Rake tasks for the tweetdump gem: packaging (Jeweler), tests, coverage
# (RCov) and API documentation (RDoc). Optional tools degrade gracefully
# when they are not installed.
require 'rubygems'
require 'rake'

# Gem packaging and release tasks. Jeweler is optional: without it the
# remaining tasks (test, rdoc) must still be usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "tweetdump"
    gem.summary = %Q{Simple CLI program to dump X number of tweets as raw JSON from Twitter's Search or Streaming APIs}
    gem.description = %Q{
    Initially I created this because I wanted to compare the Streaming and Search APIs.
    It turns out that completely by accident it can also do the neat trick of connecting indefinitely to either API,
    and feeding fresh tweets to whatever program you pipe the output to.

    % tweetdump -h # Usage instructions
    }
    gem.email = "logan@logankoester.com"
    gem.homepage = "http://github.com/logankoester/tweetdump"
    gem.authors = ["Logan Koester"]
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    gem.add_dependency "optiflag", ">= 0.7"
    gem.add_dependency "tweetstream"
    gem.add_dependency "eventmachine"
    gem.add_dependency "hashie"
    gem.add_dependency "twitter"
    gem.executables = ['tweetdump']
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests live in test/**/test_*.rb.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage report. When RCov is missing, define a stub task that explains
# how to install it instead of failing with an unknown-task error.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only defined when Jeweler loaded (and only by
# Jeweler versions that still ship it). Declaring it unconditionally made
# `rake test` abort on an unknown prerequisite exactly when Jeweler was
# missing, so only wire it up when the task really exists.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Newer Rake versions dropped 'rake/rdoctask' in favour
# of RDoc's own task, so try that first and fall back to the legacy one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up
  # in the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "tweetdump #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.1
|
data/bin/tweetdump
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'optiflag'
|
5
|
+
require 'tweetdump'
|
6
|
+
|
7
|
+
# Command-line interface of tweetdump: declares the accepted flags with
# OptiFlag, then configures a TweetDump from them and runs it.
module TweetDumpOptions extend OptiFlagSet
  flag "user" do
    alternate_forms "u", "screen_name"
    description "Your Twitter screen name"
  end
  flag "password" do
    alternate_forms "p"
    description "Your Twitter password"
  end
  flag "api" do
    description "Which Twitter API to use (streaming or search)"
  end
  flag "keyword" do
    alternate_forms "k"
    description "Your search query"
  end
  optional_flag "log" do
    description "A file to log output to, defaults to /dev/null"
  end
  optional_flag "limit" do
    description "How many tweets to fetch, defaults to infinity"
  end
  usage_flag "h","help","?"
  and_process!
end

flags = ARGV.flags

t = TweetDump.new
# The flag is declared as "user"; "screen_name" is only an alternate
# spelling of the switch. NOTE(review): presumably OptiFlag exposes values
# under the primary flag name only -- confirm. Reading "user" first and
# falling back keeps the previous lookup working either way.
t.screen_name = flags.user || flags.screen_name
t.password = flags.password
t.keyword = flags.keyword
t.logfile = flags.log || '/dev/null'
t.api = flags.api
# Command-line values arrive as text, but TweetDump compares the limit
# against an Integer counter, so convert it here (a non-numeric value
# fails fast with ArgumentError). Without --limit, honour the documented
# default of "infinity" instead of silently stopping after one tweet.
t.limit = flags.limit ? Integer(flags.limit.to_s, 10) : Float::INFINITY
t.run!
|
data/lib/tweetdump.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'logger'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'eventmachine'
|
5
|
+
require 'tweetstream'
|
6
|
+
require 'hashie'
|
7
|
+
require 'twitter'
|
8
|
+
require 'json'
|
9
|
+
|
10
|
+
# Dumps tweets matching a keyword to standard output as raw JSON, one
# tweet per line, using either Twitter's Streaming API or its Search API.
#
# Standard output carries tweet JSON only, so it can be piped into another
# program; progress goes to the log file and errors go to the log file and
# standard error.
class TweetDump
  attr_accessor :screen_name, :password, :keyword, :logfile, :limit, :api

  # Builds a dumper from an options Hash. All keys are optional:
  #
  # :screen_name, :password - credentials sent to the Streaming API
  # :keyword - term to track (streaming) or query (search)
  # :limit   - maximum number of tweets to print, as an Integer or a
  #            numeric String; nil means "no limit"
  # :logfile - path of the log file, defaults to '/dev/null'
  # :api     - 'streaming' (default) or 'search'
  def initialize(opts={})
    @screen_name = opts[:screen_name]
    @password = opts[:password]
    @limit = opts[:limit]
    @logfile = opts[:logfile] || '/dev/null'
    @api = opts[:api] || 'streaming'
    @keyword = opts[:keyword]
  end

  # Fetches tweets through the configured API and prints them until the
  # limit is reached.
  #
  # Raises ArgumentError when the limit is a non-numeric String and
  # RuntimeError ("Unsupported API") when api is neither 'streaming' nor
  # 'search'.
  def run!
    @count = 1
    @page = 0
    # Resolve the limit once, before any network activity, so a malformed
    # value fails fast instead of in the middle of a download.
    @max_tweets = parse_limit(@limit)

    case @api
    when 'streaming'
      fetch_streaming
    when 'search'
      fetch_search
    else
      logger.error "API is invalid or unsupported"
      raise "Unsupported API"
    end
  end

  private

  # Converts a limit given as text (e.g. from the command line) to an
  # Integer. Non-String values, including nil and Float::INFINITY, are
  # passed through unchanged.
  def parse_limit(value)
    value.is_a?(String) ? Integer(value, 10) : value
  end

  # True once the number of tweets printed so far has reached the limit.
  # A nil limit never triggers. The previous direct `@count >= @limit`
  # comparison raised for nil and String limits.
  def limit_reached?
    !@max_tweets.nil? && @count >= @max_tweets
  end

  # Connects to the Streaming API and prints every received status until
  # the limit is reached or the connection fails.
  def fetch_streaming
    logger.info "Connecting to Twitter..."
    EventMachine::run {
      stream = Twitter::JSONStream.connect(
        :path => '/1/statuses/filter.json',
        :auth => "#{@screen_name}:#{@password}",
        :content => "track=#{@keyword}",
        :method => 'POST'
      )
      finished = false

      stream.each_item do |status|
        # The reactor only halts after the current tick, so items already
        # buffered may still be delivered after the limit was hit.
        next if finished

        begin
          puts status
          tweet = Hashie::Mash.new(JSON.parse(status))
          logger.info "Fetched tweet ##{tweet.id} via Streaming API"
          logger.info "Job Status: #{@count}/#{@limit}"
          if limit_reached?
            logger.info "Limit reached (#{@count} out of #{@limit})"
            logger.info "Disconnecting from Twitter"
            finished = true
            # Shut the event loop down cleanly instead of jumping out of
            # the running reactor with a non-local return.
            EventMachine.stop
          else
            @count += 1
          end
        rescue StandardError => e
          # A single malformed status must not end the stream. Rescuing
          # StandardError rather than Exception keeps interrupts and
          # SystemExit working.
          logger.error e
        end
      end

      stream.on_error do |message|
        # It might be an issue on Twitter's side; record the message for
        # future reference and give up. Diagnostics go to stderr so that
        # stdout stays pure tweet JSON.
        logger.error message
        $stderr.puts message
        logger.info "Disconnecting from Twitter"
        finished = true
        EventMachine.stop
      end

      stream.on_max_reconnects do |timeout, retries|
        # Something is wrong on your side.
        msg = "Hit max reconnects to Twitter: timeout=#{timeout}, retries=#{retries}"
        logger.error msg
        $stderr.puts msg
        finished = true
        EventMachine.stop
      end
    }
  end

  # Pages through the Search API and prints every result until the limit
  # is reached or a page comes back empty.
  #
  # Note that due to programmer laziness tweets from the Search API are
  # parsed into a Ruby object and then dumped back to JSON again.
  def fetch_search
    # A loop instead of self-recursion: an unlimited run no longer grows
    # the call stack by one frame per page.
    loop do
      tweets = Twitter::Search.new(@keyword, :page => @page).fetch
      results = tweets.results
      if results.nil? || results.empty?
        # Without this check an exhausted result set was requested over
        # and over with an ever-increasing page number.
        logger.info "No more results from Search API"
        return
      end

      results.each do |tweet|
        puts JSON.dump(tweet)
        logger.info "Fetched tweet ##{tweet.id} via Search API"
        logger.info "Job Status: #{@count}/#{@limit}"
        if limit_reached?
          logger.info "Limit reached (#{@count} out of #{@limit})"
          return
        else
          @count += 1
        end
      end
      @page += 1
    end
  end

  # Lazily created Logger writing to #logfile. It is memoized per instance:
  # the former class variable made every TweetDump share whichever log
  # file was opened first.
  def logger
    @logger ||= Logger.new(logfile)
  end
end
|
data/test/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tweetdump
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
version: 0.1.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Logan Koester
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-04-13 00:00:00 -04:00
|
18
|
+
default_executable: tweetdump
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: thoughtbot-shoulda
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :development
|
31
|
+
version_requirements: *id001
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: optiflag
|
34
|
+
prerelease: false
|
35
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
segments:
|
40
|
+
- 0
|
41
|
+
- 7
|
42
|
+
version: "0.7"
|
43
|
+
type: :runtime
|
44
|
+
version_requirements: *id002
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: tweetstream
|
47
|
+
prerelease: false
|
48
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
segments:
|
53
|
+
- 0
|
54
|
+
version: "0"
|
55
|
+
type: :runtime
|
56
|
+
version_requirements: *id003
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: eventmachine
|
59
|
+
prerelease: false
|
60
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
type: :runtime
|
68
|
+
version_requirements: *id004
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: hashie
|
71
|
+
prerelease: false
|
72
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
type: :runtime
|
80
|
+
version_requirements: *id005
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: twitter
|
83
|
+
prerelease: false
|
84
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :runtime
|
92
|
+
version_requirements: *id006
|
93
|
+
description: "\n Initially I created this because I wanted to compare the Streaming and Search APIs.\n It turns out that completely by accident it can also do the neat trick of connecting indefinitely to either API,\n and feeding fresh tweets to whatever program you pipe the output to.\n\n % tweetdump -h # Usage instructions\n "
|
94
|
+
email: logan@logankoester.com
|
95
|
+
executables:
|
96
|
+
- tweetdump
|
97
|
+
extensions: []
|
98
|
+
|
99
|
+
extra_rdoc_files:
|
100
|
+
- LICENSE
|
101
|
+
- README.rdoc
|
102
|
+
files:
|
103
|
+
- .document
|
104
|
+
- .gitignore
|
105
|
+
- LICENSE
|
106
|
+
- README.rdoc
|
107
|
+
- Rakefile
|
108
|
+
- VERSION
|
109
|
+
- bin/tweetdump
|
110
|
+
- lib/tweetdump.rb
|
111
|
+
- test/helper.rb
|
112
|
+
- test/test_tweetdump.rb
|
113
|
+
has_rdoc: true
|
114
|
+
homepage: http://github.com/logankoester/tweetdump
|
115
|
+
licenses: []
|
116
|
+
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options:
|
119
|
+
- --charset=UTF-8
|
120
|
+
require_paths:
|
121
|
+
- lib
|
122
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
segments:
|
127
|
+
- 0
|
128
|
+
version: "0"
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
segments:
|
134
|
+
- 0
|
135
|
+
version: "0"
|
136
|
+
requirements: []
|
137
|
+
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 1.3.6
|
140
|
+
signing_key:
|
141
|
+
specification_version: 3
|
142
|
+
summary: Simple CLI program to dump X number of tweets as raw JSON from Twitter's Search or Streaming APIs
|
143
|
+
test_files:
|
144
|
+
- test/test_tweetdump.rb
|
145
|
+
- test/helper.rb
|