ghtorrent 0.5 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'ghtorrent'
|
5
|
+
|
6
|
+
|
7
|
+
class GHTProcessEvent < GHTDataRetrieval
|
8
|
+
|
9
|
+
def prepare_options(options)
|
10
|
+
options.banner <<-BANNER
|
11
|
+
Process one or more event ids
|
12
|
+
#{command_name} [options] eventid [...]
|
13
|
+
BANNER
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
def go
|
18
|
+
ARGV.each do |a|
|
19
|
+
data = ghtorrent.get_event(a)
|
20
|
+
unless data.empty?
|
21
|
+
event = data[0]
|
22
|
+
begin
|
23
|
+
send(event['type'], event)
|
24
|
+
rescue Exception => e
|
25
|
+
puts e
|
26
|
+
puts e.backtrace
|
27
|
+
end
|
28
|
+
else
|
29
|
+
warn "GHTProcessEvent: No event with id #{a}"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
GHTProcessEvent.run
|
data/bin/ght-rm-dupl
CHANGED
@@ -1,134 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require '
|
4
|
+
require 'ghtorrent'
|
5
5
|
|
6
|
-
|
7
|
-
require 'ghtorrent/logging'
|
8
|
-
require 'ghtorrent/command'
|
9
|
-
require 'ghtorrent/persister'
|
10
|
-
|
11
|
-
class GHRMDupl < GHTorrent::Command
|
12
|
-
|
13
|
-
include GHTorrent::Settings
|
14
|
-
include GHTorrent::Persister
|
15
|
-
|
16
|
-
def col_info()
|
17
|
-
{
|
18
|
-
:commits => {
|
19
|
-
:unq => "sha",
|
20
|
-
:col => persister.get_underlying_connection.collection(:commits.to_s),
|
21
|
-
},
|
22
|
-
:events => {
|
23
|
-
:unq => "id",
|
24
|
-
:col => persister.get_underlying_connection.collection(:events.to_s),
|
25
|
-
}
|
26
|
-
}
|
27
|
-
end
|
28
|
-
|
29
|
-
def persister
|
30
|
-
@persister ||= connect(:mongo, @settings)
|
31
|
-
@persister
|
32
|
-
end
|
33
|
-
|
34
|
-
def prepare_options(options)
|
35
|
-
options.banner <<-BANNER
|
36
|
-
Removes duplicate entries from collections
|
37
|
-
|
38
|
-
#{command_name} [options] collection
|
39
|
-
|
40
|
-
#{command_name} options:
|
41
|
-
BANNER
|
42
|
-
|
43
|
-
options.opt :earliest, 'Seconds since epoch of earliest item to load',
|
44
|
-
:short => 'e', :default => 0, :type => :int
|
45
|
-
options.opt :snapshot, 'Perform clean up every x records',
|
46
|
-
:short => 's', :default => -1, :type => :int
|
47
|
-
end
|
48
|
-
|
49
|
-
def validate
|
50
|
-
super
|
51
|
-
Trollop::die "no collection specified" unless args[0] && !args[0].empty?
|
52
|
-
end
|
53
|
-
|
54
|
-
# Print MongoDB remove statements that
|
55
|
-
# remove all but one entries for each commit.
|
56
|
-
def remove_duplicates(data, col)
|
57
|
-
removed = 0
|
58
|
-
data.select { |k, v| v.size > 1 }.each do |k, v|
|
59
|
-
v.slice(0..(v.size - 2)).map do |x|
|
60
|
-
removed += 1 if delete_by_id col, x
|
61
|
-
end
|
62
|
-
end
|
63
|
-
removed
|
64
|
-
end
|
65
|
-
|
66
|
-
def delete_by_id(col, id)
|
67
|
-
begin
|
68
|
-
col.remove({'_id' => id})
|
69
|
-
true
|
70
|
-
rescue Mongo::OperationFailure
|
71
|
-
puts "Cannot remove record with id #{id} from #{col.name}"
|
72
|
-
false
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def go
|
77
|
-
collection = case ARGV[0]
|
78
|
-
when "commits" then
|
79
|
-
:commits
|
80
|
-
when "events" then
|
81
|
-
:events
|
82
|
-
else
|
83
|
-
puts "Not a known collection name: #{ARGV[0]}\n"
|
84
|
-
end
|
85
|
-
|
86
|
-
from = {'_id' => {'$gte' => BSON::ObjectId.from_time(Time.at(options[:earliest]))}}
|
87
|
-
|
88
|
-
snapshot = options[:snapshot]
|
89
|
-
|
90
|
-
puts "Deleting duplicates from collection #{collection}"
|
91
|
-
puts "Deleting duplicates after #{Time.at(options[:earliest])}"
|
92
|
-
puts "Perform clean up every #{snapshot} records"
|
93
|
-
|
94
|
-
# Various counters to report stats
|
95
|
-
processed = total_processed = removed = 0
|
96
|
-
|
97
|
-
data = Hash.new
|
98
|
-
|
99
|
-
# The following code needs to save intermediate results to cope
|
100
|
-
# with large datasets
|
101
|
-
col_info[collection][:col].find(from, :fields => col_info[collection][:unq]).each do |r|
|
102
|
-
_id = r["_id"]
|
103
|
-
commit = read_value(r, col_info[collection][:unq])
|
104
|
-
|
105
|
-
# If entries cannot be parsed, remove them
|
106
|
-
if commit.empty?
|
107
|
-
puts "Deleting unknown entry #{_id}"
|
108
|
-
removed += 1 if delete_by_id col_info[collection][:col], _id
|
109
|
-
else
|
110
|
-
data[commit] = [] if data[commit].nil?
|
111
|
-
data[commit] << _id
|
112
|
-
end
|
113
|
-
|
114
|
-
processed += 1
|
115
|
-
total_processed += 1
|
116
|
-
|
117
|
-
print "\rProcessed #{processed} records"
|
118
|
-
|
119
|
-
# Calculate duplicates, save intermediate result
|
120
|
-
if snapshot > 0 and processed > snapshot
|
121
|
-
puts "\nLoaded #{data.size} values, cleaning"
|
122
|
-
removed += remove_duplicates data, col_info[collection][:col]
|
123
|
-
data = Hash.new
|
124
|
-
processed = 0
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
removed += remove_duplicates data, col_info[collection][:col]
|
129
|
-
|
130
|
-
puts "\nProcessed #{total_processed}, deleted #{removed} duplicates"
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
GHRMDupl.run
|
6
|
+
GHRMDupl.run
|
data/lib/ghtorrent.rb
CHANGED
@@ -48,3 +48,13 @@ require 'ghtorrent/retriever'
|
|
48
48
|
|
49
49
|
# SQL database fillup methods
|
50
50
|
require 'ghtorrent/ghtorrent'
|
51
|
+
|
52
|
+
# Commands
|
53
|
+
require 'ghtorrent/commands/ght_data_retrieval'
|
54
|
+
require 'ghtorrent/commands/ght_mirror_events'
|
55
|
+
require 'ghtorrent/commands/ght_get_more_commits'
|
56
|
+
require 'ghtorrent/commands/ght_rm_dupl'
|
57
|
+
require 'ghtorrent/commands/ght_load'
|
58
|
+
require 'ghtorrent/commands/ght_retrieve_repo'
|
59
|
+
|
60
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
@@ -4,7 +4,7 @@ module GHTorrent
|
|
4
4
|
|
5
5
|
ENTITIES = [:users, :commits, :followers, :repos, :events, :org_members,
|
6
6
|
:commit_comments, :repo_collaborators, :watchers, :pull_requests,
|
7
|
-
:forks, :pull_request_comments, :issue_comments, :issues
|
7
|
+
:forks, :pull_request_comments, :issue_comments, :issues, :issue_events
|
8
8
|
]
|
9
9
|
|
10
10
|
# Stores +data+ into +entity+. Returns a unique key for the stored entry.
|
@@ -110,8 +110,12 @@ module GHTorrent
|
|
110
110
|
get_collection("forks")
|
111
111
|
when :pull_request_comments
|
112
112
|
get_collection("pull_request_comments")
|
113
|
+
when :issues
|
114
|
+
get_collection("issues")
|
113
115
|
when :issue_comments
|
114
116
|
get_collection("issue_comments")
|
117
|
+
when :issue_events
|
118
|
+
get_collection("issue_events")
|
115
119
|
end
|
116
120
|
end
|
117
121
|
|
@@ -130,7 +134,7 @@ module GHTorrent
|
|
130
134
|
Mongo::ReplSetConnection.new(repl_arr, :read => :secondary)\
|
131
135
|
.db(config(:mongo_db))
|
132
136
|
end
|
133
|
-
init_db(@mongo) if @mongo.collections.size
|
137
|
+
init_db(@mongo) if @mongo.collections.size < ENTITIES.size
|
134
138
|
@mongo
|
135
139
|
else
|
136
140
|
@mongo
|
@@ -183,6 +187,13 @@ module GHTorrent
|
|
183
187
|
ensure_index(:pull_request_comments, "owner")
|
184
188
|
ensure_index(:pull_request_comments, "pullreq_id")
|
185
189
|
ensure_index(:pull_request_comments, "id")
|
190
|
+
ensure_index(:issues, "repo")
|
191
|
+
ensure_index(:issues, "owner")
|
192
|
+
ensure_index(:issues, "issue_id")
|
193
|
+
ensure_index(:issue_events, "repo")
|
194
|
+
ensure_index(:issue_events, "owner")
|
195
|
+
ensure_index(:issue_events, "issue_id")
|
196
|
+
ensure_index(:issue_events, "id")
|
186
197
|
end
|
187
198
|
|
188
199
|
def rescue_connection_failure(max_retries=60)
|
data/lib/ghtorrent/api_client.rb
CHANGED
@@ -8,12 +8,14 @@ require 'ghtorrent/logging'
|
|
8
8
|
require 'ghtorrent/settings'
|
9
9
|
require 'ghtorrent/time'
|
10
10
|
require 'ghtorrent/cache'
|
11
|
+
require 'version'
|
11
12
|
|
12
13
|
module GHTorrent
|
13
14
|
module APIClient
|
14
15
|
include GHTorrent::Logging
|
15
16
|
include GHTorrent::Settings
|
16
17
|
include GHTorrent::Cache
|
18
|
+
include GHTorrent::Logging
|
17
19
|
|
18
20
|
# This is to fix an annoying bug in JRuby's SSL not being able to
|
19
21
|
# verify a valid certificate.
|
@@ -25,10 +27,20 @@ module GHTorrent
|
|
25
27
|
# result pages.
|
26
28
|
def paged_api_request(url, pages = -1, cache = true, last = nil)
|
27
29
|
|
28
|
-
|
29
|
-
|
30
|
-
|
30
|
+
url = if not url.include?("per_page")
|
31
|
+
if url.include?("?")
|
32
|
+
url + "&per_page=100"
|
33
|
+
else
|
34
|
+
url + "?per_page=100"
|
35
|
+
end
|
36
|
+
else
|
37
|
+
url
|
38
|
+
end
|
39
|
+
|
40
|
+
data = if CGI::parse(URI::parse(url).query).has_key?("page")
|
31
41
|
api_request_raw(url, use_cache?(cache, method = :paged))
|
42
|
+
else
|
43
|
+
api_request_raw(url, false)
|
32
44
|
end
|
33
45
|
|
34
46
|
return [] if data.nil?
|
@@ -79,7 +91,7 @@ module GHTorrent
|
|
79
91
|
when "prod"
|
80
92
|
:prod
|
81
93
|
else
|
82
|
-
raise GHTorrentException("")
|
94
|
+
raise GHTorrentException.new("Don't know cache configuration #{@cache_mode}")
|
83
95
|
end
|
84
96
|
case @cache_mode
|
85
97
|
when :dev
|
@@ -128,21 +140,22 @@ module GHTorrent
|
|
128
140
|
# Do the actual request and return the result object
|
129
141
|
def api_request_raw(url, use_cache = false)
|
130
142
|
@num_api_calls ||= 0
|
131
|
-
@ts ||= Time.now
|
143
|
+
@ts ||= Time.now.to_i
|
144
|
+
@started_min ||= Time.now.min
|
132
145
|
|
133
146
|
#Rate limiting to avoid error requests
|
134
147
|
if Time.now().tv_sec() - @ts < 60 then
|
135
148
|
if @num_api_calls >= @settings['mirror']['reqrate'].to_i
|
136
|
-
sleep = 60 - (Time.now
|
149
|
+
sleep = 60 - (Time.now.to_i - @ts)
|
137
150
|
debug "APIClient: Sleeping for #{sleep}"
|
138
151
|
sleep (sleep)
|
139
152
|
@num_api_calls = 0
|
140
|
-
@ts = Time.now
|
153
|
+
@ts = Time.now.to_i
|
141
154
|
end
|
142
155
|
else
|
143
156
|
debug "APIClient: Tick, num_calls = #{@num_api_calls}, zeroing"
|
144
157
|
@num_api_calls = 0
|
145
|
-
@ts = Time.now
|
158
|
+
@ts = Time.now.to_i
|
146
159
|
end
|
147
160
|
|
148
161
|
begin
|
@@ -166,7 +179,17 @@ module GHTorrent
|
|
166
179
|
end
|
167
180
|
|
168
181
|
total = Time.now.to_ms - start_time.to_ms
|
169
|
-
debug "APIClient: Request: #{url} (#{@num_api_calls} calls
|
182
|
+
debug "APIClient: Request: #{url} (#{@num_api_calls} calls #{if from_cache then " from cache," else "(#{contents.meta['x-ratelimit-remaining']} remaining)," end} Total: #{total} ms)"
|
183
|
+
|
184
|
+
if not from_cache and config(:respect_api_ratelimit) and
|
185
|
+
contents.meta['x-ratelimit-remaining'].to_i < 400
|
186
|
+
sleep = 60 - @started_min
|
187
|
+
debug "APIClient: Request limit reached, sleeping for #{sleep} min"
|
188
|
+
sleep(sleep * 60)
|
189
|
+
@started_min = Time.now.min
|
190
|
+
@num_api_calls = 0
|
191
|
+
end
|
192
|
+
|
170
193
|
contents
|
171
194
|
rescue OpenURI::HTTPError => e
|
172
195
|
case e.io.status[0].to_i
|
@@ -176,10 +199,10 @@ module GHTorrent
|
|
176
199
|
403, # Forbidden
|
177
200
|
404, # Not found
|
178
201
|
422 then # Unprocessable entity
|
179
|
-
|
202
|
+
warn "#{url}: #{e.io.status[1]}"
|
180
203
|
return nil
|
181
204
|
else # Server error or HTTP conditions that Github does not report
|
182
|
-
|
205
|
+
warn "#{url}"
|
183
206
|
raise e
|
184
207
|
end
|
185
208
|
end
|
@@ -187,12 +210,23 @@ module GHTorrent
|
|
187
210
|
|
188
211
|
def do_request(url)
|
189
212
|
@attach_ip ||= config(:attach_ip)
|
213
|
+
@username ||= config(:github_username)
|
214
|
+
@passwd ||= config(:github_passwd)
|
215
|
+
@user_agent ||= "ghtorrent-v#{GHTorrent::VERSION}"
|
216
|
+
|
217
|
+
@open_func ||= if @username.nil?
|
218
|
+
lambda {|url| open(url, 'User-Agent' => @user_agent)}
|
219
|
+
else
|
220
|
+
lambda {|url| open(url,
|
221
|
+
'User-Agent' => @user_agent,
|
222
|
+
:http_basic_authentication => [@username, @passwd])}
|
223
|
+
end
|
190
224
|
|
191
225
|
if @attach_ip.nil? or @attach_ip.eql? "0.0.0.0"
|
192
|
-
|
226
|
+
@open_func.call(url)
|
193
227
|
else
|
194
228
|
attach_to(@attach_ip) do
|
195
|
-
|
229
|
+
@open_func.call(url)
|
196
230
|
end
|
197
231
|
end
|
198
232
|
end
|
data/lib/ghtorrent/command.rb
CHANGED
@@ -4,6 +4,7 @@ require 'daemons'
|
|
4
4
|
require 'etc'
|
5
5
|
|
6
6
|
require 'ghtorrent/settings'
|
7
|
+
require 'version'
|
7
8
|
|
8
9
|
module GHTorrent
|
9
10
|
|
@@ -16,6 +17,7 @@ module GHTorrent
|
|
16
17
|
class Command
|
17
18
|
|
18
19
|
include GHTorrent::Settings
|
20
|
+
include GHTorrent::Settings
|
19
21
|
|
20
22
|
# Specify the run method for subclasses.
|
21
23
|
class << self
|
@@ -33,6 +35,8 @@ module GHTorrent
|
|
33
35
|
command.process_options
|
34
36
|
command.validate
|
35
37
|
|
38
|
+
puts "GHTorrent version: #{GHTorrent::VERSION}"
|
39
|
+
|
36
40
|
command.settings = YAML::load_file command.options[:config]
|
37
41
|
|
38
42
|
unless command.options[:addr].nil?
|
@@ -41,6 +45,18 @@ module GHTorrent
|
|
41
45
|
command.options[:addr])
|
42
46
|
end
|
43
47
|
|
48
|
+
unless command.options[:username].nil?
|
49
|
+
command.settings = command.override_config(command.settings,
|
50
|
+
:github_username,
|
51
|
+
command.options[:username])
|
52
|
+
end
|
53
|
+
|
54
|
+
unless command.options[:password].nil?
|
55
|
+
command.settings = command.override_config(command.settings,
|
56
|
+
:github_passwd,
|
57
|
+
command.options[:password])
|
58
|
+
end
|
59
|
+
|
44
60
|
if command.options[:daemon]
|
45
61
|
if Process.uid == 0
|
46
62
|
# Daemonize as a proper system daemon
|
@@ -97,6 +113,8 @@ Standard options:
|
|
97
113
|
opt :daemon, 'run as daemon', :short => 'd'
|
98
114
|
opt :user, 'run as the specified user (only when started as root)',
|
99
115
|
:short => 'u', :type => String
|
116
|
+
opt :username, 'Username at Github', :type => String
|
117
|
+
opt :password, 'Password at Github', :type => String
|
100
118
|
end
|
101
119
|
end
|
102
120
|
|
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'amqp'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
require 'ghtorrent/ghtorrent'
|
7
|
+
require 'ghtorrent/settings'
|
8
|
+
require 'ghtorrent/logging'
|
9
|
+
require 'ghtorrent/command'
|
10
|
+
|
11
|
+
class GHTDataRetrieval < GHTorrent::Command
|
12
|
+
|
13
|
+
include GHTorrent::Settings
|
14
|
+
include GHTorrent::Logging
|
15
|
+
|
16
|
+
def parse(msg)
|
17
|
+
JSON.parse(msg)
|
18
|
+
end
|
19
|
+
|
20
|
+
def PushEvent(data)
|
21
|
+
data['payload']['commits'].each do |c|
|
22
|
+
url = c['url'].split(/\//)
|
23
|
+
|
24
|
+
ghtorrent.get_commit url[4], url[5], url[7]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def WatchEvent(data)
|
29
|
+
owner = data['repo']['name'].split(/\//)[0]
|
30
|
+
repo = data['repo']['name'].split(/\//)[1]
|
31
|
+
watcher = data['actor']['login']
|
32
|
+
created_at = data['created_at']
|
33
|
+
|
34
|
+
ghtorrent.get_watcher owner, repo, watcher, created_at
|
35
|
+
end
|
36
|
+
|
37
|
+
def FollowEvent(data)
|
38
|
+
follower = data['actor']['login']
|
39
|
+
followed = data['payload']['target']['login']
|
40
|
+
created_at = data['created_at']
|
41
|
+
|
42
|
+
ghtorrent.get_follower(follower, followed, created_at)
|
43
|
+
end
|
44
|
+
|
45
|
+
def MemberEvent(data)
|
46
|
+
owner = data['actor']['login']
|
47
|
+
repo = data['repo']['name'].split(/\//)[1]
|
48
|
+
new_member = data['payload']['member']['login']
|
49
|
+
created_at = data['created_at']
|
50
|
+
|
51
|
+
ghtorrent.get_project_member(owner, repo, new_member, created_at)
|
52
|
+
end
|
53
|
+
|
54
|
+
def CommitCommentEvent(data)
|
55
|
+
user = data['actor']['login']
|
56
|
+
repo = data['repo']['name'].split(/\//)[1]
|
57
|
+
id = data['payload']['comment']['id']
|
58
|
+
created_at = data['created_at']
|
59
|
+
|
60
|
+
ghtorrent.get_commit_comment(user, repo, id, created_at)
|
61
|
+
end
|
62
|
+
|
63
|
+
def PullRequestEvent(data)
|
64
|
+
owner = data['payload']['pull_request']['base']['repo']['owner']['login']
|
65
|
+
repo = data['payload']['pull_request']['base']['repo']['name']
|
66
|
+
pullreq_id = data['payload']['number']
|
67
|
+
action = data['payload']['action']
|
68
|
+
created_at = data['created_at']
|
69
|
+
|
70
|
+
ghtorrent.get_pull_request(owner, repo, pullreq_id, action, created_at)
|
71
|
+
end
|
72
|
+
|
73
|
+
def ForkEvent(data)
|
74
|
+
owner = data['repo']['name'].split(/\//)[0]
|
75
|
+
repo = data['repo']['name'].split(/\//)[1]
|
76
|
+
fork_id = data['payload']['forkee']['id']
|
77
|
+
created_at = data['created_at']
|
78
|
+
|
79
|
+
ghtorrent.get_fork(owner, repo, fork_id, created_at)
|
80
|
+
end
|
81
|
+
|
82
|
+
def PullRequestReviewCommentEvent(data)
|
83
|
+
owner = data['repo']['name'].split(/\//)[0]
|
84
|
+
repo = data['repo']['name'].split(/\//)[1]
|
85
|
+
comment_id = data['payload']['comment']['id']
|
86
|
+
pullreq_id = data['payload']['comment']['_links']['pull_request']['href'].split(/\//)[-1]
|
87
|
+
created_at = data['created_at']
|
88
|
+
|
89
|
+
ghtorrent.get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
90
|
+
end
|
91
|
+
|
92
|
+
def IssuesEvent(data)
|
93
|
+
owner = data['repo']['name'].split(/\//)[0]
|
94
|
+
repo = data['repo']['name'].split(/\//)[1]
|
95
|
+
issue_id = data['payload']['issue']['number']
|
96
|
+
created_at = data['created_at']
|
97
|
+
|
98
|
+
ghtorrent.get_issue(owner, repo, issue_id, created_at)
|
99
|
+
end
|
100
|
+
|
101
|
+
def IssueCommentEvent(data)
|
102
|
+
owner = data['repo']['name'].split(/\//)[0]
|
103
|
+
repo = data['repo']['name'].split(/\//)[1]
|
104
|
+
issue_id = data['payload']['issue']['number']
|
105
|
+
comment_id = data['payload']['comment']['id']
|
106
|
+
created_at = data['created_at']
|
107
|
+
|
108
|
+
ghtorrent.get_issue_comment(owner, repo, issue_id, comment_id)
|
109
|
+
end
|
110
|
+
|
111
|
+
def handlers
|
112
|
+
%w(PushEvent WatchEvent FollowEvent MemberEvent
|
113
|
+
CommitCommentEvent PullRequestEvent ForkEvent
|
114
|
+
PullRequestReviewCommentEvent IssuesEvent IssueCommentEvent)
|
115
|
+
#%w(IssuesEvent IssueCommentEvent)
|
116
|
+
end
|
117
|
+
|
118
|
+
def prepare_options(options)
|
119
|
+
options.banner <<-BANNER
|
120
|
+
Retrieves events from queues and processes them through GHTorrent
|
121
|
+
#{command_name} [options]
|
122
|
+
|
123
|
+
#{command_name} options:
|
124
|
+
BANNER
|
125
|
+
|
126
|
+
options.opt :filter,
|
127
|
+
'Only process messages for repos in the provided file',
|
128
|
+
:short => 'f', :type => String
|
129
|
+
end
|
130
|
+
|
131
|
+
def validate
|
132
|
+
super
|
133
|
+
Trollop::die "Filter file does not exist" if options[:filter] and not File.exist?(options[:filter])
|
134
|
+
end
|
135
|
+
|
136
|
+
def logger
|
137
|
+
ghtorrent.logger
|
138
|
+
end
|
139
|
+
|
140
|
+
def ghtorrent
|
141
|
+
@gh ||= GHTorrent::Mirror.new(@settings)
|
142
|
+
@gh
|
143
|
+
end
|
144
|
+
|
145
|
+
def go
|
146
|
+
filter = Array.new
|
147
|
+
|
148
|
+
if options[:filter]
|
149
|
+
File.open(options[:filter]).each { |l|
|
150
|
+
next if l.match(/^ *#/)
|
151
|
+
parts = l.split(/ /)
|
152
|
+
next if parts.size < 2
|
153
|
+
debug "GHTDataRetrieval: Filtering events by #{parts[0] + "/" + parts[1]}"
|
154
|
+
filter << parts[0] + "/" + parts[1]
|
155
|
+
}
|
156
|
+
end
|
157
|
+
|
158
|
+
# Graceful exit
|
159
|
+
Signal.trap('INT') {
|
160
|
+
info "GHTDataRetrieval: Received SIGINT, exiting"
|
161
|
+
AMQP.stop { EM.stop }
|
162
|
+
}
|
163
|
+
Signal.trap('TERM') {
|
164
|
+
info "GHTDataRetrieval: Received SIGTERM, exiting"
|
165
|
+
AMQP.stop { EM.stop }
|
166
|
+
}
|
167
|
+
|
168
|
+
AMQP.start(:host => config(:amqp_host),
|
169
|
+
:port => config(:amqp_port),
|
170
|
+
:username => config(:amqp_username),
|
171
|
+
:password => config(:amqp_password)) do |connection|
|
172
|
+
|
173
|
+
channel = AMQP::Channel.new(connection, :prefetch => config(:amqp_prefetch))
|
174
|
+
exchange = channel.topic(config(:amqp_exchange), :durable => true,
|
175
|
+
:auto_delete => false)
|
176
|
+
|
177
|
+
handlers.each { |h|
|
178
|
+
queue = channel.queue("#{h}s", {:durable => true})\
|
179
|
+
.bind(exchange, :routing_key => "evt.#{h}")
|
180
|
+
|
181
|
+
info "GHTDataRetrieval: Binding handler #{h} to routing key evt.#{h}"
|
182
|
+
|
183
|
+
queue.subscribe(:ack => true) do |headers, msg|
|
184
|
+
begin
|
185
|
+
data = parse(msg)
|
186
|
+
info "GHTDataRetrieval: Processing event: #{data['type']}-#{data['id']}"
|
187
|
+
|
188
|
+
unless options[:filter].nil?
|
189
|
+
if filter.include?(data['repo']['name'])
|
190
|
+
send(h, data)
|
191
|
+
else
|
192
|
+
info "GHTDataRetrieval: Repo #{data['repo']['name']} not in process list. Ignoring event #{data['type']}-#{data['id']}"
|
193
|
+
end
|
194
|
+
else
|
195
|
+
send(h, data)
|
196
|
+
end
|
197
|
+
headers.ack
|
198
|
+
info "GHTDataRetrieval: Processed event: #{data['type']}-#{data['id']}"
|
199
|
+
rescue Exception => e
|
200
|
+
# Give a message a chance to be reprocessed
|
201
|
+
if headers.redelivered?
|
202
|
+
data = parse(msg)
|
203
|
+
warn "GHTDataRetrieval: Could not process event: #{data['type']}-#{data['id']}"
|
204
|
+
headers.reject(:requeue => false)
|
205
|
+
else
|
206
|
+
headers.reject(:requeue => true)
|
207
|
+
end
|
208
|
+
|
209
|
+
STDERR.puts e
|
210
|
+
STDERR.puts e.backtrace.join("\n")
|
211
|
+
end
|
212
|
+
end
|
213
|
+
}
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|