ghtorrent 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +12 -4
- data/bin/ght-data-retrieval +13 -3
- data/bin/ght-load +1 -1
- data/bin/ght-mirror-events +47 -17
- data/bin/ght-periodic-dump +51 -13
- data/lib/ghtorrent.rb +1 -1
- data/lib/ghtorrent/adapters/base_adapter.rb +11 -2
- data/lib/ghtorrent/adapters/mongo_persister.rb +45 -16
- data/lib/ghtorrent/api_client.rb +51 -17
- data/lib/ghtorrent/command.rb +43 -2
- data/lib/ghtorrent/ghtorrent.rb +265 -71
- data/lib/ghtorrent/migrations/001_init_schema.rb +5 -3
- data/lib/ghtorrent/migrations/{003_add_external_ref_ids.rb → 002_add_external_ref_ids.rb} +0 -0
- data/lib/ghtorrent/migrations/003_add_orgs.rb +37 -0
- data/lib/ghtorrent/migrations/004_add_commit_comments.rb +27 -0
- data/lib/ghtorrent/retriever.rb +146 -8
- data/lib/ghtorrent/settings.rb +1 -0
- data/lib/ghtorrent/utils.rb +13 -0
- data/test/callstack_test.rb +1 -1
- metadata +38 -5
- data/lib/ghtorrent/migrations/002_add_followers_created_at.rb +0 -15
@@ -12,7 +12,7 @@ Sequel.migration do
|
|
12
12
|
String :email, :null => true, :unique => true
|
13
13
|
TrueClass :hireable, :null => true
|
14
14
|
String :bio, :null => true
|
15
|
-
|
15
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
16
16
|
end
|
17
17
|
|
18
18
|
puts("Creating table projects")
|
@@ -23,7 +23,7 @@ Sequel.migration do
|
|
23
23
|
String :name, :null => false
|
24
24
|
String :description
|
25
25
|
String :language
|
26
|
-
|
26
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
27
27
|
end
|
28
28
|
|
29
29
|
puts("Creating table commits")
|
@@ -32,7 +32,8 @@ Sequel.migration do
|
|
32
32
|
String :sha, :size => 40, :unique => true
|
33
33
|
foreign_key :author_id, :users
|
34
34
|
foreign_key :committer_id, :users
|
35
|
-
|
35
|
+
foreign_key :project_id, :projects
|
36
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
36
37
|
end
|
37
38
|
|
38
39
|
puts("Creating table commit_parents")
|
@@ -46,6 +47,7 @@ Sequel.migration do
|
|
46
47
|
create_table :followers do
|
47
48
|
foreign_key :user_id, :users, :null => false
|
48
49
|
foreign_key :follower_id, :users, :null => false
|
50
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
49
51
|
primary_key [:user_id, :follower_id]
|
50
52
|
end
|
51
53
|
end
|
File without changes
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
# Migration 003: adds a user/organization discriminator column to +users+
# and creates the +organization_members+ join table.
Sequel.migration do
  up do
    puts("Adding organization discriminator field to table users")

    alter_table :users do
      add_column :type, "enum('USR', 'ORG')", :null => false
    end

    puts("Updating users with default values")
    # Every row that exists when this migration runs is marked as a plain user.
    # NOTE(review): relies on a global DB constant defined outside this file.
    DB.transaction(:rollback => :reraise, :isolation => :committed) do
      DB[:users].update(:type => "USR")
    end

    puts("Creating table organization-members")

    create_table :organization_members do
      foreign_key :org_id, :users, :null => false
      foreign_key :user_id, :users, :null => false
      primary_key [:org_id, :user_id]
      DateTime :created_at, :null => false,
               :default => Sequel::CURRENT_TIMESTAMP
    end
  end

  # Reverts +up+: removes the join table first, then the discriminator column.
  down do
    puts("Dropping table organization-members")
    drop_table :organization_members

    puts("Dropping organization discriminator field from table users")
    alter_table :users do
      drop_column :type
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
# Migration 004: creates the +commit_comments+ table.
Sequel.migration do
  up do
    puts("Adding table commit comments")

    create_table :commit_comments do
      primary_key :id
      foreign_key :commit_id, :commits, :null => false
      foreign_key :user_id, :users, :null => false
      String :body
      Integer :line, :null => true
      Integer :position, :null => true
      Integer :comment_id, :null => false, :unique => true
      String :ext_ref_id, :null => false, :size => 24, :default => "0"
      DateTime :created_at, :null => false,
               :default => Sequel::CURRENT_TIMESTAMP
    end
  end

  # Reverts +up+ by dropping the table again.
  down do
    drop_table :commit_comments
  end
end
|
data/lib/ghtorrent/retriever.rb
CHANGED
@@ -29,8 +29,8 @@
|
|
29
29
|
module GHTorrent
|
30
30
|
module Retriever
|
31
31
|
|
32
|
+
include GHTorrent::Utils
|
32
33
|
include GHTorrent::APIClient
|
33
|
-
include GHTorrent::Settings
|
34
34
|
|
35
35
|
def initialize(settings)
|
36
36
|
super(settings)
|
@@ -44,16 +44,18 @@ module GHTorrent
|
|
44
44
|
url = ghurl "users/#{user}"
|
45
45
|
u = api_request(url)
|
46
46
|
|
47
|
-
if u.
|
47
|
+
if u.empty?
|
48
48
|
throw GHTorrentException.new("Cannot find user #{user}")
|
49
49
|
end
|
50
50
|
|
51
51
|
unq = @persister.store(:users, u)
|
52
52
|
u[@uniq] = unq
|
53
|
-
|
53
|
+
what = user_type(u['type'])
|
54
|
+
info "Retriever: New #{what} #{user}"
|
54
55
|
u
|
55
56
|
else
|
56
|
-
|
57
|
+
what = user_type(stored_user.first['type'])
|
58
|
+
debug "Retriever: Already got #{what} #{user}"
|
57
59
|
stored_user.first
|
58
60
|
end
|
59
61
|
end
|
@@ -63,10 +65,13 @@ module GHTorrent
|
|
63
65
|
# http://develop.github.com/p/users.html
|
64
66
|
# Look a user up by email address through the v2 API.
#
# Returns the API response, or nil when the response is empty.
# +name+ is accepted for interface compatibility but is not used here.
def retrieve_user_byemail(email, name)
  url = ghurl_v2("user/email/#{email}")
  response = api_request(url)

  response.empty? ? nil : response
end
|
68
73
|
|
69
|
-
def
|
74
|
+
def retrieve_user_followers(user)
|
70
75
|
stored_followers = @persister.find(:followers, {'follows' => user})
|
71
76
|
|
72
77
|
followers = paged_api_request(ghurl "users/#{user}/followers")
|
@@ -88,6 +93,7 @@ module GHTorrent
|
|
88
93
|
@persister.find(:followers, {'follows' => user})
|
89
94
|
end
|
90
95
|
|
96
|
+
# Retrieve a single commit from a repo
|
91
97
|
def retrieve_commit(repo, sha, user)
|
92
98
|
commit = @persister.find(:commits, {'sha' => "#{sha}"})
|
93
99
|
|
@@ -95,7 +101,7 @@ module GHTorrent
|
|
95
101
|
url = ghurl "repos/#{user}/#{repo}/commits/#{sha}"
|
96
102
|
c = api_request(url)
|
97
103
|
|
98
|
-
if c.
|
104
|
+
if c.empty?
|
99
105
|
throw GHTorrentException.new("Cannot find commit #{user}/#{repo}/#{sha}")
|
100
106
|
end
|
101
107
|
|
@@ -109,6 +115,31 @@ module GHTorrent
|
|
109
115
|
end
|
110
116
|
end
|
111
117
|
|
118
|
+
# Retrieve all project commits or 500 (whatever comes first),
# starting from the provided +sha+ ("master" is used when +sha+ is nil).
#
# The second argument to paged_api_request comes from
# config(:mirror_commit_pages_new_repo) -- presumably the page limit;
# verify against paged_api_request.
#
# Returns an array holding the result of retrieve_commit for every listed
# commit that was not yet in the persister; already stored commits are only
# logged.
def retrieve_commits(repo, sha, user)
  last_sha = sha.nil? ? "master" : sha

  url = ghurl "repos/#{user}/#{repo}/commits?last_sha=#{last_sha}"
  commits = paged_api_request(url, config(:mirror_commit_pages_new_repo))

  retrieved = []
  commits.each do |c|
    stored = @persister.find(:commits, {'sha' => "#{c['sha']}"})

    if stored.empty?
      retrieved << retrieve_commit(repo, c['sha'], user)
    else
      debug "Retriever: Already got commit #{repo} -> #{c['sha']}"
    end
  end
  retrieved
end
|
141
|
+
|
142
|
+
|
112
143
|
def retrieve_repo(user, repo)
|
113
144
|
stored_repo = @persister.find(:repos, {'owner.login' => user,
|
114
145
|
'name' => repo })
|
@@ -116,7 +147,7 @@ module GHTorrent
|
|
116
147
|
url = ghurl "repos/#{user}/#{repo}"
|
117
148
|
r = api_request(url)
|
118
149
|
|
119
|
-
if r.
|
150
|
+
if r.empty?
|
120
151
|
throw GHTorrentException.new("Cannot find repo #{user}/#{repo}")
|
121
152
|
end
|
122
153
|
|
@@ -130,6 +161,94 @@ module GHTorrent
|
|
130
161
|
end
|
131
162
|
end
|
132
163
|
|
164
|
+
# Retrieve the organizations the provided user participates in.
#
# Lists users/<user>/orgs through the paged API and returns the result of
# retrieve_org for each listed login.
def retrieve_orgs(user)
  url = ghurl "users/#{user}/orgs"
  orgs = paged_api_request(url)
  orgs.map { |o| retrieve_org(o['login']) }
end
|
170
|
+
|
171
|
+
# Retrieve a single organization.
#
# Delegates to retrieve_user_byusername with the organization's login and
# returns whatever that call returns.
def retrieve_org(org)
  retrieve_user_byusername(org)
end
|
175
|
+
|
176
|
+
# Retrieve organization members.
#
# Fetches orgs/<org>/members through the paged API, tags every entry with
# the organization login and stores the entries that are not yet in the
# :org_members collection. Returns the result of retrieve_org for the login
# of every member stored for +org+ after the update.
def retrieve_org_members(org)
  stored_org_members = @persister.find(:org_members, {'org' => org})

  org_members = paged_api_request(ghurl "orgs/#{org}/members")
  org_members.each do |x|
    x['org'] = org

    # Compare against +org+: the previous code referenced an undefined
    # +user+ here and raised as soon as any member was already stored.
    exists = stored_org_members.any? do |f|
      f['org'] == org && f['login'] == x['login']
    end

    if exists
      debug "Retriever: Member #{org} -> #{x['login']} exists"
    else
      @persister.store(:org_members, x)
      info "Retriever: Added member #{org} -> #{x['login']}"
    end
  end

  @persister.find(:org_members, {'org' => org}).map { |o| retrieve_org(o['login']) }
end
|
199
|
+
|
200
|
+
# Retrieve all commit comments for a specific repository.
#
# Fetches repos/<user>/<repo>/comments through the paged API and hands the
# result to store_commit_comments together with the comments already stored
# for this repo/user pair. Returns whatever store_commit_comments returns.
def retrieve_repo_comments(repo, user)
  retrieved_comments = paged_api_request(ghurl "repos/#{user}/#{repo}/comments")
  stored_comments = @persister.find(:commit_comments,
                                    {'repo' => repo,
                                     'user' => user})
  # store_commit_comments expects (repo, user, stored, retrieved); the two
  # collections were previously passed in the opposite order.
  store_commit_comments(repo, user, stored_comments, retrieved_comments)
end
|
208
|
+
|
209
|
+
# Retrieve all comments for a single commit.
#
# Looks up the comments already stored under 'commit_id' == +sha+, fetches
# repos/<user>/<repo>/commits/<sha>/comments through the paged API, stores
# the missing ones via store_commit_comments and returns the comments stored
# for +sha+ afterwards. +reentrer+ is only referenced by the disabled
# optimization below and is currently unused.
def retrieve_commit_comments(user, repo, sha, reentrer = false)
  # Optimization: if no commits comments are registered for the repo
  # get them en masse
  #items = @persister.count(:commit_comments, {'repo' => repo, 'user' => user})
  #if items == 0 && !reentrer
  #  retrieve_repo_comments(repo, user)
  #  return retrieve_commit_comments(user, repo, sha, true)
  #end

  stored_comments = @persister.find(:commit_comments, {'commit_id' => sha})
  retrieved_comments = paged_api_request(ghurl "repos/#{user}/#{repo}/commits/#{sha}/comments")
  store_commit_comments(repo, user, stored_comments, retrieved_comments)

  @persister.find(:commit_comments, {'commit_id' => sha})
end
|
224
|
+
|
225
|
+
# Retrieve a single comment.
#
# Returns the stored comment document matching repo/user/id when there is
# one; otherwise fetches repos/<user>/<repo>/comments/<id>, tags the
# response with +repo+ and +user+, stores it and returns it. In both cases
# the returned hash has its @uniq key set from its '_id' entry.
# +reentrer+ is only referenced by the disabled optimization below and is
# currently unused.
def retrieve_commit_comment(user, repo, id, reentrer = false)
  # Optimization: if no commits comments are registered for the repo
  # get them en masse
  #items = @persister.count(:commit_comments, {'repo' => repo, 'user' => user})
  #if items == 0 && !reentrer
  #  retrieve_repo_comments(repo, user)
  #  return retrieve_commit_comment(user, repo, id)
  #end

  comment = @persister.find(:commit_comments, {'repo' => repo,
                                               'user' => user, 'id' => id})
  if comment.empty?
    r = api_request(ghurl "repos/#{user}/#{repo}/comments/#{id}")
    r['repo'] = repo
    r['user'] = user
    @persister.store(:commit_comments, r)
    info "Retriever: Added commit comment #{r['commit_id']} -> #{r['id']}"
    # NOTE(review): assumes the persister adds '_id' to the stored hash --
    # confirm against the persister implementation.
    r[@uniq] = r['_id']
    r
  else
    # find returns a collection of documents; work on the matching document
    # itself rather than indexing the collection with string keys.
    stored = comment.first
    debug "Retriever: Commit comment #{stored['commit_id']} -> #{stored['id']} exists"
    stored[@uniq] = stored['_id']
    stored
  end
end
|
251
|
+
|
133
252
|
# Get current Github events
|
134
253
|
def get_events
|
135
254
|
api_request "https://api.github.com/events"
|
@@ -144,5 +263,24 @@ module GHTorrent
|
|
144
263
|
# Build a v2 API URL by appending +path+ to config(:mirror_urlbase_v2).
def ghurl_v2(path)
  config(:mirror_urlbase_v2) + path
end
|
266
|
+
|
267
|
+
# Store every retrieved comment that is not already among the stored ones.
#
# A retrieved comment counts as stored when some entry of +stored_comments+
# has the same 'commit_id' and 'id'. New comments are tagged with +repo+ and
# +user+ before being written to the :commit_comments collection.
def store_commit_comments(repo, user, stored_comments, retrieved_comments)
  retrieved_comments.each do |x|
    exists = stored_comments.any? do |f|
      f['commit_id'] == x['commit_id'] && f['id'] == x['id']
    end

    if exists
      debug "Retriever: Commit comment #{x['commit_id']} -> #{x['id']} exists"
    else
      x['repo'] = repo
      x['user'] = user

      @persister.store(:commit_comments, x)
      info "Retriever: Added commit comment #{x['commit_id']} -> #{x['id']}"
    end
  end
end
|
147
285
|
end
|
148
286
|
end
|
data/lib/ghtorrent/settings.rb
CHANGED
data/lib/ghtorrent/utils.rb
CHANGED
@@ -28,6 +28,11 @@
|
|
28
28
|
|
29
29
|
module GHTorrent
|
30
30
|
module Utils
|
31
|
+
|
32
|
+
# Hook run when this module is included: also extends the including
# class/module with it, so its methods are callable on +other+ directly as
# well as on its instances.
def self.included(other)
  other.extend self
end
|
35
|
+
|
31
36
|
# Read a value whose format is "foo.bar.baz" from a hierarchical map
|
32
37
|
# (the result of a JSON parse or a Mongo query), where a dot represents
|
33
38
|
# one level deep in the result hierarchy.
|
@@ -54,5 +59,13 @@ module GHTorrent
|
|
54
59
|
end
|
55
60
|
end
|
56
61
|
end
|
62
|
+
|
63
|
+
# Map an account type string to a short code: "USR" when +type+ is exactly
# "User", "ORG" for every other value (including nil).
def user_type(type)
  type == "User" ? "USR" : "ORG"
end
|
57
70
|
end
|
58
71
|
end
|
data/test/callstack_test.rb
CHANGED
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ghtorrent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: "0.
|
8
|
+
- 3
|
9
|
+
version: "0.3"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Georgios Gousios
|
@@ -107,6 +107,38 @@ dependencies:
|
|
107
107
|
version: "3.35"
|
108
108
|
type: :runtime
|
109
109
|
version_requirements: *id006
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: sqlite3-ruby
|
112
|
+
prerelease: false
|
113
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
hash: 31
|
119
|
+
segments:
|
120
|
+
- 1
|
121
|
+
- 3
|
122
|
+
- 2
|
123
|
+
version: 1.3.2
|
124
|
+
type: :runtime
|
125
|
+
version_requirements: *id007
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: daemons
|
128
|
+
prerelease: false
|
129
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
130
|
+
none: false
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
hash: 3
|
135
|
+
segments:
|
136
|
+
- 1
|
137
|
+
- 1
|
138
|
+
- 8
|
139
|
+
version: 1.1.8
|
140
|
+
type: :runtime
|
141
|
+
version_requirements: *id008
|
110
142
|
description: |-
|
111
143
|
A library and a collection of associated programs
|
112
144
|
to mirror and process Github data
|
@@ -128,8 +160,9 @@ files:
|
|
128
160
|
- lib/ghtorrent/ghtorrent.rb
|
129
161
|
- lib/ghtorrent/logging.rb
|
130
162
|
- lib/ghtorrent/migrations/001_init_schema.rb
|
131
|
-
- lib/ghtorrent/migrations/
|
132
|
-
- lib/ghtorrent/migrations/
|
163
|
+
- lib/ghtorrent/migrations/002_add_external_ref_ids.rb
|
164
|
+
- lib/ghtorrent/migrations/003_add_orgs.rb
|
165
|
+
- lib/ghtorrent/migrations/004_add_commit_comments.rb
|
133
166
|
- lib/ghtorrent/persister.rb
|
134
167
|
- lib/ghtorrent/retriever.rb
|
135
168
|
- lib/ghtorrent/settings.rb
|