ghtorrent 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,7 +12,7 @@ Sequel.migration do
12
12
  String :email, :null => true, :unique => true
13
13
  TrueClass :hireable, :null => true
14
14
  String :bio, :null => true
15
- Time :created_at, :null => false
15
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
16
16
  end
17
17
 
18
18
  puts("Creating table projects")
@@ -23,7 +23,7 @@ Sequel.migration do
23
23
  String :name, :null => false
24
24
  String :description
25
25
  String :language
26
- Time :created_at, :null => false
26
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
27
27
  end
28
28
 
29
29
  puts("Creating table commits")
@@ -32,7 +32,8 @@ Sequel.migration do
32
32
  String :sha, :size => 40, :unique => true
33
33
  foreign_key :author_id, :users
34
34
  foreign_key :committer_id, :users
35
- Time :created_at, :null => false
35
+ foreign_key :project_id, :projects
36
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
36
37
  end
37
38
 
38
39
  puts("Creating table commit_parents")
@@ -46,6 +47,7 @@ Sequel.migration do
46
47
  create_table :followers do
47
48
  foreign_key :user_id, :users, :null => false
48
49
  foreign_key :follower_id, :users, :null => false
50
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
49
51
  primary_key [:user_id, :follower_id]
50
52
  end
51
53
  end
@@ -0,0 +1,37 @@
1
+ require 'sequel'
2
+
3
# Migration: adds a user/organization discriminator column to +users+ and
# creates the +organization_members+ join table.
#
# NOTE(review): "enum('USR', 'ORG')" is passed through as a raw column type;
# this looks MySQL-specific -- confirm behaviour on other backends.
Sequel.migration do
  up do

    puts("Adding organization descriminator field to table users")

    alter_table :users do
      add_column :type, "enum('USR', 'ORG')", :null => false
    end

    puts("Updating users with default values")
    # Sequel evaluates this block in the context of the database being
    # migrated, so call it directly rather than through a global DB constant
    # that this file never defines.
    transaction(:rollback => :reraise, :isolation => :committed) do
      self[:users].update(:type => "USR")
    end

    puts("Creating table organization-members")

    # Both columns reference users: organizations are stored as users rows.
    create_table :organization_members do
      foreign_key :org_id, :users, :null => false
      foreign_key :user_id, :users, :null => false
      primary_key [:org_id, :user_id]
      DateTime :created_at, :null => false,
               :default => Sequel::CURRENT_TIMESTAMP
    end
  end

  down do
    puts("Droping table organization-members")
    drop_table :organization_members

    puts("Droping organization descriminator field to table users")
    alter_table :users do
      drop_column :type
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'sequel'
2
+
3
# Migration: creates the +commit_comments+ table (up) and drops it (down).
Sequel.migration do
  up do
    puts "Adding table commit comments"

    create_table(:commit_comments) do
      primary_key :id
      # Each comment points at the commit it annotates and at its author.
      foreign_key :commit_id, :commits, :null => false
      foreign_key :user_id, :users, :null => false
      String :body
      Integer :line, :null => true
      Integer :position, :null => true
      Integer :comment_id, :null => false, :unique => true
      String :ext_ref_id, :null => false, :size => 24, :default => "0"
      DateTime :created_at, :null => false, :default => Sequel::CURRENT_TIMESTAMP
    end
  end

  down do
    drop_table(:commit_comments)
  end
end
@@ -29,8 +29,8 @@
29
29
  module GHTorrent
30
30
  module Retriever
31
31
 
32
+ include GHTorrent::Utils
32
33
  include GHTorrent::APIClient
33
- include GHTorrent::Settings
34
34
 
35
35
  def initialize(settings)
36
36
  super(settings)
@@ -44,16 +44,18 @@ module GHTorrent
44
44
  url = ghurl "users/#{user}"
45
45
  u = api_request(url)
46
46
 
47
- if u.nil?
47
+ if u.empty?
48
48
  throw GHTorrentException.new("Cannot find user #{user}")
49
49
  end
50
50
 
51
51
  unq = @persister.store(:users, u)
52
52
  u[@uniq] = unq
53
- info "Retriever: New user #{user}"
53
+ what = user_type(u['type'])
54
+ info "Retriever: New #{what} #{user}"
54
55
  u
55
56
  else
56
- debug "Retriever: Already got user #{user}"
57
+ what = user_type(stored_user.first['type'])
58
+ debug "Retriever: Already got #{what} #{user}"
57
59
  stored_user.first
58
60
  end
59
61
  end
@@ -63,10 +65,13 @@ module GHTorrent
63
65
  # http://develop.github.com/p/users.html
64
66
  def retrieve_user_byemail(email, name)
65
67
  url = ghurl_v2("user/email/#{email}")
66
- api_request(url)
68
+ r = api_request(url)
69
+
70
+ return nil if r.empty?
71
+ r
67
72
  end
68
73
 
69
- def retrieve_new_user_followers(user)
74
+ def retrieve_user_followers(user)
70
75
  stored_followers = @persister.find(:followers, {'follows' => user})
71
76
 
72
77
  followers = paged_api_request(ghurl "users/#{user}/followers")
@@ -88,6 +93,7 @@ module GHTorrent
88
93
  @persister.find(:followers, {'follows' => user})
89
94
  end
90
95
 
96
+ # Retrieve a single commit from a repo
91
97
  def retrieve_commit(repo, sha, user)
92
98
  commit = @persister.find(:commits, {'sha' => "#{sha}"})
93
99
 
@@ -95,7 +101,7 @@ module GHTorrent
95
101
  url = ghurl "repos/#{user}/#{repo}/commits/#{sha}"
96
102
  c = api_request(url)
97
103
 
98
- if c.nil?
104
+ if c.empty?
99
105
  throw GHTorrentException.new("Cannot find commit #{user}/#{repo}/#{sha}")
100
106
  end
101
107
 
@@ -109,6 +115,31 @@ module GHTorrent
109
115
  end
110
116
  end
111
117
 
118
# Retrieve the commits of a repository, starting from +sha+ ("master" when
# +sha+ is nil). The value of config(:mirror_commit_pages_new_repo) is handed
# to paged_api_request as its second argument (presumably a page limit --
# confirm against paged_api_request).
#
# Commits already known to the persister are skipped; the returned array holds
# only the results of retrieve_commit for the previously unknown ones.
def retrieve_commits(repo, sha, user)
  start_ref = sha.nil? ? "master" : sha

  listing = paged_api_request(
    ghurl("repos/#{user}/#{repo}/commits?last_sha=#{start_ref}"),
    config(:mirror_commit_pages_new_repo))

  fetched = []
  listing.each do |entry|
    known = @persister.find(:commits, {'sha' => "#{entry['sha']}"})

    if known.empty?
      fetched << retrieve_commit(repo, entry['sha'], user)
    else
      debug "Retriever: Already got commit #{repo} -> #{entry['sha']}"
    end
  end
  fetched
end
141
+
142
+
112
143
  def retrieve_repo(user, repo)
113
144
  stored_repo = @persister.find(:repos, {'owner.login' => user,
114
145
  'name' => repo })
@@ -116,7 +147,7 @@ module GHTorrent
116
147
  url = ghurl "repos/#{user}/#{repo}"
117
148
  r = api_request(url)
118
149
 
119
- if r.nil?
150
+ if r.empty?
120
151
  throw GHTorrentException.new("Cannot find repo #{user}/#{repo}")
121
152
  end
122
153
 
@@ -130,6 +161,94 @@ module GHTorrent
130
161
  end
131
162
  end
132
163
 
164
# List the organizations +user+ belongs to and retrieve each of them via
# retrieve_org. Returns one retrieve_org result per listed organization.
def retrieve_orgs(user)
  memberships = paged_api_request(ghurl("users/#{user}/orgs"))
  memberships.map do |entry|
    retrieve_org(entry['login'])
  end
end
170
+
171
# Retrieve one organization. Organizations are looked up exactly like users,
# so this simply delegates to retrieve_user_byusername.
def retrieve_org(org)
  retrieve_user_byusername(org)
end
175
+
176
# Retrieve the members of organization +org+.
#
# Every member returned by the API is tagged with 'org' => org and stored in
# the :org_members collection unless a record with the same org and login is
# already there. Returns the result of retrieve_org for the login of every
# stored member of +org+.
def retrieve_org_members(org)
  url = ghurl "orgs/#{org}/members"
  stored_org_members = @persister.find(:org_members, {'org' => org})

  paged_api_request(url).each do |member|
    member['org'] = org

    # Compare against +org+: this method has no +user+ in scope.
    exists = stored_org_members.any? { |f|
      f['org'] == org && f['login'] == member['login']
    }

    if exists
      debug "Retriever: Member #{org} -> #{member['login']} exists"
    else
      @persister.store(:org_members, member)
      info "Retriever: Added member #{org} -> #{member['login']}"
    end
  end

  @persister.find(:org_members, {'org' => org}).map{|o| retrieve_org(o['login'])}
end
199
+
200
# Retrieve all commit comments for a specific repository and store the ones
# not yet known to the persister.
# Returns whatever store_commit_comments returns.
def retrieve_repo_comments(repo, user)
  commit_comments = paged_api_request(ghurl("repos/#{user}/#{repo}/comments"))
  stored_comments = @persister.find(:commit_comments,
                                    {'repo' => repo,
                                     'user' => user})
  # store_commit_comments expects (repo, user, stored, retrieved), in that order.
  store_commit_comments(repo, user, stored_comments, commit_comments)
end
208
+
209
# Retrieve all comments attached to one commit.
#
# Reads the comments already stored for +sha+, fetches the commit's comments
# from the API, hands both lists to store_commit_comments, and returns a fresh
# persister lookup for +sha+. +reentrer+ is only referenced by the disabled
# optimization below.
def retrieve_commit_comments(user, repo, sha, reentrer = false)
  # Optimization: if no commits comments are registered for the repo
  # get them en masse
  #items = @persister.count(:commit_comments, {'repo' => repo, 'user' => user})
  #if items == 0 && !reentrer
  #  retrieve_repo_comments(repo, user)
  #  return retrieve_commit_comments(user, repo, sha, true)
  #end

  lookup = lambda { @persister.find(:commit_comments, {'commit_id' => sha}) }

  already_stored = lookup.call
  from_api = paged_api_request(ghurl("repos/#{user}/#{repo}/commits/#{sha}/comments"))
  store_commit_comments(repo, user, already_stored, from_api)
  lookup.call
end
224
+
225
# Retrieve a single commit comment by its +id+.
#
# When the persister has no matching record the comment is fetched from the
# API, tagged with 'repo' and 'user', and stored. In both cases the returned
# hash has its '_id' value copied under the @uniq key.
# NOTE(review): the new-record path reads r['_id'] after store, so it relies
# on the persister adding '_id' to the record -- confirm.
# +reentrer+ is only referenced by the disabled optimization below.
def retrieve_commit_comment(user, repo, id, reentrer = false)
  # Optimization: if no commits comments are registered for the repo
  # get them en masse
  #items = @persister.count(:commit_comments, {'repo' => repo, 'user' => user})
  #if items == 0 && !reentrer
  #  retrieve_repo_comments(repo, user)
  #  return retrieve_commit_comment(user, repo, id, true)
  #end

  matches = @persister.find(:commit_comments, {'repo' => repo,
                                               'user' => user, 'id' => id})
  if matches.empty?
    r = api_request(ghurl("repos/#{user}/#{repo}/comments/#{id}"))
    r['repo'] = repo
    r['user'] = user
    @persister.store(:commit_comments, r)
    info "Retriever: Added commit comment #{r['commit_id']} -> #{r['id']}"
    r[@uniq] = r['_id']
    r
  else
    # find returns a collection; work on the matching record itself.
    comment = matches.first
    debug "Retriever: Commit comment #{comment['commit_id']} -> #{comment['id']} exists"
    comment[@uniq] = comment['_id']
    comment
  end
end
251
+
133
252
  # Get current Github events
134
253
  def get_events
135
254
  api_request "https://api.github.com/events"
@@ -144,5 +263,24 @@ module GHTorrent
144
263
  def ghurl_v2(path)
145
264
  config(:mirror_urlbase_v2) + path
146
265
  end
266
+
267
# Store every entry of +retrieved_comments+ that has no counterpart (same
# 'commit_id' and 'id') in +stored_comments+. New entries are tagged with
# 'repo' and 'user' before being handed to the persister.
# Returns +retrieved_comments+.
def store_commit_comments(repo, user, stored_comments, retrieved_comments)
  retrieved_comments.each do |candidate|
    already_stored = stored_comments.any? { |known|
      known['commit_id'] == candidate['commit_id'] && known['id'] == candidate['id']
    }

    if already_stored
      debug "Retriever: Commit comment #{candidate['commit_id']} -> #{candidate['id']} exists"
    else
      candidate['repo'] = repo
      candidate['user'] = user

      @persister.store(:commit_comments, candidate)
      info "Retriever: Added commit comment #{candidate['commit_id']} -> #{candidate['id']}"
    end
  end
end
147
285
  end
148
286
  end
@@ -47,6 +47,7 @@ module GHTorrent
47
47
  :mirror_reqrate => "mirror.reqrate",
48
48
  :mirror_pollevery => "mirror.pollevery",
49
49
  :mirror_persister => "mirror.persister",
50
+ :mirror_commit_pages_new_repo => "mirror.commit_pages_new_repo",
50
51
 
51
52
  :uniq_id => "uniq_id"
52
53
  }
@@ -28,6 +28,11 @@
28
28
 
29
29
  module GHTorrent
30
30
  module Utils
31
+
32
# Inclusion hook: whatever includes this module is also extended with it, so
# the helper methods become callable on the including class/module itself.
def self.included(other)
  other.extend(self)
end
35
+
31
36
  # Read a value whose format is "foo.bar.baz" from a hierarchical map
32
37
  # (the result of a JSON parse or a Mongo query), where a dot represents
33
38
  # one level deep in the result hierarchy.
@@ -54,5 +59,13 @@ module GHTorrent
54
59
  end
55
60
  end
56
61
  end
62
+
63
# Map an API account type to the short code used for users: "USR" for
# "User", "ORG" for any other value (including nil).
def user_type(type)
  type == "User" ? "USR" : "ORG"
end
57
70
  end
58
71
  end
@@ -1,5 +1,5 @@
1
1
  require "test/unit"
2
- require 'ghtorrent-old'
2
+ require 'ghtorrent'
3
3
 
4
4
  class CallStackTest < Test::Unit::TestCase
5
5
 
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ghtorrent
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
4
+ hash: 13
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- version: "0.2"
8
+ - 3
9
+ version: "0.3"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Georgios Gousios
@@ -107,6 +107,38 @@ dependencies:
107
107
  version: "3.35"
108
108
  type: :runtime
109
109
  version_requirements: *id006
110
+ - !ruby/object:Gem::Dependency
111
+ name: sqlite3-ruby
112
+ prerelease: false
113
+ requirement: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 31
119
+ segments:
120
+ - 1
121
+ - 3
122
+ - 2
123
+ version: 1.3.2
124
+ type: :runtime
125
+ version_requirements: *id007
126
+ - !ruby/object:Gem::Dependency
127
+ name: daemons
128
+ prerelease: false
129
+ requirement: &id008 !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ hash: 3
135
+ segments:
136
+ - 1
137
+ - 1
138
+ - 8
139
+ version: 1.1.8
140
+ type: :runtime
141
+ version_requirements: *id008
110
142
  description: |-
111
143
  A library and a collection of associated programs
112
144
  to mirror and process Github data
@@ -128,8 +160,9 @@ files:
128
160
  - lib/ghtorrent/ghtorrent.rb
129
161
  - lib/ghtorrent/logging.rb
130
162
  - lib/ghtorrent/migrations/001_init_schema.rb
131
- - lib/ghtorrent/migrations/002_add_followers_created_at.rb
132
- - lib/ghtorrent/migrations/003_add_external_ref_ids.rb
163
+ - lib/ghtorrent/migrations/002_add_external_ref_ids.rb
164
+ - lib/ghtorrent/migrations/003_add_orgs.rb
165
+ - lib/ghtorrent/migrations/004_add_commit_comments.rb
133
166
  - lib/ghtorrent/persister.rb
134
167
  - lib/ghtorrent/retriever.rb
135
168
  - lib/ghtorrent/settings.rb
@@ -1,15 +0,0 @@
1
- require 'sequel'
2
-
3
- Sequel.migration do
4
- up do
5
- alter_table :followers do
6
- add_column :created_at, :Time, :null => false, :default => Time.now
7
- end
8
- end
9
-
10
- down do
11
- alter_table :followers do
12
- drop_column :created_at
13
- end
14
- end
15
- end