ghtorrent 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ Sequel.migration do
12
12
  String :email, :null => true, :unique => true
13
13
  TrueClass :hireable, :null => true
14
14
  String :bio, :null => true
15
- Time :created_at, :null => false
15
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
16
16
  end
17
17
 
18
18
  puts("Creating table projects")
@@ -23,7 +23,7 @@ Sequel.migration do
23
23
  String :name, :null => false
24
24
  String :description
25
25
  String :language
26
- Time :created_at, :null => false
26
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
27
27
  end
28
28
 
29
29
  puts("Creating table commits")
@@ -32,7 +32,8 @@ Sequel.migration do
32
32
  String :sha, :size => 40, :unique => true
33
33
  foreign_key :author_id, :users
34
34
  foreign_key :committer_id, :users
35
- Time :created_at, :null => false
35
+ foreign_key :project_id, :projects
36
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
36
37
  end
37
38
 
38
39
  puts("Creating table commit_parents")
@@ -46,6 +47,7 @@ Sequel.migration do
46
47
  create_table :followers do
47
48
  foreign_key :user_id, :users, :null => false
48
49
  foreign_key :follower_id, :users, :null => false
50
+ DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
49
51
  primary_key [:user_id, :follower_id]
50
52
  end
51
53
  end
@@ -0,0 +1,37 @@
1
+ require 'sequel'
2
+
3
# Migration: adds an organization/user discriminator column to the users
# table and creates the organization_members join table.
#
# The up/down blocks are evaluated against the database being migrated,
# so schema and data statements are issued on that database directly
# rather than through a global DB constant.
Sequel.migration do
  up do

    puts("Adding organization descriminator field to table users")

    alter_table :users do
      add_column :type, "enum('USR', 'ORG')", :null => false
    end

    puts("Updating users with default values")
    # Mark every pre-existing row as a plain user. Uses the migrating
    # database (self) so the update cannot hit a different connection.
    transaction(:rollback => :reraise, :isolation => :committed) do
      self[:users].update(:type => "USR")
    end

    puts("Creating table organization-members")

    create_table :organization_members do
      foreign_key :org_id, :users, :null => false
      foreign_key :user_id, :users, :null => false
      primary_key [:org_id, :user_id]
      DateTime :created_at, :null => false,
                            :default => Sequel::CURRENT_TIMESTAMP
    end
  end

  down do
    puts("Droping table organization-members")
    drop_table :organization_members

    puts("Droping organization descriminator field to table users")
    alter_table :users do
      drop_column :type
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'sequel'
2
+
3
# Migration: creates the commit_comments table, whose rows reference the
# commits and users tables through foreign keys.
Sequel.migration do

  # Create the table.
  up do
    puts("Adding table commit comments")

    create_table(:commit_comments) do
      primary_key(:id)
      foreign_key(:commit_id, :commits, :null => false)
      foreign_key(:user_id, :users, :null => false)
      column(:body, String)
      column(:line, Integer, :null => true)
      column(:position, Integer, :null => true)
      column(:comment_id, Integer, :null => false, :unique => true)
      column(:ext_ref_id, String, :null => false, :size => 24, :default => "0")
      DateTime(:created_at, :null => false,
                            :default => Sequel::CURRENT_TIMESTAMP)
    end
  end

  # Remove the table again.
  down do
    drop_table(:commit_comments)
  end
end
@@ -29,8 +29,8 @@
29
29
  module GHTorrent
30
30
  module Retriever
31
31
 
32
+ include GHTorrent::Utils
32
33
  include GHTorrent::APIClient
33
- include GHTorrent::Settings
34
34
 
35
35
  def initialize(settings)
36
36
  super(settings)
@@ -44,16 +44,18 @@ module GHTorrent
44
44
  url = ghurl "users/#{user}"
45
45
  u = api_request(url)
46
46
 
47
- if u.nil?
47
+ if u.empty?
48
48
  throw GHTorrentException.new("Cannot find user #{user}")
49
49
  end
50
50
 
51
51
  unq = @persister.store(:users, u)
52
52
  u[@uniq] = unq
53
- info "Retriever: New user #{user}"
53
+ what = user_type(u['type'])
54
+ info "Retriever: New #{what} #{user}"
54
55
  u
55
56
  else
56
- debug "Retriever: Already got user #{user}"
57
+ what = user_type(stored_user.first['type'])
58
+ debug "Retriever: Already got #{what} #{user}"
57
59
  stored_user.first
58
60
  end
59
61
  end
@@ -63,10 +65,13 @@ module GHTorrent
63
65
  # http://develop.github.com/p/users.html
64
66
  def retrieve_user_byemail(email, name)
65
67
  url = ghurl_v2("user/email/#{email}")
66
- api_request(url)
68
+ r = api_request(url)
69
+
70
+ return nil if r.empty?
71
+ r
67
72
  end
68
73
 
69
- def retrieve_new_user_followers(user)
74
+ def retrieve_user_followers(user)
70
75
  stored_followers = @persister.find(:followers, {'follows' => user})
71
76
 
72
77
  followers = paged_api_request(ghurl "users/#{user}/followers")
@@ -88,6 +93,7 @@ module GHTorrent
88
93
  @persister.find(:followers, {'follows' => user})
89
94
  end
90
95
 
96
+ # Retrieve a single commit from a repo
91
97
  def retrieve_commit(repo, sha, user)
92
98
  commit = @persister.find(:commits, {'sha' => "#{sha}"})
93
99
 
@@ -95,7 +101,7 @@ module GHTorrent
95
101
  url = ghurl "repos/#{user}/#{repo}/commits/#{sha}"
96
102
  c = api_request(url)
97
103
 
98
- if c.nil?
104
+ if c.empty?
99
105
  throw GHTorrentException.new("Cannot find commit #{user}/#{repo}/#{sha}")
100
106
  end
101
107
 
@@ -109,6 +115,31 @@ module GHTorrent
109
115
  end
110
116
  end
111
117
 
118
# Retrieve the commits of a project, starting from the provided +sha+
# ("master" when +sha+ is nil). The value of the
# :mirror_commit_pages_new_repo setting is handed to paged_api_request
# (presumably as the page limit -- confirm against paged_api_request).
#
# Commits that are already persisted are only logged; every other commit
# is fetched through retrieve_commit.
#
# Returns an array with the results of retrieve_commit for the commits
# that were not yet stored.
def retrieve_commits(repo, sha, user)
  start_sha = sha.nil? ? "master" : sha

  url = ghurl "repos/#{user}/#{repo}/commits?last_sha=#{start_sha}"
  listing = paged_api_request(url, config(:mirror_commit_pages_new_repo))

  fetched = []
  listing.each do |entry|
    known = @persister.find(:commits, {'sha' => "#{entry['sha']}"})

    unless known.empty?
      debug "Retriever: Already got commit #{repo} -> #{entry['sha']}"
      next
    end

    fetched << retrieve_commit(repo, entry['sha'], user)
  end
  fetched
end
141
+
142
+
112
143
  def retrieve_repo(user, repo)
113
144
  stored_repo = @persister.find(:repos, {'owner.login' => user,
114
145
  'name' => repo })
@@ -116,7 +147,7 @@ module GHTorrent
116
147
  url = ghurl "repos/#{user}/#{repo}"
117
148
  r = api_request(url)
118
149
 
119
- if r.nil?
150
+ if r.empty?
120
151
  throw GHTorrentException.new("Cannot find repo #{user}/#{repo}")
121
152
  end
122
153
 
@@ -130,6 +161,94 @@ module GHTorrent
130
161
  end
131
162
  end
132
163
 
164
# Retrieve the organizations listed for +user+ by the API.
#
# Each entry of the paged "users/<user>/orgs" listing is resolved through
# retrieve_org using its 'login' field.
#
# Returns an array with one retrieve_org result per listed organization.
def retrieve_orgs(user)
  listing = paged_api_request(ghurl("users/#{user}/orgs"))
  listing.map do |entry|
    retrieve_org(entry['login'])
  end
end
170
+
171
# Retrieve a single organization by its login name. Organizations are
# looked up exactly like users, so this delegates to
# retrieve_user_byusername and returns its result.
def retrieve_org(org)
  retrieve_user_byusername org
end
175
+
176
# Retrieve the members of an organization.
#
# Fetches the paged member list for +org+, stores under :org_members every
# member that is not already persisted for this organization, and finally
# resolves each persisted member through retrieve_org.
#
# org:: login name of the organization
#
# Returns an array with one retrieve_org result per member stored for +org+.
def retrieve_org_members(org)
  url = ghurl "orgs/#{org}/members"
  stored_org_members = @persister.find(:org_members, {'org' => org})

  org_members = paged_api_request(url)
  org_members.each do |x|
    # Tag the API record with the organization it belongs to
    x['org'] = org

    # A member is known when a stored record matches both the organization
    # being processed and the member's login
    exists = stored_org_members.any? { |f|
      f['org'] == org && f['login'] == x['login']
    }

    if exists
      debug "Retriever: Member #{org} -> #{x['login']} exists"
    else
      @persister.store(:org_members, x)
      info "Retriever: Added member #{org} -> #{x['login']}"
    end
  end

  @persister.find(:org_members, {'org' => org}).map { |o| retrieve_org(o['login']) }
end
199
+
200
# Retrieve all commit comments for a specific repository.
#
# Fetches the paged comment list of +user+/+repo+ from the API, loads the
# comments already persisted for that repository, and hands both to
# store_commit_comments so that only the comments not yet stored are saved.
#
# repo:: repository name
# user:: login of the repository owner
#
# Returns whatever store_commit_comments returns.
def retrieve_repo_comments(repo, user)
  commit_comments = paged_api_request(ghurl "repos/#{user}/#{repo}/comments")
  stored_comments = @persister.find(:commit_comments,
                                    {'repo' => repo,
                                     'user' => user})
  # store_commit_comments expects the stored comments first and the freshly
  # retrieved ones second
  store_commit_comments(repo, user, stored_comments, commit_comments)
end
208
+
209
# Retrieve all comments for a single commit.
#
# Loads the comments already persisted for +sha+, fetches the commit's
# comment list from the API, passes both to store_commit_comments and then
# re-reads the persisted comments for the commit.
#
# user::     login of the repository owner
# repo::     repository name
# sha::      commit identifier
# reentrer:: currently unused; it belonged to a disabled optimization that
#            bulk-fetched all comments of a repository on first access
#
# Returns the persisted comments whose 'commit_id' equals +sha+.
def retrieve_commit_comments(user, repo, sha, reentrer = false)
  already_stored = @persister.find(:commit_comments, {'commit_id' => sha})

  comments_url = ghurl("repos/#{user}/#{repo}/commits/#{sha}/comments")
  from_api = paged_api_request(comments_url)

  store_commit_comments(repo, user, already_stored, from_api)

  @persister.find(:commit_comments, {'commit_id' => sha})
end
224
+
225
# Retrieve a single commit comment.
#
# Looks the comment up in the persister first. When it is not stored yet it
# is fetched from the API, tagged with +repo+ and +user+, persisted, and
# returned. When it is already stored, the first matching stored record is
# returned instead.
#
# user::     login of the repository owner
# repo::     repository name
# id::       comment identifier
# reentrer:: currently unused; it belonged to a disabled optimization that
#            bulk-fetched all comments of a repository on first access
#
# Returns the comment record, with the key named by @uniq set to the
# record's '_id' value.
def retrieve_commit_comment(user, repo, id, reentrer = false)
  stored = @persister.find(:commit_comments, {'repo' => repo,
                                              'user' => user, 'id' => id})
  if stored.empty?
    r = api_request(ghurl "repos/#{user}/#{repo}/comments/#{id}")
    r['repo'] = repo
    r['user'] = user
    @persister.store(:commit_comments, r)
    info "Retriever: Added commit comment #{r['commit_id']} -> #{r['id']}"
    r[@uniq] = r['_id']
    r
  else
    # find returns a collection of matches; work on the record itself, not
    # on the collection
    comment = stored.first
    debug "Retriever: Commit comment #{comment['commit_id']} -> #{comment['id']} exists"
    comment[@uniq] = comment['_id']
    comment
  end
end
251
+
133
252
  # Get current Github events
134
253
  def get_events
135
254
  api_request "https://api.github.com/events"
@@ -144,5 +263,24 @@ module GHTorrent
144
263
  def ghurl_v2(path)
145
264
  config(:mirror_urlbase_v2) + path
146
265
  end
266
+
267
# Persist the retrieved commit comments that are not stored yet.
#
# A retrieved comment counts as stored when some entry of +stored_comments+
# has the same 'commit_id' and 'id'. Comments that are not stored are tagged
# with +repo+ and +user+ and saved under :commit_comments.
#
# Returns +retrieved_comments+ (the result of iterating it with each).
def store_commit_comments(repo, user, stored_comments, retrieved_comments)
  retrieved_comments.each do |fetched|
    match = stored_comments.find do |saved|
      saved['commit_id'] == fetched['commit_id'] && saved['id'] == fetched['id']
    end

    if match.nil?
      fetched['repo'] = repo
      fetched['user'] = user

      @persister.store(:commit_comments, fetched)
      info "Retriever: Added commit comment #{fetched['commit_id']} -> #{fetched['id']}"
    else
      debug "Retriever: Commit comment #{fetched['commit_id']} -> #{fetched['id']} exists"
    end
  end
end
147
285
  end
148
286
  end
@@ -47,6 +47,7 @@ module GHTorrent
47
47
  :mirror_reqrate => "mirror.reqrate",
48
48
  :mirror_pollevery => "mirror.pollevery",
49
49
  :mirror_persister => "mirror.persister",
50
+ :mirror_commit_pages_new_repo => "mirror.commit_pages_new_repo",
50
51
 
51
52
  :uniq_id => "uniq_id"
52
53
  }
@@ -28,6 +28,11 @@
28
28
 
29
29
  module GHTorrent
30
30
  module Utils
31
+
32
# Hook run when this module is included somewhere: the including
# class/module is also extended with it, so the helper methods become
# callable on +other+ itself as well as on its instances.
def self.included(other)
  other.extend(self)
end
35
+
31
36
  # Read a value whose format is "foo.bar.baz" from a hierarchical map
32
37
  # (the result of a JSON parse or a Mongo query), where a dot represents
33
38
  # one level deep in the result hierarchy.
@@ -54,5 +59,13 @@ module GHTorrent
54
59
  end
55
60
  end
56
61
  end
62
+
63
# Map an account type string to its short code: "USR" when +type+ is
# exactly "User", "ORG" for any other value (nil included).
def user_type(type)
  type == "User" ? "USR" : "ORG"
end
57
70
  end
58
71
  end
@@ -1,5 +1,5 @@
1
1
  require "test/unit"
2
- require 'ghtorrent-old'
2
+ require 'ghtorrent'
3
3
 
4
4
  class CallStackTest < Test::Unit::TestCase
5
5
 
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ghtorrent
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
4
+ hash: 13
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- version: "0.2"
8
+ - 3
9
+ version: "0.3"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Georgios Gousios
@@ -107,6 +107,38 @@ dependencies:
107
107
  version: "3.35"
108
108
  type: :runtime
109
109
  version_requirements: *id006
110
+ - !ruby/object:Gem::Dependency
111
+ name: sqlite3-ruby
112
+ prerelease: false
113
+ requirement: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 31
119
+ segments:
120
+ - 1
121
+ - 3
122
+ - 2
123
+ version: 1.3.2
124
+ type: :runtime
125
+ version_requirements: *id007
126
+ - !ruby/object:Gem::Dependency
127
+ name: daemons
128
+ prerelease: false
129
+ requirement: &id008 !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ hash: 3
135
+ segments:
136
+ - 1
137
+ - 1
138
+ - 8
139
+ version: 1.1.8
140
+ type: :runtime
141
+ version_requirements: *id008
110
142
  description: |-
111
143
  A library and a collection of associated programs
112
144
  to mirror and process Github data
@@ -128,8 +160,9 @@ files:
128
160
  - lib/ghtorrent/ghtorrent.rb
129
161
  - lib/ghtorrent/logging.rb
130
162
  - lib/ghtorrent/migrations/001_init_schema.rb
131
- - lib/ghtorrent/migrations/002_add_followers_created_at.rb
132
- - lib/ghtorrent/migrations/003_add_external_ref_ids.rb
163
+ - lib/ghtorrent/migrations/002_add_external_ref_ids.rb
164
+ - lib/ghtorrent/migrations/003_add_orgs.rb
165
+ - lib/ghtorrent/migrations/004_add_commit_comments.rb
133
166
  - lib/ghtorrent/persister.rb
134
167
  - lib/ghtorrent/retriever.rb
135
168
  - lib/ghtorrent/settings.rb
@@ -1,15 +0,0 @@
1
- require 'sequel'
2
-
3
- Sequel.migration do
4
- up do
5
- alter_table :followers do
6
- add_column :created_at, :Time, :null => false, :default => Time.now
7
- end
8
- end
9
-
10
- down do
11
- alter_table :followers do
12
- drop_column :created_at
13
- end
14
- end
15
- end