hubba 0.5.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,83 @@
1
module Hubba

  ## Build a "reposet" - a hash that maps "<owner> (<repo count>)" labels
  ## to the alphabetically sorted repo names of that owner.
  ##
  ## users - one or more github user logins (strings); a single array of
  ##         logins is accepted too
  ## orgs  - include repos from org(anizations) too;
  ##         note: only the orgs of the first (primary) user are looked up
  ## cache - save json responses to cache_dir - change to/use debug/tmp_dir? - why? why not?
  ##
  ## Forked repos of users get auto-excluded from the result;
  ## forked repos in orgs get listed as forks but do NOT get excluded.
  ## Prints one line per repo plus a summary of all forks (if any) to stdout.
  ##
  ## Returns the hash (empty if no users are given).
  def self.reposet( *users, orgs: true,
                            cache: false )
    users = users.flatten   ## allow reposet( ['a','b'] ) as well as reposet( 'a', 'b' )

    gh = Github.new

    forks = []   ## collects the labels of all forked repos (users and orgs)
    h     = {}

    users.each do |user|
      res = gh.user_repos( user )
      save_json( "#{config.cache_dir}/users~#{user}~repos.json", res.data ) if cache

      ## note: forked personal repos get auto-excluded
      names = collect_repo_names( res.data, forks, exclude_forks: true )
      h[ "#{user} (#{names.size})" ] = names
    end

    ## all repos from orgs
    ##   note: for now only use first (primary user) - why? why not?
    ##   note: skip if there is no user at all (nothing to look up)
    if orgs && !users.empty?
      user     = users[0]
      orgs_res = gh.user_orgs( user )
      save_json( "#{config.cache_dir}/users~#{user}~orgs.json", orgs_res.data ) if cache

      orgs_res.logins.each do |login|
        ## next if ['xxx'].include?( login )    ## add orgs here to skip

        repos_res = gh.org_repos( login )
        save_json( "#{config.cache_dir}/orgs~#{login}~repos.json", repos_res.data ) if cache

        ## note: forked repos in orgs get NOT auto-excluded!!!
        names = collect_repo_names( repos_res.data, forks, exclude_forks: false )
        h[ "#{login} (#{names.size})" ] = names
      end
    end

    unless forks.empty?
      puts
      puts "#{forks.size} fork(s):"
      puts forks
    end

    h
  end  ## method reposet


  ## Helper for reposet: print one line per repo record (hash with the
  ## 'name', 'full_name' and 'fork' keys) and return the sorted repo names.
  ##
  ## forks gets appended to in-place with a label for every forked repo;
  ## if exclude_forks is true, forked repos are left out of the returned names.
  def self.collect_repo_names( repos, forks, exclude_forks: )
    names = []
    repos.each do |repo|
      if repo['fork']
        print "FORK "
        if exclude_forks
          forks << "#{repo['full_name']} (AUTO-EXCLUDED)"
        else
          forks << repo['full_name']
          names << repo['name']
        end
      else
        print " "
        names << repo['name']
      end
      print repo['full_name']
      print "\n"
    end
    names.sort
  end
  private_class_method :collect_repo_names

end # module Hubba
@@ -5,221 +5,112 @@ module Hubba
5
5
 
6
6
  class Stats ## todo/check: rename to GithubRepoStats or RepoStats - why? why not?
7
7
 
8
- attr_reader :data
9
-
10
8
  def initialize( full_name )
11
9
  @data = {}
12
10
  @data['full_name'] = full_name # e.g. poole/hyde etc.
13
- end
14
-
15
-
16
- def full_name() @full_name ||= @data['full_name']; end
17
-
18
- ## note: return datetime objects (NOT strings); if not present/available return nil/null
19
- def created_at() @created_at ||= @data['created_at'] ? DateTime.strptime( @data['created_at'], '%Y-%m-%dT%H:%M:%S') : nil; end
20
- def updated_at() @updated_at ||= @data['updated_at'] ? DateTime.strptime( @data['updated_at'], '%Y-%m-%dT%H:%M:%S') : nil; end
21
- def pushed_at() @pushed_at ||= @data['pushed_at'] ? DateTime.strptime( @data['pushed_at'], '%Y-%m-%dT%H:%M:%S') : nil; end
22
-
23
- ## date (only) versions
24
- def created() @created ||= @data['created_at'] ? Date.strptime( @data['created_at'], '%Y-%m-%d') : nil; end
25
- def updated() @updated ||= @data['updated_at'] ? Date.strptime( @data['updated_at'], '%Y-%m-%d') : nil; end
26
- def pushed() @pushed ||= @data['pushed_at'] ? Date.strptime( @data['pushed_at'], '%Y-%m-%d') : nil; end
27
-
28
-
29
-
30
- def history() @history ||= @data['history'] ? build_history( @data['history'] ) : nil; end
31
-
32
- def size
33
- # size of repo in kb (as reported by github api)
34
- @size ||= @data['size'] || 0 ## return 0 if not found - why? why not? (return nil - why? why not??)
35
- end
36
-
37
- def stars
38
- ## return last stargazers_count entry (as number; 0 if not found)
39
- @stars ||= history ? history[0].stars : 0
40
- end
41
-
42
-
43
- def commits() @data['commits']; end
44
-
45
- def last_commit() ## convenience shortcut; get first/last commit (use [0]) or nil
46
- if @data['commits'] && @data['commits'][0]
47
- @data['commits'][0]
48
- else
49
- nil
50
- end
51
- end
52
-
53
- def committed() ## last commit date (from author NOT committer)
54
- @committed ||= last_commit ? Date.strptime( last_commit['author']['date'], '%Y-%m-%d') : nil
55
- end
56
11
 
57
- def committed_at() ## last commit date (from author NOT committer)
58
- @committed_at ||= last_commit ? DateTime.strptime( last_commit['author']['date'], '%Y-%m-%dT%H:%M:%S') : nil
12
+ @cache = {} ## keep a lookup cache - why? why not?
59
13
  end
60
14
 
61
15
 
62
- def last_commit_message() ## convenience shortcut; last commit message
63
- h = last_commit
64
-
65
- committer_name = h['committer']['name']
66
- author_name = h['author']['name']
67
- message = h['message']
68
-
69
- buf = ""
70
- buf << message
71
- buf << " by #{author_name}"
72
-
73
- if committer_name != author_name
74
- buf << " w/ #{committer_name}"
75
- end
76
- end # method commit_message
77
-
78
-
79
-
80
- def reset_cache
81
- ## reset (invalidate) cached values from data hash
82
- ## use after reading or fetching
83
- @full_name = nil
84
- @created_at = @updated_at = @pushed_at = nil
85
- @created = @updated = @pused = nil
86
- @history = nil
87
- @size = nil
88
- @stars = nil
89
-
90
- @committed_at = nil
91
- @committed = nil
16
+ ##################
17
+ ## update
18
+ def update_traffic( clones: nil,
19
+ views: nil,
20
+ paths: nil,
21
+ referrers: nil )
22
+
23
+ traffic = @data[ 'traffic' ] ||= {}
24
+
25
+ summary = traffic['summary'] ||= {}
26
+ history = traffic['history'] ||= {}
27
+
28
+
29
+ if views
30
+ raise ArgumentError, "Github::Resource expected; got #{views.class.name}" unless views.is_a?( Github::Resource )
31
+ =begin
32
+ {"count"=>1526,
33
+ "uniques"=>287,
34
+ "views"=>
35
+ [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
36
+ {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
37
+ ...
38
+ ]}>
39
+ =end
40
+
41
+ ## keep latest (summary) record of last two weeks (14 days)
42
+ summary['views'] = { 'count' => views.data['count'],
43
+ 'uniques' => views.data['uniques'] }
44
+
45
+ ## update history / day-by-day items / timeline
46
+ views.data['views'].each do |view|
47
+ # e.g. "2020-09-27T00:00:00Z"
48
+ timestamp = DateTime.strptime( view['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
49
+
50
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
51
+ ## note: merge "in-place"
52
+ item.merge!( { 'views' => { 'count' => view['count'],
53
+ 'uniques' => view['uniques'] }} )
92
54
  end
55
+ end
93
56
 
94
-
95
- ########
96
- ## build history items (structs)
97
-
98
- class HistoryItem
99
-
100
- attr_reader :date, :stars ## read-only attributes
101
- attr_accessor :prev, :next ## read/write attributes (for double linked list/nodes/items)
102
-
103
- def initialize( date:, stars: )
104
- @date = date
105
- @stars = stars
106
- @next = nil
107
- end
108
-
109
- ## link items (append item at the end/tail)
110
- def append( item )
111
- @next = item
112
- item.prev = self
113
- end
114
-
115
- def diff_days
116
- if @next
117
- ## note: use jd=julian days for calculation
118
- @date.jd - @next.date.jd
119
- else
120
- nil ## last item (tail)
121
- end
122
- end
123
-
124
- def diff_stars
125
- if @next
126
- @stars - @next.stars
127
- else
128
- nil ## last item (tail)
129
- end
130
- end
131
- end ## class HistoryItem
132
-
133
-
134
- def build_history( timeseries )
135
- items = []
136
-
137
- keys = timeseries.keys.sort.reverse ## newest (latest) items first
138
- keys.each do |key|
139
- h = timeseries[ key ]
140
-
141
- item = HistoryItem.new(
142
- date: Date.strptime( key, '%Y-%m-%d' ),
143
- stars: h['stargazers_count'] || 0 )
144
-
145
- ## link items
146
- last_item = items[-1]
147
- last_item.append( item ) if last_item ## if not nil? append (note first item has no prev item)
148
-
149
- items << item
150
- end
151
-
152
- ## todo/check: return [] for empty items array (items.empty?) - why?? why not??
153
- if items.empty?
154
- nil
155
- else
156
- items
157
- end
158
- end ## method build_history
159
-
160
-
161
-
162
- def calc_diff_stars( samples: 3, days: 30 )
163
- ## samples: use n history item samples e.g. 3 samples
164
- ## days e.g. 7 days (per week), 30 days (per month)
165
-
166
- if history.nil?
167
- nil ## todo/check: return 0.0 too - why? why not?
168
- elsif history.size == 1
169
- ## just one item; CANNOT calc diff; return zero
170
- 0.0
171
- else
172
- idx = [history.size, samples].min ## calc last index
173
- last = history[idx-1]
174
- first = history[0]
175
-
176
- diff_days = first.date.jd - last.date.jd
177
- diff_stars = first.stars - last.stars
178
-
179
- ## note: use factor 1000 for fixed integer division
180
- ## converts to float at the end
181
-
182
- ## todo: check for better way (convert to float upfront - why? why not?)
183
-
184
- diff = (diff_stars * days * 1000) / diff_days
185
- puts "diff=#{diff}:#{diff.class.name}" ## check if it's a float
186
- (diff.to_f/1000.0)
57
+ if clones
58
+ raise ArgumentError, "Github::Resource expected; got #{clones.class.name}" unless clones.is_a?( Github::Resource )
59
+ =begin
60
+ {"count"=>51,
61
+ "uniques"=>17,
62
+ "clones"=>
63
+ [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
64
+ {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
65
+ ...
66
+ ]}
67
+ =end
68
+
69
+ ## keep latest (summary) record of last two weeks (14 days)
70
+ summary['clones'] = { 'count' => clones.data['count'],
71
+ 'uniques' => clones.data['uniques'] }
72
+
73
+ ## update history / day-by-day items / timeline
74
+ clones.data['clones'].each do |clone|
75
+ # e.g. "2020-09-27T00:00:00Z"
76
+ timestamp = DateTime.strptime( clone['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
77
+
78
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
79
+ ## note: merge "in-place"
80
+ item.merge!( { 'clones' => { 'count' => clone['count'],
81
+ 'uniques' => clone['uniques'] }} )
187
82
  end
188
83
  end
189
84
 
190
- def history_str
191
- ## returns "pretty printed" history as string buffer
192
- buf = ''
193
- buf << "[#{history.size}]: "
194
-
195
- history.each do |item|
196
- buf << "#{item.stars}"
197
-
198
- diff_stars = item.diff_stars
199
- diff_days = item.diff_days
200
- if diff_stars && diff_days ## note: last item has no diffs
201
- if diff_stars > 0 || diff_stars < 0
202
- if diff_stars > 0
203
- buf << " (+#{diff_stars}"
204
- else
205
- buf << " (#{diff_stars}"
206
- end
207
- buf << " in #{diff_days}d) "
208
- else ## diff_stars == 0
209
- buf << " (#{diff_days}d) "
210
- end
211
- end
212
- end
213
- buf
214
- end # method history_str
85
+ if paths
86
+ raise ArgumentError, "Github::Resource expected; got #{paths.class.name}" unless paths.is_a?( Github::Resource )
87
+ =begin
88
+ [{"path"=>"/openfootball/england",
89
+ "title"=>
90
+ "openfootball/england: Free open public domain football data for England (and ...",
91
+ "count"=>394,
92
+ "uniques"=>227},
93
+ =end
94
+ summary['paths'] = paths.data
95
+ end
215
96
 
97
+ if referrers
98
+ raise ArgumentError, "Github::Resource expected; got #{referrers.class.name}" unless referrers.is_a?( Github::Resource )
99
+ =begin
100
+ [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
101
+ {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
102
+ {"referrer"=>"Google", "count"=>5, "uniques"=>5},
103
+ {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
104
+ =end
105
+ summary['referrers'] = referrers.data
106
+ end
107
+ end # method update_traffic
216
108
 
217
- ###############################
218
- ## fetch / read / write methods
219
109
 
220
- def fetch( gh ) ## update stats / fetch data from github via api
221
- puts "fetching #{full_name}..."
222
- repo = gh.repo( full_name )
110
+ def update( repo,
111
+ commits: nil,
112
+ topics: nil ) ## update stats / fetch data from github via api
113
+ raise ArgumentError, "Github::Resource expected; got #{repo.class.name}" unless repo.is_a?( Github::Resource )
223
114
 
224
115
  ## e.g. 2015-05-11T20:21:43Z
225
116
  ## puts Time.iso8601( repo.data['created_at'] )
@@ -227,74 +118,128 @@ module Hubba
227
118
  @data['updated_at'] = repo.data['updated_at']
228
119
  @data['pushed_at'] = repo.data['pushed_at']
229
120
 
230
- @data['size'] = repo.data['size'] # size in kb (kilobyte)
121
+ @data['size'] = repo.data['size'] # note: size in kb (kilobyte)
122
+
123
+ @data['description'] = repo.data['description']
124
+ @data['language'] = repo.data['language'] ## note: might be nil!!!
125
+
231
126
 
127
+
128
+ ########################################
129
+ #### history / by date record
232
130
  rec = {}
233
131
 
234
- puts "stargazers_count"
235
- puts repo.data['stargazers_count']
236
132
  rec['stargazers_count'] = repo.data['stargazers_count']
133
+ rec['forks_count'] = repo.data['forks_count']
134
+
237
135
 
238
136
  today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
239
137
  puts "add record #{today} to history..."
240
138
  pp rec # check if stargazers_count is a number (NOT a string)
241
139
 
242
- @data[ 'history' ] ||= {}
243
- @data[ 'history' ][ today ] = rec
140
+ history = @data[ 'history' ] ||= {}
141
+ item = history[ today ] ||= {}
142
+ ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
143
+ item.merge!( rec )
144
+
145
+
244
146
 
245
147
  ##########################
246
148
  ## also check / keep track of (latest) commit
247
- commits = gh.repo_commits( full_name )
248
- puts "last commit/update:"
249
- ## pp commits
250
- commit = {
251
- 'committer' => {
252
- 'date' => commits.data[0]['commit']['committer']['date'],
253
- 'name' => commits.data[0]['commit']['committer']['name']
254
- },
255
- 'author' => {
256
- 'date' => commits.data[0]['commit']['author']['date'],
257
- 'name' => commits.data[0]['commit']['author']['name']
258
- },
259
- 'message' => commits.data[0]['commit']['message']
260
- }
261
-
262
- ## for now store only the latest commit (e.g. a single commit in an array)
263
- @data[ 'commits' ] = [commit]
149
+ if commits
150
+ raise ArgumentError, "Github::Resource expected; got #{commits.class.name}" unless commits.is_a?( Github::Resource )
151
+
152
+ puts "update - last commit:"
153
+ ## pp commits
154
+ commit = {
155
+ 'committer' => {
156
+ 'date' => commits.data[0]['commit']['committer']['date'],
157
+ 'name' => commits.data[0]['commit']['committer']['name']
158
+ },
159
+ 'author' => {
160
+ 'date' => commits.data[0]['commit']['author']['date'],
161
+ 'name' => commits.data[0]['commit']['author']['name']
162
+ },
163
+ 'message' => commits.data[0]['commit']['message']
164
+ }
165
+
166
+ ## for now store only the latest commit (e.g. a single commit in an array)
167
+ @data[ 'commits' ] = [commit]
168
+ end
169
+
170
+ if topics
171
+ raise ArgumentError, "Github::Resource expected; got #{topics.class.name}" unless topics.is_a?( Github::Resource )
172
+
173
+ puts "update - topics:"
174
+ ## e.g.
175
+ # {"names"=>
176
+ # ["opendata",
177
+ # "football",
178
+ # "seriea",
179
+ # "italia",
180
+ # "italy",
181
+ # "juve",
182
+ # "inter",
183
+ # "napoli",
184
+ # "roma",
185
+ # "sqlite"]}
186
+ #
187
+ # {"names"=>[]}
188
+
189
+ @data[ 'topics' ] = topics.data['names']
190
+ end
191
+
264
192
 
265
193
  pp @data
266
194
 
267
- reset_cache
195
+
196
+
197
+ ## reset (invalidate) cached values from data hash
198
+ ## use after reading or fetching
199
+ @cache = {}
200
+
268
201
  self ## return self for (easy chaining)
269
202
  end
270
203
 
271
204
 
205
+ ########################################
206
+ ## read / write methods / helpers
207
+ def write
208
+ basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
209
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
210
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
211
+ path = "#{data_dir}/#{basename}.json"
272
212
 
273
- def write( data_dir: './data' )
274
- basename = full_name.gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
275
- puts "writing stats to #{basename}..."
276
- File.open( "#{data_dir}/#{basename}.json", 'w:utf-8' ) do |f|
277
- f.write JSON.pretty_generate( data )
213
+ puts " writing stats to #{basename} (#{data_dir})..."
214
+
215
+ FileUtils.mkdir_p( File.dirname( path )) ## make sure path exists
216
+ File.open( path, 'w:utf-8' ) do |f|
217
+ f.write( JSON.pretty_generate( @data ))
278
218
  end
279
219
  self ## return self for (easy chaining)
280
- end
220
+ end # method write
281
221
 
282
222
 
283
- def read( data_dir: './data' )
223
+ def read
284
224
  ## note: skip reading if file not present
285
- basename = full_name.gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
286
- filename = "#{data_dir}/#{basename}.json"
287
- if File.exist?( filename )
288
- puts "reading stats from #{basename}..."
289
- json = File.open( filename, 'r:utf-8' ) { |file| file.read } ## todo/fix: use read_utf8
225
+ basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
226
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
227
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
228
+ path = "#{data_dir}/#{basename}.json"
229
+
230
+ if File.exist?( path )
231
+ puts " reading stats from #{basename} (#{data_dir})..."
232
+ json = File.open( path, 'r:utf-8' ) { |f| f.read }
290
233
  @data = JSON.parse( json )
291
- reset_cache
234
+
235
+ ## reset (invalidate) cached values from data hash
236
+ ## use after reading or fetching
237
+ @cache = {}
292
238
  else
293
- puts "skipping reading stats from #{basename} -- file not found"
239
+ puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
294
240
  end
295
241
  self ## return self for (easy chaining)
296
- end
297
-
242
+ end # method read
298
243
  end # class Stats
299
244
 
300
245