hubba 0.5.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,83 @@
1
module Hubba

  ## Build a "reposet" - a hash that maps an owner label to the sorted list
  ## of repo names for that owner, e.g.
  ##
  ##   { "geraldb (2)" => ["a", "b"], "openfootball (1)" => ["england"] }
  ##
  ## users - one or more github user (login) names
  ## orgs  - include repos from org(anizations) too;
  ##           note: for now only the orgs of the first (primary) user get used
  ## cache - save json responses to cache_dir - change to/use debug/tmp_dir? - why? why not?
  ##
  ## Prints every repo seen (forks get marked with FORK) plus a fork summary
  ## and returns the hash. Returns an empty hash if no users are passed in.
  def self.reposet( *users, orgs: true,
                            cache: false )
    gh = Github.new

    forks = []   ## full names of all forked repos seen (for the summary at the end)
    h     = {}

    users.each do |user|
      res = gh.user_repos( user )
      save_json( "#{config.cache_dir}/users~#{user}~repos.json", res.data ) if cache

      ## check for forked repos (auto-exclude personal by default)
      repos = collect_reposet_names( res.data, forks, exclude_forks: true )
      h[ "#{user} (#{repos.size})" ] = repos.sort
    end

    ## all repos from orgs
    ##  note: for now only use first (primary user) - why? why not?
    ##  note: skip if no user was passed in (nothing to look up)
    primary = users.first
    if orgs && primary
      orgs_res = gh.user_orgs( primary )
      save_json( "#{config.cache_dir}/users~#{primary}~orgs.json", orgs_res.data ) if cache

      orgs_res.logins.each do |login|
        ## next if ['xxx'].include?( login )    ## add orgs here to skip

        res = gh.org_repos( login )
        save_json( "#{config.cache_dir}/orgs~#{login}~repos.json", res.data ) if cache

        ## note: forked repos in orgs get NOT auto-excluded!!!
        repos = collect_reposet_names( res.data, forks, exclude_forks: false )
        h[ "#{login} (#{repos.size})" ] = repos.sort
      end
    end

    unless forks.empty?
      puts
      puts "#{forks.size} fork(s):"
      puts forks
    end

    h
  end  ## method reposet


  ## (internal) helper - print one line per repo record and
  ##   return the names of the repos to keep.
  ##
  ## data          - array of repo records (hashes with 'name', 'full_name', 'fork')
  ## forks         - array that gets every fork appended (changed in-place)
  ## exclude_forks - if true, forks get dropped from the returned names
  ##                   (and marked as AUTO-EXCLUDED in forks)
  def self.collect_reposet_names( data, forks, exclude_forks: )
    data.each_with_object( [] ) do |repo, names|
      if repo['fork']
        print "FORK "
        if exclude_forks
          forks << "#{repo['full_name']} (AUTO-EXCLUDED)"
        else
          forks << repo['full_name']
          names << repo['name']
        end
      else
        print " "
        names << repo['name']
      end
      print repo['full_name']
      print "\n"
    end
  end
  private_class_method :collect_reposet_names

end # module Hubba
@@ -5,296 +5,257 @@ module Hubba
5
5
 
6
6
  class Stats ## todo/check: rename to GithubRepoStats or RepoStats - why? why not?
7
7
 
8
- attr_reader :data
9
-
10
8
  def initialize( full_name )
11
9
  @data = {}
12
10
  @data['full_name'] = full_name # e.g. poole/hyde etc.
13
- end
14
-
15
-
16
- def full_name() @full_name ||= @data['full_name']; end
17
-
18
- ## note: return datetime objects (NOT strings); if not present/available return nil/null
19
- def created_at() @created_at ||= @data['created_at'] ? DateTime.strptime( @data['created_at'], '%Y-%m-%dT%H:%M:%S') : nil; end
20
- def updated_at() @updated_at ||= @data['updated_at'] ? DateTime.strptime( @data['updated_at'], '%Y-%m-%dT%H:%M:%S') : nil; end
21
- def pushed_at() @pushed_at ||= @data['pushed_at'] ? DateTime.strptime( @data['pushed_at'], '%Y-%m-%dT%H:%M:%S') : nil; end
22
-
23
- ## date (only) versions
24
- def created() @created ||= @data['created_at'] ? Date.strptime( @data['created_at'], '%Y-%m-%d') : nil; end
25
- def updated() @updated ||= @data['updated_at'] ? Date.strptime( @data['updated_at'], '%Y-%m-%d') : nil; end
26
- def pushed() @pushed ||= @data['pushed_at'] ? Date.strptime( @data['pushed_at'], '%Y-%m-%d') : nil; end
27
-
28
-
29
-
30
- def history() @history ||= @data['history'] ? build_history( @data['history'] ) : nil; end
31
-
32
- def size
33
- # size of repo in kb (as reported by github api)
34
- @size ||= @data['size'] || 0 ## return 0 if not found - why? why not? (return nil - why? why not??)
35
- end
36
11
 
37
- def stars
38
- ## return last stargazers_count entry (as number; 0 if not found)
39
- @stars ||= history ? history[0].stars : 0
12
+ @cache = {} ## keep a lookup cache - why? why not?
40
13
  end
41
14
 
42
15
 
43
- def commits() @data['commits']; end
44
-
45
- def last_commit() ## convenience shortcut; get first/last commit (use [0]) or nil
46
- if @data['commits'] && @data['commits'][0]
47
- @data['commits'][0]
48
- else
49
- nil
50
- end
51
- end
52
-
53
- def committed() ## last commit date (from author NOT committer)
54
- @committed ||= last_commit ? Date.strptime( last_commit['author']['date'], '%Y-%m-%d') : nil
16
+ ##################
17
+ ## update
18
+ def update_traffic( clones: nil,
19
+ views: nil,
20
+ paths: nil,
21
+ referrers: nil )
22
+
23
+ traffic = @data[ 'traffic' ] ||= {}
24
+
25
+ summary = traffic['summary'] ||= {}
26
+ history = traffic['history'] ||= {}
27
+
28
+
29
+ if views
30
+ raise ArgumentError, "Github::Resource expected; got #{views.class.name}" unless views.is_a?( Github::Resource )
31
+ =begin
32
+ {"count"=>1526,
33
+ "uniques"=>287,
34
+ "views"=>
35
+ [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
36
+ {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
37
+ ...
38
+ ]}>
39
+ =end
40
+
41
+ ## keep lastest (summary) record of last two weeks (14 days)
42
+ summary['views'] = { 'count' => views.data['count'],
43
+ 'uniques' => views.data['uniques'] }
44
+
45
+ ## update history / day-by-day items / timeline
46
+ views.data['views'].each do |view|
47
+ # e.g. "2020-09-27T00:00:00Z"
48
+ timestamp = DateTime.strptime( view['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
49
+
50
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
51
+ ## note: merge "in-place"
52
+ item.merge!( { 'views' => { 'count' => view['count'],
53
+ 'uniques' => view['uniques'] }} )
55
54
  end
55
+ end
56
56
 
57
- def committed_at() ## last commit date (from author NOT committer)
58
- @committed_at ||= last_commit ? DateTime.strptime( last_commit['author']['date'], '%Y-%m-%dT%H:%M:%S') : nil
57
+ if clones
58
+ raise ArgumentError, "Github::Resource expected; got #{clones.class.name}" unless clones.is_a?( Github::Resource )
59
+ =begin
60
+ {"count"=>51,
61
+ "uniques"=>17,
62
+ "clones"=>
63
+ [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
64
+ {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
65
+ ...
66
+ ]}
67
+ =end
68
+
69
+ ## keep lastest (summary) record of last two weeks (14 days)
70
+ summary['clones'] = { 'count' => clones.data['count'],
71
+ 'uniques' => clones.data['uniques'] }
72
+
73
+ ## update history / day-by-day items / timeline
74
+ clones.data['clones'].each do |clone|
75
+ # e.g. "2020-09-27T00:00:00Z"
76
+ timestamp = DateTime.strptime( clone['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
77
+
78
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
79
+ ## note: merge "in-place"
80
+ item.merge!( { 'clones' => { 'count' => clone['count'],
81
+ 'uniques' => clone['uniques'] }} )
59
82
  end
83
+ end
60
84
 
85
+ if paths
86
+ raise ArgumentError, "Github::Resource expected; got #{paths.class.name}" unless paths.is_a?( Github::Resource )
87
+ =begin
88
+ [{"path"=>"/openfootball/england",
89
+ "title"=>
90
+ "openfootball/england: Free open public domain football data for England (and ...",
91
+ "count"=>394,
92
+ "uniques"=>227},
93
+ =end
94
+ summary['paths'] = paths.data
95
+ end
61
96
 
62
- def last_commit_message() ## convenience shortcut; last commit message
63
- h = last_commit
64
-
65
- committer_name = h['committer']['name']
66
- author_name = h['author']['name']
67
- message = h['message']
68
-
69
- buf = ""
70
- buf << message
71
- buf << " by #{author_name}"
72
-
73
- if committer_name != author_name
74
- buf << " w/ #{committer_name}"
75
- end
76
- end # method commit_message
77
-
97
+ if referrers
98
+ raise ArgumentError, "Github::Resource expected; got #{referrers.class.name}" unless referrers.is_a?( Github::Resource )
99
+ =begin
100
+ [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
101
+ {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
102
+ {"referrer"=>"Google", "count"=>5, "uniques"=>5},
103
+ {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
104
+ =end
105
+ summary['referrers'] = referrers.data
106
+ end
107
+ end # method update_traffic
78
108
 
79
109
 
80
- def reset_cache
81
- ## reset (invalidate) cached values from data hash
82
- ## use after reading or fetching
83
- @full_name = nil
84
- @created_at = @updated_at = @pushed_at = nil
85
- @created = @updated = @pused = nil
86
- @history = nil
87
- @size = nil
88
- @stars = nil
89
-
90
- @committed_at = nil
91
- @committed = nil
92
- end
110
+ def update( repo,
111
+ commits: nil,
112
+ topics: nil,
113
+ languages: nil ) ## update stats / fetch data from github via api
114
+ raise ArgumentError, "Github::Resource expected; got #{repo.class.name}" unless repo.is_a?( Github::Resource )
93
115
 
116
+ ## e.g. 2015-05-11T20:21:43Z
117
+ ## puts Time.iso8601( repo.data['created_at'] )
118
+ @data['created_at'] = repo.data['created_at']
119
+ @data['updated_at'] = repo.data['updated_at']
120
+ @data['pushed_at'] = repo.data['pushed_at']
94
121
 
95
- ########
96
- ## build history items (structs)
122
+ @data['size'] = repo.data['size'] # note: size in kb (kilobyte)
97
123
 
98
- class HistoryItem
124
+ @data['description'] = repo.data['description']
99
125
 
100
- attr_reader :date, :stars ## read-only attributes
101
- attr_accessor :prev, :next ## read/write attributes (for double linked list/nodes/items)
126
+ ### todo/check - remove language (always use languages - see below) - why? why not?
127
+ @data['language'] = repo.data['language'] ## note: might be nil!!!
102
128
 
103
- def initialize( date:, stars: )
104
- @date = date
105
- @stars = stars
106
- @next = nil
107
- end
108
129
 
109
- ## link items (append item at the end/tail)
110
- def append( item )
111
- @next = item
112
- item.prev = self
113
- end
114
130
 
115
- def diff_days
116
- if @next
117
- ## note: use jd=julian days for calculation
118
- @date.jd - @next.date.jd
119
- else
120
- nil ## last item (tail)
121
- end
122
- end
131
+ ########################################
132
+ #### history / by date record
133
+ rec = {}
123
134
 
124
- def diff_stars
125
- if @next
126
- @stars - @next.stars
127
- else
128
- nil ## last item (tail)
129
- end
130
- end
131
- end ## class HistoryItem
135
+ rec['stargazers_count'] = repo.data['stargazers_count']
136
+ rec['forks_count'] = repo.data['forks_count']
132
137
 
133
138
 
134
- def build_history( timeseries )
135
- items = []
139
+ today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
140
+ puts "add record #{today} to history..."
141
+ pp rec # check if stargazers_count is a number (NOT a string)
136
142
 
137
- keys = timeseries.keys.sort.reverse ## newest (latest) items first
138
- keys.each do |key|
139
- h = timeseries[ key ]
143
+ history = @data[ 'history' ] ||= {}
144
+ item = history[ today ] ||= {}
145
+ ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
146
+ item.merge!( rec )
140
147
 
141
- item = HistoryItem.new(
142
- date: Date.strptime( key, '%Y-%m-%d' ),
143
- stars: h['stargazers_count'] || 0 )
144
148
 
145
- ## link items
146
- last_item = items[-1]
147
- last_item.append( item ) if last_item ## if not nil? append (note first item has no prev item)
148
149
 
149
- items << item
150
+ ##########################
151
+ ## also check / keep track of (latest) commit
152
+ if commits
153
+ raise ArgumentError, "Github::Resource expected; got #{commits.class.name}" unless commits.is_a?( Github::Resource )
154
+
155
+ puts "update - last commit:"
156
+ ## pp commits
157
+ commit = {
158
+ 'committer' => {
159
+ 'date' => commits.data[0]['commit']['committer']['date'],
160
+ 'name' => commits.data[0]['commit']['committer']['name']
161
+ },
162
+ 'author' => {
163
+ 'date' => commits.data[0]['commit']['author']['date'],
164
+ 'name' => commits.data[0]['commit']['author']['name']
165
+ },
166
+ 'message' => commits.data[0]['commit']['message']
167
+ }
168
+
169
+ ## for now store only the latest commit (e.g. a single commit in an array)
170
+ @data[ 'commits' ] = [commit]
150
171
  end
151
172
 
152
- ## todo/check: return [] for empty items array (items.empty?) - why?? why not??
153
- if items.empty?
154
- nil
155
- else
156
- items
173
+ if topics
174
+ raise ArgumentError, "Github::Resource expected; got #{topics.class.name}" unless topics.is_a?( Github::Resource )
175
+
176
+ puts "update - topics:"
177
+ ## e.g.
178
+ # {"names"=>
179
+ # ["opendata",
180
+ # "football",
181
+ # "seriea",
182
+ # "italia",
183
+ # "italy",
184
+ # "juve",
185
+ # "inter",
186
+ # "napoli",
187
+ # "roma",
188
+ # "sqlite"]}
189
+ #
190
+ # {"names"=>[]}
191
+
192
+ @data[ 'topics' ] = topics.data['names']
157
193
  end
158
- end ## method build_history
159
194
 
160
195
 
196
+ if languages
197
+ raise ArgumentError, "Github::Resource expected; got #{languages.class.name}" unless languages.is_a?( Github::Resource )
161
198
 
162
- def calc_diff_stars( samples: 3, days: 30 )
163
- ## samples: use n history item samples e.g. 3 samples
164
- ## days e.g. 7 days (per week), 30 days (per month)
199
+ puts "update - languages:"
165
200
 
166
- if history.nil?
167
- nil ## todo/check: return 0.0 too - why? why not?
168
- elsif history.size == 1
169
- ## just one item; CANNOT calc diff; return zero
170
- 0.0
171
- else
172
- idx = [history.size, samples].min ## calc last index
173
- last = history[idx-1]
174
- first = history[0]
175
201
 
176
- diff_days = first.date.jd - last.date.jd
177
- diff_stars = first.stars - last.stars
178
-
179
- ## note: use factor 1000 for fixed integer division
180
- ## converts to float at the end
181
-
182
- ## todo: check for better way (convert to float upfront - why? why not?)
183
-
184
- diff = (diff_stars * days * 1000) / diff_days
185
- puts "diff=#{diff}:#{diff.class.name}" ## check if it's a float
186
- (diff.to_f/1000.0)
187
- end
188
- end
202
+ ## e.g.
203
+ ## {"Ruby"=>1020599, "HTML"=>3219, "SCSS"=>508, "CSS"=>388}
204
+ ## or might be empty
205
+ ## {}
189
206
 
190
- def history_str
191
- ## returns "pretty printed" history as string buffer
192
- buf = ''
193
- buf << "[#{history.size}]: "
194
-
195
- history.each do |item|
196
- buf << "#{item.stars}"
197
-
198
- diff_stars = item.diff_stars
199
- diff_days = item.diff_days
200
- if diff_stars && diff_days ## note: last item has no diffs
201
- if diff_stars > 0 || diff_stars < 0
202
- if diff_stars > 0
203
- buf << " (+#{diff_stars}"
204
- else
205
- buf << " (#{diff_stars}"
206
- end
207
- buf << " in #{diff_days}d) "
208
- else ## diff_stars == 0
209
- buf << " (#{diff_days}d) "
210
- end
207
+ @data[ 'languages' ] = languages.data
211
208
  end
212
- end
213
- buf
214
- end # method history_str
215
-
216
-
217
- ###############################
218
- ## fetch / read / write methods
219
-
220
- def fetch( gh ) ## update stats / fetch data from github via api
221
- puts "fetching #{full_name}..."
222
- repo = gh.repo( full_name )
223
-
224
- ## e.g. 2015-05-11T20:21:43Z
225
- ## puts Time.iso8601( repo.data['created_at'] )
226
- @data['created_at'] = repo.data['created_at']
227
- @data['updated_at'] = repo.data['updated_at']
228
- @data['pushed_at'] = repo.data['pushed_at']
229
209
 
230
- @data['size'] = repo.data['size'] # size in kb (kilobyte)
231
-
232
- rec = {}
233
-
234
- puts "stargazers_count"
235
- puts repo.data['stargazers_count']
236
- rec['stargazers_count'] = repo.data['stargazers_count']
237
-
238
- today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
239
- puts "add record #{today} to history..."
240
- pp rec # check if stargazers_count is a number (NOT a string)
241
-
242
- @data[ 'history' ] ||= {}
243
- @data[ 'history' ][ today ] = rec
210
+ pp @data
244
211
 
245
- ##########################
246
- ## also check / keep track of (latest) commit
247
- commits = gh.repo_commits( full_name )
248
- puts "last commit/update:"
249
- ## pp commits
250
- commit = {
251
- 'committer' => {
252
- 'date' => commits.data[0]['commit']['committer']['date'],
253
- 'name' => commits.data[0]['commit']['committer']['name']
254
- },
255
- 'author' => {
256
- 'date' => commits.data[0]['commit']['author']['date'],
257
- 'name' => commits.data[0]['commit']['author']['name']
258
- },
259
- 'message' => commits.data[0]['commit']['message']
260
- }
261
-
262
- ## for now store only the latest commit (e.g. a single commit in an array)
263
- @data[ 'commits' ] = [commit]
264
212
 
265
- pp @data
213
+ ## reset (invalidate) cached values from data hash
214
+ ## use after reading or fetching
215
+ @cache = {}
266
216
 
267
- reset_cache
268
217
  self ## return self for (easy chaining)
269
218
  end
270
219
 
271
220
 
221
+ ########################################
222
+ ## read / write methods / helpers
223
+ def write
224
+ basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
225
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
226
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
227
+ path = "#{data_dir}/#{basename}.json"
272
228
 
273
- def write( data_dir: './data' )
274
- basename = full_name.gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
275
- puts "writing stats to #{basename}..."
276
- File.open( "#{data_dir}/#{basename}.json", 'w:utf-8' ) do |f|
277
- f.write JSON.pretty_generate( data )
229
+ puts " writing stats to #{basename} (#{data_dir})..."
230
+
231
+ FileUtils.mkdir_p( File.dirname( path )) ## make sure path exists
232
+ File.open( path, 'w:utf-8' ) do |f|
233
+ f.write( JSON.pretty_generate( @data ))
278
234
  end
279
235
  self ## return self for (easy chaining)
280
- end
236
+ end # method write
281
237
 
282
238
 
283
- def read( data_dir: './data' )
239
+ def read
284
240
  ## note: skip reading if file not present
285
- basename = full_name.gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
286
- filename = "#{data_dir}/#{basename}.json"
287
- if File.exist?( filename )
288
- puts "reading stats from #{basename}..."
289
- json = File.open( filename, 'r:utf-8' ) { |file| file.read } ## todo/fix: use read_utf8
241
+ basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
242
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
243
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
244
+ path = "#{data_dir}/#{basename}.json"
245
+
246
+ if File.exist?( path )
247
+ puts " reading stats from #{basename} (#{data_dir})..."
248
+ json = File.open( path, 'r:utf-8' ) { |f| f.read }
290
249
  @data = JSON.parse( json )
291
- reset_cache
250
+
251
+ ## reset (invalidate) cached values from data hash
252
+ ## use after reading or fetching
253
+ @cache = {}
292
254
  else
293
- puts "skipping reading stats from #{basename} -- file not found"
255
+ puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
294
256
  end
295
257
  self ## return self for (easy chaining)
296
- end
297
-
258
+ end # method read
298
259
  end # class Stats
299
260
 
300
261