hubba 0.7.0 → 1.0.1

data/lib/hubba/stats.rb CHANGED
@@ -1,246 +1,284 @@
- module Hubba
-
- ####
- # keep track of repo stats over time (with history hash)
-
- class Stats ## todo/check: rename to GithubRepoStats or RepoStats - why? why not?
-
- def initialize( full_name )
- @data = {}
- @data['full_name'] = full_name # e.g. poole/hyde etc.
-
- @cache = {} ## keep a lookup cache - why? why not?
- end
-
-
- ##################
- ## update
- def update_traffic( clones: nil,
- views: nil,
- paths: nil,
- referrers: nil )
-
- traffic = @data[ 'traffic' ] ||= {}
-
- summary = traffic['summary'] ||= {}
- history = traffic['history'] ||= {}
-
-
- if views
- raise ArgumentError, "Github::Resource expected; got #{views.class.name}" unless views.is_a?( Github::Resource )
- =begin
- {"count"=>1526,
- "uniques"=>287,
- "views"=>
- [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
- {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
- ...
- ]}>
- =end
-
- ## keep lastest (summary) record of last two weeks (14 days)
- summary['views'] = { 'count' => views.data['count'],
- 'uniques' => views.data['uniques'] }
-
- ## update history / day-by-day items / timeline
- views.data['views'].each do |view|
- # e.g. "2020-09-27T00:00:00Z"
- timestamp = DateTime.strptime( view['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
-
- item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
- ## note: merge "in-place"
- item.merge!( { 'views' => { 'count' => view['count'],
- 'uniques' => view['uniques'] }} )
- end
- end
-
- if clones
- raise ArgumentError, "Github::Resource expected; got #{clones.class.name}" unless clones.is_a?( Github::Resource )
- =begin
- {"count"=>51,
- "uniques"=>17,
- "clones"=>
- [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
- {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
- ...
- ]}
- =end
-
- ## keep lastest (summary) record of last two weeks (14 days)
- summary['clones'] = { 'count' => clones.data['count'],
- 'uniques' => clones.data['uniques'] }
-
- ## update history / day-by-day items / timeline
- clones.data['clones'].each do |clone|
- # e.g. "2020-09-27T00:00:00Z"
- timestamp = DateTime.strptime( clone['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
-
- item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
- ## note: merge "in-place"
- item.merge!( { 'clones' => { 'count' => clone['count'],
- 'uniques' => clone['uniques'] }} )
- end
- end
-
- if paths
- raise ArgumentError, "Github::Resource expected; got #{paths.class.name}" unless paths.is_a?( Github::Resource )
- =begin
- [{"path"=>"/openfootball/england",
- "title"=>
- "openfootball/england: Free open public domain football data for England (and ...",
- "count"=>394,
- "uniques"=>227},
- =end
- summary['paths'] = paths.data
- end
-
- if referrers
- raise ArgumentError, "Github::Resource expected; got #{referrers.class.name}" unless referrers.is_a?( Github::Resource )
- =begin
- [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
- {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
- {"referrer"=>"Google", "count"=>5, "uniques"=>5},
- {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
- =end
- summary['referrers'] = referrers.data
- end
- end # method update_traffic
-
-
- def update( repo,
- commits: nil,
- topics: nil ) ## update stats / fetch data from github via api
- raise ArgumentError, "Github::Resource expected; got #{repo.class.name}" unless repo.is_a?( Github::Resource )
-
- ## e.g. 2015-05-11T20:21:43Z
- ## puts Time.iso8601( repo.data['created_at'] )
- @data['created_at'] = repo.data['created_at']
- @data['updated_at'] = repo.data['updated_at']
- @data['pushed_at'] = repo.data['pushed_at']
-
- @data['size'] = repo.data['size'] # note: size in kb (kilobyte)
-
- @data['description'] = repo.data['description']
- @data['language'] = repo.data['language'] ## note: might be nil!!!
-
-
-
- ########################################
- #### history / by date record
- rec = {}
-
- rec['stargazers_count'] = repo.data['stargazers_count']
- rec['forks_count'] = repo.data['forks_count']
-
-
- today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
- puts "add record #{today} to history..."
- pp rec # check if stargazers_count is a number (NOT a string)
-
- history = @data[ 'history' ] ||= {}
- item = history[ today ] ||= {}
- ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
- item.merge!( rec )
-
-
-
- ##########################
- ## also check / keep track of (latest) commit
- if commits
- raise ArgumentError, "Github::Resource expected; got #{commits.class.name}" unless commits.is_a?( Github::Resource )
-
- puts "update - last commit:"
- ## pp commits
- commit = {
- 'committer' => {
- 'date' => commits.data[0]['commit']['committer']['date'],
- 'name' => commits.data[0]['commit']['committer']['name']
- },
- 'author' => {
- 'date' => commits.data[0]['commit']['author']['date'],
- 'name' => commits.data[0]['commit']['author']['name']
- },
- 'message' => commits.data[0]['commit']['message']
- }
-
- ## for now store only the latest commit (e.g. a single commit in an array)
- @data[ 'commits' ] = [commit]
- end
-
- if topics
- raise ArgumentError, "Github::Resource expected; got #{topics.class.name}" unless topics.is_a?( Github::Resource )
-
- puts "update - topics:"
- ## e.g.
- # {"names"=>
- # ["opendata",
- # "football",
- # "seriea",
- # "italia",
- # "italy",
- # "juve",
- # "inter",
- # "napoli",
- # "roma",
- # "sqlite"]}
- #
- # {"names"=>[]}
-
- @data[ 'topics' ] = topics.data['names']
- end
-
-
- pp @data
-
-
-
- ## reset (invalidate) cached values from data hash
- ## use after reading or fetching
- @cache = {}
-
- self ## return self for (easy chaining)
- end
-
-
- ########################################
- ## read / write methods / helpers
- def write
- basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
- letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
- data_dir = "#{Hubba.config.data_dir}/#{letter}"
- path = "#{data_dir}/#{basename}.json"
-
- puts " writing stats to #{basename} (#{data_dir})..."
-
- FileUtils.mkdir_p( File.dirname( path )) ## make sure path exists
- File.open( path, 'w:utf-8' ) do |f|
- f.write( JSON.pretty_generate( @data ))
- end
- self ## return self for (easy chaining)
- end # method write
-
-
- def read
- ## note: skip reading if file not present
- basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
- letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
- data_dir = "#{Hubba.config.data_dir}/#{letter}"
- path = "#{data_dir}/#{basename}.json"
-
- if File.exist?( path )
- puts " reading stats from #{basename} (#{data_dir})..."
- json = File.open( path, 'r:utf-8' ) { |f| f.read }
- @data = JSON.parse( json )
-
- ## reset (invalidate) cached values from data hash
- ## use after reading or fetching
- @cache = {}
- else
- puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
- end
- self ## return self for (easy chaining)
- end # method read
- end # class Stats
-
-
- end # module Hubba
+ module Hubba
+
+ ####
+ # keep track of repo stats over time (with history hash)
+
+ class Stats ## todo/check: rename to GithubRepoStats or RepoStats - why? why not?
+
+ def initialize( full_name )
+ @data = {}
+ @data['full_name'] = full_name # e.g. poole/hyde etc.
+
+ @cache = {} ## keep a lookup cache - why? why not?
+ end
+
+
+ ##################
+ ## update
+ def update_traffic( clones: nil,
+ views: nil,
+ paths: nil,
+ referrers: nil )
+
+ traffic = @data[ 'traffic' ] ||= {}
+
+ summary = traffic['summary'] ||= {}
+ history = traffic['history'] ||= {}
+
+
+ if views
+ raise ArgumentError, "Github::Resource expected; got #{views.class.name}" unless views.is_a?( Github::Resource )
+ =begin
+ {"count"=>1526,
+ "uniques"=>287,
+ "views"=>
+ [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
+ {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
+ ...
+ ]}>
+ =end
+
+ ## keep lastest (summary) record of last two weeks (14 days)
+ summary['views'] = { 'count' => views.data['count'],
+ 'uniques' => views.data['uniques'] }
+
+ ## update history / day-by-day items / timeline
+ views.data['views'].each do |view|
+ # e.g. "2020-09-27T00:00:00Z"
+ timestamp = DateTime.strptime( view['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
+
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
+ ## note: merge "in-place"
+ item.merge!( { 'views' => { 'count' => view['count'],
+ 'uniques' => view['uniques'] }} )
+ end
+ end
+
+ if clones
+ raise ArgumentError, "Github::Resource expected; got #{clones.class.name}" unless clones.is_a?( Github::Resource )
+ =begin
+ {"count"=>51,
+ "uniques"=>17,
+ "clones"=>
+ [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
+ {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
+ ...
+ ]}
+ =end
+
+ ## keep lastest (summary) record of last two weeks (14 days)
+ summary['clones'] = { 'count' => clones.data['count'],
+ 'uniques' => clones.data['uniques'] }
+
+ ## update history / day-by-day items / timeline
+ clones.data['clones'].each do |clone|
+ # e.g. "2020-09-27T00:00:00Z"
+ timestamp = DateTime.strptime( clone['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
+
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
+ ## note: merge "in-place"
+ item.merge!( { 'clones' => { 'count' => clone['count'],
+ 'uniques' => clone['uniques'] }} )
+ end
+ end
+
+ if paths
+ raise ArgumentError, "Github::Resource expected; got #{paths.class.name}" unless paths.is_a?( Github::Resource )
+ =begin
+ [{"path"=>"/openfootball/england",
+ "title"=>
+ "openfootball/england: Free open public domain football data for England (and ...",
+ "count"=>394,
+ "uniques"=>227},
+ =end
+ summary['paths'] = paths.data
+ end
+
+ if referrers
+ raise ArgumentError, "Github::Resource expected; got #{referrers.class.name}" unless referrers.is_a?( Github::Resource )
+ =begin
+ [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
+ {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
+ {"referrer"=>"Google", "count"=>5, "uniques"=>5},
+ {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
+ =end
+ summary['referrers'] = referrers.data
+ end
+ end # method update_traffic
+
+
+ def update( repo,
+ commits: nil,
+ topics: nil,
+ languages: nil ) ## update stats / fetch data from github via api
+ raise ArgumentError, "Github::Resource expected; got #{repo.class.name}" unless repo.is_a?( Github::Resource )
+
+ ## e.g. 2015-05-11T20:21:43Z
+ ## puts Time.iso8601( repo.data['created_at'] )
+ @data['created_at'] = repo.data['created_at']
+ @data['updated_at'] = repo.data['updated_at']
+ @data['pushed_at'] = repo.data['pushed_at']
+
+ @data['size'] = repo.data['size'] # note: size in kb (kilobyte)
+
+ @data['description'] = repo.data['description']
+
+ ### todo/check - remove language (always use languages - see below) - why? why not?
+ @data['language'] = repo.data['language'] ## note: might be nil!!!
+
+
+
+ ########################################
+ #### history / by date record
+ rec = {}
+
+ rec['stargazers_count'] = repo.data['stargazers_count']
+ rec['forks_count'] = repo.data['forks_count']
+
+
+ today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
+ puts "add record #{today} to history..."
+ pp rec # check if stargazers_count is a number (NOT a string)
+
+ history = @data[ 'history' ] ||= {}
+ item = history[ today ] ||= {}
+ ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
+ item.merge!( rec )
+
+
+
+ ##########################
+ ## also check / keep track of (latest) commit
+ if commits
+ raise ArgumentError, "Github::Resource expected; got #{commits.class.name}" unless commits.is_a?( Github::Resource )
+
+ puts "update - last commit:"
+ ## pp commits
+ commit = {
+ 'committer' => {
+ 'date' => commits.data[0]['commit']['committer']['date'],
+ 'name' => commits.data[0]['commit']['committer']['name']
+ },
+ 'author' => {
+ 'date' => commits.data[0]['commit']['author']['date'],
+ 'name' => commits.data[0]['commit']['author']['name']
+ },
+ 'message' => commits.data[0]['commit']['message']
+ }
+
+ ## for now store only the latest commit (e.g. a single commit in an array)
+ @data[ 'commits' ] = [commit]
+ end
+
+ if topics
+ raise ArgumentError, "Github::Resource expected; got #{topics.class.name}" unless topics.is_a?( Github::Resource )
+
+ puts "update - topics:"
+ ## e.g.
+ # {"names"=>
+ # ["opendata",
+ # "football",
+ # "seriea",
+ # "italia",
+ # "italy",
+ # "juve",
+ # "inter",
+ # "napoli",
+ # "roma",
+ # "sqlite"]}
+ #
+ # {"names"=>[]}
+
+ @data[ 'topics' ] = topics.data['names']
+ end
+
+
+ if languages
+ raise ArgumentError, "Github::Resource expected; got #{languages.class.name}" unless languages.is_a?( Github::Resource )
+
+ puts "update - languages:"
+
+
+ ## e.g.
+ ## {"Ruby"=>1020599, "HTML"=>3219, "SCSS"=>508, "CSS"=>388}
+ ## or might be empty
+ ## {}
+
+ @data[ 'languages' ] = languages.data
+ end
+
+ pp @data
+
+
+ ## reset (invalidate) cached values from data hash
+ ## use after reading or fetching
+ @cache = {}
+
+ self ## return self for (easy chaining)
+ end
+
+
+ ########################################
+ ## read / write methods / helpers
+ def write
+ ## note: always downcase basename - why? why not?
+ basename = @data['full_name'].gsub( '/', '~' ).downcase ## e.g. poole/hyde become poole~hyde
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
+ path = "#{data_dir}/#{basename}.json"
+
+ puts " writing stats to #{basename} (#{data_dir})..."
+
+ FileUtils.mkdir_p( File.dirname( path )) ## make sure path exists
+ File.open( path, 'w:utf-8' ) do |f|
+ f.write( JSON.pretty_generate( @data ))
+ end
+ self ## return self for (easy chaining)
+ end # method write
+
+
+ def read
+ ## note: always downcase basename - why? why not?
+ ## note: skip reading if file not present
+ basename = @data['full_name'].gsub( '/', '~' ).downcase ## e.g. poole/hyde become poole~hyde
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
+ path = "#{data_dir}/#{basename}.json"
+
+ if File.exist?( path )
+ puts " reading stats from #{basename} (#{data_dir})..."
+ json = File.open( path, 'r:utf-8' ) { |f| f.read }
+ @data = JSON.parse( json )
+
+ ## reset (invalidate) cached values from data hash
+ ## use after reading or fetching
+ @cache = {}
+ else
+ puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
+ end
+ self ## return self for (easy chaining)
+ end # method read
+
+ def read_old
+ ## note: skip reading if file not present
+ basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
+ data_dir = Hubba.config.data_dir
+ path = "#{data_dir}/#{basename}.json"
+
+ if File.exist?( path )
+ puts " reading stats from #{basename} (#{data_dir})..."
+ json = File.open( path, 'r:utf-8' ) { |f| f.read }
+ @data = JSON.parse( json )
+
+ ## reset (invalidate) cached values from data hash
+ ## use after reading or fetching
+ @cache = {}
+ else
+ puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
+ end
+ self ## return self for (easy chaining)
+ end # method read_old
+ end # class Stats
+
+
+ end # module Hubba
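
For orientation, here is a minimal usage sketch of the updated `Stats` API shown above, written as if inside the `Hubba` namespace. The `Github` client and its `repo`, `repo_commits`, `repo_topics` and `repo_languages` fetch methods are the ones used by `Hubba.update_stats` (see the update.rb diff below); the repo name `poole/hyde` is just the example taken from the code comments, not gem documentation.

  gh    = Github.new
  stats = Stats.new( 'poole/hyde' )
  stats.read    ## warns and skips if no stats file exists yet

  stats.update( gh.repo( 'poole/hyde' ),
                commits:   gh.repo_commits( 'poole/hyde' ),
                topics:    gh.repo_topics( 'poole/hyde' ),
                languages: gh.repo_languages( 'poole/hyde' ) )   ## languages: keyword added in this diff

  stats.write   ## writes "#{Hubba.config.data_dir}/p/poole~hyde.json" (downcased basename, one-letter index dir)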
data/lib/hubba/update.rb CHANGED
@@ -1,44 +1,47 @@
- module Hubba
-
- def self.update_stats( hash_or_path='./repos.yml' ) ## move to reposet e.g. Reposet#update_status!!!!
- h = if hash_or_path.is_a?( String ) ## assume it is a file path!!!
- path = hash_or_path
- YAML.load_file( path )
- else
- hash_or_path # assume its a hash / reposet already!!!
- end
-
- gh = Github.new
-
- h.each do |org_with_counter,names|
-
- ## remove optional number from key e.g.
- ## mrhydescripts (3) => mrhydescripts
- ## footballjs (4) => footballjs
- ## etc.
- org = org_with_counter.sub( /\([0-9]+\)/, '' ).strip
-
- ## puts " -- #{key_with_counter} [#{key}] --"
-
- names.each do |name|
- full_name = "#{org}/#{name}"
-
- ## puts " fetching stats #{count+1}/#{repo_count} - >#{full_name}<..."
- stats = Stats.new( full_name )
- stats.read
-
- puts "update >#{full_name}< [1/3] - fetching repo..."
- repo = gh.repo( full_name )
- puts "update >#{full_name}< [2/3] - fetching repo commits ..."
- commits = gh.repo_commits( full_name )
- puts "update >#{full_name}< [3/3] - fetching repo topics ..."
- topics = gh.repo_topics( full_name )
-
- stats.update( repo,
- commits: commits,
- topics: topics )
- stats.write
- end
- end
- end
+ module Hubba
+
+ def self.update_stats( hash_or_path='./repos.yml' ) ## move to reposet e.g. Reposet#update_status!!!!
+ h = if hash_or_path.is_a?( String ) ## assume it is a file path!!!
+ path = hash_or_path
+ YAML.load_file( path )
+ else
+ hash_or_path # assume its a hash / reposet already!!!
+ end
+
+ gh = Github.new
+
+ h.each do |org_with_counter,names|
+
+ ## remove optional number from key e.g.
+ ## mrhydescripts (3) => mrhydescripts
+ ## footballjs (4) => footballjs
+ ## etc.
+ org = org_with_counter.sub( /\([0-9]+\)/, '' ).strip
+
+ ## puts " -- #{key_with_counter} [#{key}] --"
+
+ names.each do |name|
+ full_name = "#{org}/#{name}"
+
+ ## puts " fetching stats #{count+1}/#{repo_count} - >#{full_name}<..."
+ stats = Stats.new( full_name )
+ stats.read
+
+ puts "update >#{full_name}< [1/4] - fetching repo..."
+ repo = gh.repo( full_name )
+ puts "update >#{full_name}< [2/4] - fetching repo commits ..."
+ commits = gh.repo_commits( full_name )
+ puts "update >#{full_name}< [3/4] - fetching repo topics ..."
+ topics = gh.repo_topics( full_name )
+ puts "update >#{full_name}< [4/4] - fetching repo languages ..."
+ languages = gh.repo_languages( full_name )
+
+ stats.update( repo,
+ commits: commits,
+ topics: topics,
+ languages: languages )
+ stats.write
+ end
+ end
+ end
  end # module Hubba
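
As a closing sketch, the reposet file that `Hubba.update_stats` expects can be inferred from the parsing above: a hash of org keys (with an optional repo counter in parentheses that gets stripped) mapping to lists of repo names. The org and repo names below are hypothetical placeholders, not part of the gem.

  ## ./repos.yml  -- hypothetical example layout
  #
  #   poole (2):
  #     - hyde
  #     - lanyon
  #
  Hubba.update_stats( './repos.yml' )   ## reads/updates/writes stats per repo (repo, commits, topics & languages)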