hubba 0.7.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hubba/stats.rb CHANGED
@@ -1,246 +1,284 @@
1
- module Hubba
2
-
3
- ####
4
- # keep track of repo stats over time (with history hash)
5
-
6
- class Stats ## todo/check: rename to GithubRepoStats or RepoStats - why? why not?
7
-
8
- def initialize( full_name )
9
- @data = {}
10
- @data['full_name'] = full_name # e.g. poole/hyde etc.
11
-
12
- @cache = {} ## keep a lookup cache - why? why not?
13
- end
14
-
15
-
16
- ##################
17
- ## update
18
- def update_traffic( clones: nil,
19
- views: nil,
20
- paths: nil,
21
- referrers: nil )
22
-
23
- traffic = @data[ 'traffic' ] ||= {}
24
-
25
- summary = traffic['summary'] ||= {}
26
- history = traffic['history'] ||= {}
27
-
28
-
29
- if views
30
- raise ArgumentError, "Github::Resource expected; got #{views.class.name}" unless views.is_a?( Github::Resource )
31
- =begin
32
- {"count"=>1526,
33
- "uniques"=>287,
34
- "views"=>
35
- [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
36
- {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
37
- ...
38
- ]}>
39
- =end
40
-
41
- ## keep lastest (summary) record of last two weeks (14 days)
42
- summary['views'] = { 'count' => views.data['count'],
43
- 'uniques' => views.data['uniques'] }
44
-
45
- ## update history / day-by-day items / timeline
46
- views.data['views'].each do |view|
47
- # e.g. "2020-09-27T00:00:00Z"
48
- timestamp = DateTime.strptime( view['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
49
-
50
- item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
51
- ## note: merge "in-place"
52
- item.merge!( { 'views' => { 'count' => view['count'],
53
- 'uniques' => view['uniques'] }} )
54
- end
55
- end
56
-
57
- if clones
58
- raise ArgumentError, "Github::Resource expected; got #{clones.class.name}" unless clones.is_a?( Github::Resource )
59
- =begin
60
- {"count"=>51,
61
- "uniques"=>17,
62
- "clones"=>
63
- [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
64
- {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
65
- ...
66
- ]}
67
- =end
68
-
69
- ## keep lastest (summary) record of last two weeks (14 days)
70
- summary['clones'] = { 'count' => clones.data['count'],
71
- 'uniques' => clones.data['uniques'] }
72
-
73
- ## update history / day-by-day items / timeline
74
- clones.data['clones'].each do |clone|
75
- # e.g. "2020-09-27T00:00:00Z"
76
- timestamp = DateTime.strptime( clone['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
77
-
78
- item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
79
- ## note: merge "in-place"
80
- item.merge!( { 'clones' => { 'count' => clone['count'],
81
- 'uniques' => clone['uniques'] }} )
82
- end
83
- end
84
-
85
- if paths
86
- raise ArgumentError, "Github::Resource expected; got #{paths.class.name}" unless paths.is_a?( Github::Resource )
87
- =begin
88
- [{"path"=>"/openfootball/england",
89
- "title"=>
90
- "openfootball/england: Free open public domain football data for England (and ...",
91
- "count"=>394,
92
- "uniques"=>227},
93
- =end
94
- summary['paths'] = paths.data
95
- end
96
-
97
- if referrers
98
- raise ArgumentError, "Github::Resource expected; got #{referrers.class.name}" unless referrers.is_a?( Github::Resource )
99
- =begin
100
- [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
101
- {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
102
- {"referrer"=>"Google", "count"=>5, "uniques"=>5},
103
- {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
104
- =end
105
- summary['referrers'] = referrers.data
106
- end
107
- end # method update_traffic
108
-
109
-
110
- def update( repo,
111
- commits: nil,
112
- topics: nil ) ## update stats / fetch data from github via api
113
- raise ArgumentError, "Github::Resource expected; got #{repo.class.name}" unless repo.is_a?( Github::Resource )
114
-
115
- ## e.g. 2015-05-11T20:21:43Z
116
- ## puts Time.iso8601( repo.data['created_at'] )
117
- @data['created_at'] = repo.data['created_at']
118
- @data['updated_at'] = repo.data['updated_at']
119
- @data['pushed_at'] = repo.data['pushed_at']
120
-
121
- @data['size'] = repo.data['size'] # note: size in kb (kilobyte)
122
-
123
- @data['description'] = repo.data['description']
124
- @data['language'] = repo.data['language'] ## note: might be nil!!!
125
-
126
-
127
-
128
- ########################################
129
- #### history / by date record
130
- rec = {}
131
-
132
- rec['stargazers_count'] = repo.data['stargazers_count']
133
- rec['forks_count'] = repo.data['forks_count']
134
-
135
-
136
- today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
137
- puts "add record #{today} to history..."
138
- pp rec # check if stargazers_count is a number (NOT a string)
139
-
140
- history = @data[ 'history' ] ||= {}
141
- item = history[ today ] ||= {}
142
- ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
143
- item.merge!( rec )
144
-
145
-
146
-
147
- ##########################
148
- ## also check / keep track of (latest) commit
149
- if commits
150
- raise ArgumentError, "Github::Resource expected; got #{commits.class.name}" unless commits.is_a?( Github::Resource )
151
-
152
- puts "update - last commit:"
153
- ## pp commits
154
- commit = {
155
- 'committer' => {
156
- 'date' => commits.data[0]['commit']['committer']['date'],
157
- 'name' => commits.data[0]['commit']['committer']['name']
158
- },
159
- 'author' => {
160
- 'date' => commits.data[0]['commit']['author']['date'],
161
- 'name' => commits.data[0]['commit']['author']['name']
162
- },
163
- 'message' => commits.data[0]['commit']['message']
164
- }
165
-
166
- ## for now store only the latest commit (e.g. a single commit in an array)
167
- @data[ 'commits' ] = [commit]
168
- end
169
-
170
- if topics
171
- raise ArgumentError, "Github::Resource expected; got #{topics.class.name}" unless topics.is_a?( Github::Resource )
172
-
173
- puts "update - topics:"
174
- ## e.g.
175
- # {"names"=>
176
- # ["opendata",
177
- # "football",
178
- # "seriea",
179
- # "italia",
180
- # "italy",
181
- # "juve",
182
- # "inter",
183
- # "napoli",
184
- # "roma",
185
- # "sqlite"]}
186
- #
187
- # {"names"=>[]}
188
-
189
- @data[ 'topics' ] = topics.data['names']
190
- end
191
-
192
-
193
- pp @data
194
-
195
-
196
-
197
- ## reset (invalidate) cached values from data hash
198
- ## use after reading or fetching
199
- @cache = {}
200
-
201
- self ## return self for (easy chaining)
202
- end
203
-
204
-
205
- ########################################
206
- ## read / write methods / helpers
207
- def write
208
- basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
209
- letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
210
- data_dir = "#{Hubba.config.data_dir}/#{letter}"
211
- path = "#{data_dir}/#{basename}.json"
212
-
213
- puts " writing stats to #{basename} (#{data_dir})..."
214
-
215
- FileUtils.mkdir_p( File.dirname( path )) ## make sure path exists
216
- File.open( path, 'w:utf-8' ) do |f|
217
- f.write( JSON.pretty_generate( @data ))
218
- end
219
- self ## return self for (easy chaining)
220
- end # method write
221
-
222
-
223
- def read
224
- ## note: skip reading if file not present
225
- basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
226
- letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
227
- data_dir = "#{Hubba.config.data_dir}/#{letter}"
228
- path = "#{data_dir}/#{basename}.json"
229
-
230
- if File.exist?( path )
231
- puts " reading stats from #{basename} (#{data_dir})..."
232
- json = File.open( path, 'r:utf-8' ) { |f| f.read }
233
- @data = JSON.parse( json )
234
-
235
- ## reset (invalidate) cached values from data hash
236
- ## use after reading or fetching
237
- @cache = {}
238
- else
239
- puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
240
- end
241
- self ## return self for (easy chaining)
242
- end # method read
243
- end # class Stats
244
-
245
-
246
- end # module Hubba
1
+ module Hubba
2
+
3
+ ####
4
+ # keep track of repo stats over time (with history hash)
5
+
6
+ class Stats ## todo/check: rename to GithubRepoStats or RepoStats - why? why not?
7
+
8
+ def initialize( full_name )
9
+ @data = {}
10
+ @data['full_name'] = full_name # e.g. poole/hyde etc.
11
+
12
+ @cache = {} ## keep a lookup cache - why? why not?
13
+ end
14
+
15
+
16
+ ##################
17
+ ## update
18
+ def update_traffic( clones: nil,
19
+ views: nil,
20
+ paths: nil,
21
+ referrers: nil )
22
+
23
+ traffic = @data[ 'traffic' ] ||= {}
24
+
25
+ summary = traffic['summary'] ||= {}
26
+ history = traffic['history'] ||= {}
27
+
28
+
29
+ if views
30
+ raise ArgumentError, "Github::Resource expected; got #{views.class.name}" unless views.is_a?( Github::Resource )
31
+ =begin
32
+ {"count"=>1526,
33
+ "uniques"=>287,
34
+ "views"=>
35
+ [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
36
+ {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
37
+ ...
38
+ ]}>
39
+ =end
40
+
41
+ ## keep lastest (summary) record of last two weeks (14 days)
42
+ summary['views'] = { 'count' => views.data['count'],
43
+ 'uniques' => views.data['uniques'] }
44
+
45
+ ## update history / day-by-day items / timeline
46
+ views.data['views'].each do |view|
47
+ # e.g. "2020-09-27T00:00:00Z"
48
+ timestamp = DateTime.strptime( view['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
49
+
50
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
51
+ ## note: merge "in-place"
52
+ item.merge!( { 'views' => { 'count' => view['count'],
53
+ 'uniques' => view['uniques'] }} )
54
+ end
55
+ end
56
+
57
+ if clones
58
+ raise ArgumentError, "Github::Resource expected; got #{clones.class.name}" unless clones.is_a?( Github::Resource )
59
+ =begin
60
+ {"count"=>51,
61
+ "uniques"=>17,
62
+ "clones"=>
63
+ [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
64
+ {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
65
+ ...
66
+ ]}
67
+ =end
68
+
69
+ ## keep lastest (summary) record of last two weeks (14 days)
70
+ summary['clones'] = { 'count' => clones.data['count'],
71
+ 'uniques' => clones.data['uniques'] }
72
+
73
+ ## update history / day-by-day items / timeline
74
+ clones.data['clones'].each do |clone|
75
+ # e.g. "2020-09-27T00:00:00Z"
76
+ timestamp = DateTime.strptime( clone['timestamp'], '%Y-%m-%dT%H:%M:%S%z' )
77
+
78
+ item = history[ timestamp.strftime( '%Y-%m-%d' ) ] ||= {} ## e.g. 2016-09-27
79
+ ## note: merge "in-place"
80
+ item.merge!( { 'clones' => { 'count' => clone['count'],
81
+ 'uniques' => clone['uniques'] }} )
82
+ end
83
+ end
84
+
85
+ if paths
86
+ raise ArgumentError, "Github::Resource expected; got #{paths.class.name}" unless paths.is_a?( Github::Resource )
87
+ =begin
88
+ [{"path"=>"/openfootball/england",
89
+ "title"=>
90
+ "openfootball/england: Free open public domain football data for England (and ...",
91
+ "count"=>394,
92
+ "uniques"=>227},
93
+ =end
94
+ summary['paths'] = paths.data
95
+ end
96
+
97
+ if referrers
98
+ raise ArgumentError, "Github::Resource expected; got #{referrers.class.name}" unless referrers.is_a?( Github::Resource )
99
+ =begin
100
+ [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
101
+ {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
102
+ {"referrer"=>"Google", "count"=>5, "uniques"=>5},
103
+ {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
104
+ =end
105
+ summary['referrers'] = referrers.data
106
+ end
107
+ end # method update_traffic
108
+
109
+
110
+ def update( repo,
111
+ commits: nil,
112
+ topics: nil,
113
+ languages: nil ) ## update stats / fetch data from github via api
114
+ raise ArgumentError, "Github::Resource expected; got #{repo.class.name}" unless repo.is_a?( Github::Resource )
115
+
116
+ ## e.g. 2015-05-11T20:21:43Z
117
+ ## puts Time.iso8601( repo.data['created_at'] )
118
+ @data['created_at'] = repo.data['created_at']
119
+ @data['updated_at'] = repo.data['updated_at']
120
+ @data['pushed_at'] = repo.data['pushed_at']
121
+
122
+ @data['size'] = repo.data['size'] # note: size in kb (kilobyte)
123
+
124
+ @data['description'] = repo.data['description']
125
+
126
+ ### todo/check - remove language (always use languages - see below) - why? why not?
127
+ @data['language'] = repo.data['language'] ## note: might be nil!!!
128
+
129
+
130
+
131
+ ########################################
132
+ #### history / by date record
133
+ rec = {}
134
+
135
+ rec['stargazers_count'] = repo.data['stargazers_count']
136
+ rec['forks_count'] = repo.data['forks_count']
137
+
138
+
139
+ today = Date.today.strftime( '%Y-%m-%d' ) ## e.g. 2016-09-27
140
+ puts "add record #{today} to history..."
141
+ pp rec # check if stargazers_count is a number (NOT a string)
142
+
143
+ history = @data[ 'history' ] ||= {}
144
+ item = history[ today ] ||= {}
145
+ ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
146
+ item.merge!( rec )
147
+
148
+
149
+
150
+ ##########################
151
+ ## also check / keep track of (latest) commit
152
+ if commits
153
+ raise ArgumentError, "Github::Resource expected; got #{commits.class.name}" unless commits.is_a?( Github::Resource )
154
+
155
+ puts "update - last commit:"
156
+ ## pp commits
157
+ commit = {
158
+ 'committer' => {
159
+ 'date' => commits.data[0]['commit']['committer']['date'],
160
+ 'name' => commits.data[0]['commit']['committer']['name']
161
+ },
162
+ 'author' => {
163
+ 'date' => commits.data[0]['commit']['author']['date'],
164
+ 'name' => commits.data[0]['commit']['author']['name']
165
+ },
166
+ 'message' => commits.data[0]['commit']['message']
167
+ }
168
+
169
+ ## for now store only the latest commit (e.g. a single commit in an array)
170
+ @data[ 'commits' ] = [commit]
171
+ end
172
+
173
+ if topics
174
+ raise ArgumentError, "Github::Resource expected; got #{topics.class.name}" unless topics.is_a?( Github::Resource )
175
+
176
+ puts "update - topics:"
177
+ ## e.g.
178
+ # {"names"=>
179
+ # ["opendata",
180
+ # "football",
181
+ # "seriea",
182
+ # "italia",
183
+ # "italy",
184
+ # "juve",
185
+ # "inter",
186
+ # "napoli",
187
+ # "roma",
188
+ # "sqlite"]}
189
+ #
190
+ # {"names"=>[]}
191
+
192
+ @data[ 'topics' ] = topics.data['names']
193
+ end
194
+
195
+
196
+ if languages
197
+ raise ArgumentError, "Github::Resource expected; got #{languages.class.name}" unless languages.is_a?( Github::Resource )
198
+
199
+ puts "update - languages:"
200
+
201
+
202
+ ## e.g.
203
+ ## {"Ruby"=>1020599, "HTML"=>3219, "SCSS"=>508, "CSS"=>388}
204
+ ## or might be empty
205
+ ## {}
206
+
207
+ @data[ 'languages' ] = languages.data
208
+ end
209
+
210
+ pp @data
211
+
212
+
213
+ ## reset (invalidate) cached values from data hash
214
+ ## use after reading or fetching
215
+ @cache = {}
216
+
217
+ self ## return self for (easy chaining)
218
+ end
219
+
220
+
221
+ ########################################
222
+ ## read / write methods / helpers
223
+ def write
224
+ ## note: always downcase basename - why? why not?
225
+ basename = @data['full_name'].gsub( '/', '~' ).downcase ## e.g. poole/hyde become poole~hyde
226
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
227
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
228
+ path = "#{data_dir}/#{basename}.json"
229
+
230
+ puts " writing stats to #{basename} (#{data_dir})..."
231
+
232
+ FileUtils.mkdir_p( File.dirname( path )) ## make sure path exists
233
+ File.open( path, 'w:utf-8' ) do |f|
234
+ f.write( JSON.pretty_generate( @data ))
235
+ end
236
+ self ## return self for (easy chaining)
237
+ end # method write
238
+
239
+
240
+ def read
241
+ ## note: always downcase basename - why? why not?
242
+ ## note: skip reading if file not present
243
+ basename = @data['full_name'].gsub( '/', '~' ).downcase ## e.g. poole/hyde become poole~hyde
244
+ letter = basename[0] ## use first letter as index dir e.g. p/poole~hyde
245
+ data_dir = "#{Hubba.config.data_dir}/#{letter}"
246
+ path = "#{data_dir}/#{basename}.json"
247
+
248
+ if File.exist?( path )
249
+ puts " reading stats from #{basename} (#{data_dir})..."
250
+ json = File.open( path, 'r:utf-8' ) { |f| f.read }
251
+ @data = JSON.parse( json )
252
+
253
+ ## reset (invalidate) cached values from data hash
254
+ ## use after reading or fetching
255
+ @cache = {}
256
+ else
257
+ puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
258
+ end
259
+ self ## return self for (easy chaining)
260
+ end # method read
261
+
262
+ def read_old
263
+ ## note: skip reading if file not present
264
+ basename = @data['full_name'].gsub( '/', '~' ) ## e.g. poole/hyde become poole~hyde
265
+ data_dir = Hubba.config.data_dir
266
+ path = "#{data_dir}/#{basename}.json"
267
+
268
+ if File.exist?( path )
269
+ puts " reading stats from #{basename} (#{data_dir})..."
270
+ json = File.open( path, 'r:utf-8' ) { |f| f.read }
271
+ @data = JSON.parse( json )
272
+
273
+ ## reset (invalidate) cached values from data hash
274
+ ## use after reading or fetching
275
+ @cache = {}
276
+ else
277
+ puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
278
+ end
279
+ self ## return self for (easy chaining)
280
+ end # method read_old
281
+ end # class Stats
282
+
283
+
284
+ end # module Hubba
data/lib/hubba/update.rb CHANGED
@@ -1,44 +1,47 @@
1
- module Hubba
2
-
3
- def self.update_stats( hash_or_path='./repos.yml' ) ## move to reposet e.g. Reposet#update_status!!!!
4
- h = if hash_or_path.is_a?( String ) ## assume it is a file path!!!
5
- path = hash_or_path
6
- YAML.load_file( path )
7
- else
8
- hash_or_path # assume its a hash / reposet already!!!
9
- end
10
-
11
- gh = Github.new
12
-
13
- h.each do |org_with_counter,names|
14
-
15
- ## remove optional number from key e.g.
16
- ## mrhydescripts (3) => mrhydescripts
17
- ## footballjs (4) => footballjs
18
- ## etc.
19
- org = org_with_counter.sub( /\([0-9]+\)/, '' ).strip
20
-
21
- ## puts " -- #{key_with_counter} [#{key}] --"
22
-
23
- names.each do |name|
24
- full_name = "#{org}/#{name}"
25
-
26
- ## puts " fetching stats #{count+1}/#{repo_count} - >#{full_name}<..."
27
- stats = Stats.new( full_name )
28
- stats.read
29
-
30
- puts "update >#{full_name}< [1/3] - fetching repo..."
31
- repo = gh.repo( full_name )
32
- puts "update >#{full_name}< [2/3] - fetching repo commits ..."
33
- commits = gh.repo_commits( full_name )
34
- puts "update >#{full_name}< [3/3] - fetching repo topics ..."
35
- topics = gh.repo_topics( full_name )
36
-
37
- stats.update( repo,
38
- commits: commits,
39
- topics: topics )
40
- stats.write
41
- end
42
- end
43
- end
1
+ module Hubba
2
+
3
+ def self.update_stats( hash_or_path='./repos.yml' ) ## move to reposet e.g. Reposet#update_status!!!!
4
+ h = if hash_or_path.is_a?( String ) ## assume it is a file path!!!
5
+ path = hash_or_path
6
+ YAML.load_file( path )
7
+ else
8
+ hash_or_path # assume its a hash / reposet already!!!
9
+ end
10
+
11
+ gh = Github.new
12
+
13
+ h.each do |org_with_counter,names|
14
+
15
+ ## remove optional number from key e.g.
16
+ ## mrhydescripts (3) => mrhydescripts
17
+ ## footballjs (4) => footballjs
18
+ ## etc.
19
+ org = org_with_counter.sub( /\([0-9]+\)/, '' ).strip
20
+
21
+ ## puts " -- #{key_with_counter} [#{key}] --"
22
+
23
+ names.each do |name|
24
+ full_name = "#{org}/#{name}"
25
+
26
+ ## puts " fetching stats #{count+1}/#{repo_count} - >#{full_name}<..."
27
+ stats = Stats.new( full_name )
28
+ stats.read
29
+
30
+ puts "update >#{full_name}< [1/4] - fetching repo..."
31
+ repo = gh.repo( full_name )
32
+ puts "update >#{full_name}< [2/4] - fetching repo commits ..."
33
+ commits = gh.repo_commits( full_name )
34
+ puts "update >#{full_name}< [3/4] - fetching repo topics ..."
35
+ topics = gh.repo_topics( full_name )
36
+ puts "update >#{full_name}< [4/4] - fetching repo languages ..."
37
+ languages = gh.repo_languages( full_name )
38
+
39
+ stats.update( repo,
40
+ commits: commits,
41
+ topics: topics,
42
+ languages: languages )
43
+ stats.write
44
+ end
45
+ end
46
+ end
44
47
  end # module Hubba