ossert 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +16 -0
  8. data/Gemfile +8 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +199 -0
  11. data/Rakefile +12 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/config/classifiers.yml +153 -0
  15. data/config/descriptions.yml +45 -0
  16. data/config/sidekiq.rb +15 -0
  17. data/config/stats.yml +198 -0
  18. data/config/translations.yml +44 -0
  19. data/db/backups/.keep +0 -0
  20. data/db/migrate/001_create_projects.rb +22 -0
  21. data/db/migrate/002_create_exceptions.rb +14 -0
  22. data/db/migrate/003_add_meta_to_projects.rb +14 -0
  23. data/db/migrate/004_add_timestamps_to_projects.rb +12 -0
  24. data/db/migrate/005_create_classifiers.rb +19 -0
  25. data/lib/ossert/classifiers/decision_tree.rb +112 -0
  26. data/lib/ossert/classifiers/growing/check.rb +172 -0
  27. data/lib/ossert/classifiers/growing/classifier.rb +175 -0
  28. data/lib/ossert/classifiers/growing.rb +163 -0
  29. data/lib/ossert/classifiers.rb +14 -0
  30. data/lib/ossert/config.rb +24 -0
  31. data/lib/ossert/fetch/bestgems.rb +98 -0
  32. data/lib/ossert/fetch/github.rb +536 -0
  33. data/lib/ossert/fetch/rubygems.rb +80 -0
  34. data/lib/ossert/fetch.rb +142 -0
  35. data/lib/ossert/presenters/project.rb +202 -0
  36. data/lib/ossert/presenters/project_v2.rb +117 -0
  37. data/lib/ossert/presenters.rb +8 -0
  38. data/lib/ossert/project.rb +144 -0
  39. data/lib/ossert/quarters_store.rb +164 -0
  40. data/lib/ossert/rake_tasks.rb +6 -0
  41. data/lib/ossert/reference.rb +87 -0
  42. data/lib/ossert/repositories.rb +138 -0
  43. data/lib/ossert/saveable.rb +153 -0
  44. data/lib/ossert/stats/agility_quarter.rb +62 -0
  45. data/lib/ossert/stats/agility_total.rb +71 -0
  46. data/lib/ossert/stats/base.rb +113 -0
  47. data/lib/ossert/stats/community_quarter.rb +28 -0
  48. data/lib/ossert/stats/community_total.rb +24 -0
  49. data/lib/ossert/stats.rb +32 -0
  50. data/lib/ossert/tasks/database.rake +179 -0
  51. data/lib/ossert/tasks/ossert.rake +52 -0
  52. data/lib/ossert/version.rb +4 -0
  53. data/lib/ossert/workers/fetch.rb +21 -0
  54. data/lib/ossert/workers/fetch_bestgems_page.rb +32 -0
  55. data/lib/ossert/workers/refresh_fetch.rb +22 -0
  56. data/lib/ossert/workers/sync_rubygems.rb +0 -0
  57. data/lib/ossert/workers.rb +11 -0
  58. data/lib/ossert.rb +63 -0
  59. data/ossert.gemspec +47 -0
  60. metadata +396 -0
data/config/stats.yml ADDED
@@ -0,0 +1,198 @@
1
+ stats:
2
+ agility:
3
+ total:
4
+ metrics:
5
+ - issues_closed_percent
6
+ - issues_non_owner_percent
7
+ - issues_with_contrib_comments_percent
8
+ - issues_all_count
9
+ - issues_processed_in_avg
10
+ - issues_processed_in_median
11
+ - pr_closed_percent
12
+ - pr_non_owner_percent
13
+ - pr_with_contrib_comments_percent
14
+ - pr_all_count
15
+ - pr_processed_in_avg
16
+ - pr_processed_in_median
17
+ - first_pr_date_int
18
+ - last_pr_date_int
19
+ - first_issue_date_int
20
+ - last_issue_date_int
21
+ - last_release_date_int
22
+ - releases_count
23
+ - commits_count_since_last_release_count
24
+ - stale_branches_count
25
+ - life_period
26
+ - last_changed
27
+ uniq_attributes:
28
+ - dependencies
29
+ - issues_open
30
+ - issues_closed
31
+ - issues_owner
32
+ - issues_non_owner
33
+ - issues_with_contrib_comments
34
+ - issues_total
35
+ - pr_open
36
+ - pr_merged
37
+ - pr_closed
38
+ - pr_owner
39
+ - pr_non_owner
40
+ - pr_with_contrib_comments
41
+ - pr_total
42
+ - releases_total_gh
43
+ - releases_total_rg
44
+ - stale_branches
45
+ - branches
46
+ attributes:
47
+ dependencies: Array
48
+ issues_open: Array
49
+ issues_closed: Array
50
+ issues_owner: Array
51
+ issues_non_owner: Array
52
+ issues_with_contrib_comments: Array
53
+ issues_total: Array
54
+ issues_processed_in_avg: Ossert::TooLong
55
+ issues_processed_in_median: Ossert::TooLong
56
+ pr_open: Array
57
+ pr_merged: Array
58
+ pr_closed: Array
59
+ pr_owner: Array
60
+ pr_non_owner: Array
61
+ pr_with_contrib_comments: Array
62
+ pr_total: Array
63
+ pr_processed_in_avg: Ossert::TooLong
64
+ pr_processed_in_median: Ossert::TooLong
65
+ releases_total_gh: Array
66
+ releases_total_rg: Array
67
+ stale_branches: Array
68
+ branches: Array
69
+ first_pr_date: Ossert::Zero
70
+ last_pr_date: Ossert::Zero
71
+ first_issue_date: Ossert::Zero
72
+ last_issue_date: Ossert::Zero
73
+ last_release_date: Ossert::Zero
74
+ commits_count_since_last_release: Ossert::Zero
75
+ last_year_commits: Ossert::Zero
76
+ quarter:
77
+ metrics:
78
+ - issues_processed_in_avg
79
+ - issues_processed_in_median
80
+ - issues_closed_percent
81
+ - issues_actual_count
82
+ - issues_all_count
83
+ - pr_processed_in_avg
84
+ - pr_processed_in_median
85
+ - pr_closed_percent
86
+ - pr_actual_count
87
+ - pr_all_count
88
+ - releases_count
89
+ - commits
90
+ uniq_attributes:
91
+ - issues_open
92
+ - issues_closed
93
+ - issues_total
94
+ - issues_actual
95
+ - pr_open
96
+ - pr_closed
97
+ - pr_total
98
+ - pr_actual
99
+ - branches
100
+ - releases
101
+ - releases_total_gh
102
+ - releases_total_rg
103
+ attributes:
104
+ issues_open: Array
105
+ issues_closed: Array
106
+ issues_total: Array
107
+ issues_actual: Array
108
+ pr_open: Array
109
+ pr_merged: Array
110
+ pr_closed: Array
111
+ pr_total: Array
112
+ pr_actual: Array
113
+ branches: Array
114
+ releases: Array
115
+ releases_total_gh: Array
116
+ releases_total_rg: Array
117
+ commits: Ossert::Zero
118
+ issues_processed_in_days: Array
119
+ pr_processed_in_days: Array
120
+ community:
121
+ total:
122
+ metrics:
123
+ - dependants_count
124
+ - users_creating_issues_count
125
+ - contributors_count
126
+ - users_commenting_issues_count
127
+ - users_creating_pr_count
128
+ - users_commenting_pr_count
129
+ - watchers_count
130
+ - stargazers_count
131
+ - forks_count
132
+ - users_involved_count
133
+ - users_involved_no_stars_count
134
+ - total_downloads_count
135
+ uniq_attributes:
136
+ - dependants
137
+ - users_creating_issues
138
+ - users_commenting_issues
139
+ - users_creating_pr
140
+ - users_commenting_pr
141
+ - contributors
142
+ - watchers
143
+ - stargazers
144
+ - forks
145
+ - users_involved
146
+ - owners_github
147
+ - owners_rubygems
148
+ attributes:
149
+ dependants: Array
150
+ users_creating_issues: Array
151
+ users_commenting_issues: Array
152
+ users_creating_pr: Array
153
+ users_commenting_pr: Array
154
+ contributors: Array
155
+ watchers: Array
156
+ stargazers: Array
157
+ forks: Array
158
+ users_involved: Array
159
+ owners_github: Array
160
+ owners_rubygems: Array
161
+ total_downloads: Ossert::Zero
162
+ delta_downloads: Ossert::Zero
163
+ quarter:
164
+ metrics:
165
+ - users_creating_issues_count
166
+ - users_commenting_issues_count
167
+ - users_creating_pr_count
168
+ - users_commenting_pr_count
169
+ - stargazers_count
170
+ - forks_count
171
+ - users_involved_count
172
+ - users_involved_no_stars_count
173
+ - total_downloads_count
174
+ uniq_attributes:
175
+ - users_creating_issues
176
+ - users_commenting_issues
177
+ - users_creating_pr
178
+ - users_commenting_pr
179
+ - contributors
180
+ - stargazers
181
+ - forks
182
+ - users_involved
183
+ - owners_github
184
+ - owners_rubygems
185
+ absolute_attributes:
186
+ - total_downloads
187
+ attributes:
188
+ users_creating_issues: Array
189
+ users_commenting_issues: Array
190
+ users_creating_pr: Array
191
+ users_commenting_pr: Array
192
+ contributors: Array
193
+ stargazers: Array
194
+ forks: Array
195
+ users_involved: Array
196
+ download_divergence: Ossert::Zero
197
+ total_downloads: Ossert::Zero
198
+ delta_downloads: Ossert::Zero
@@ -0,0 +1,44 @@
1
+ translations:
2
+ # Agility
3
+ dependencies_count: 'Number of Runtime Dependencies'
4
+ issues_closed_percent: 'Issues Closed %'
5
+ issues_non_owner_percent: 'Issues Created Not By Repo Owner'
6
+ issues_with_contrib_comments_percent: 'Issues With Contributors Comments'
7
+ issues_all_count: 'Number of Issues'
8
+ issues_processed_in_avg: 'Average Issue Processing Time'
9
+ issues_processed_in_median: 'Median Issue Processing Time'
10
+ first_issue_date_int: 'First Issue Created At'
11
+ last_issue_date_int: 'Last Issue Created At'
12
+ pr_closed_percent: 'Pull Requests Closed %'
13
+ pr_non_owner_percent: 'Pull Requests Created Not By Repo Owner'
14
+ pr_with_contrib_comments_percent: 'Pull Requests With Contributors Comments'
15
+ pr_all_count: 'Number of Pull Requests'
16
+ pr_processed_in_avg: 'Average Pull Request Processing Time'
17
+ pr_processed_in_median: 'Median Pull Request Processing Time'
18
+ first_pr_date_int: 'First Pull Request Created At'
19
+ last_pr_date_int: 'Last Pull Request Created At'
20
+ pr_actual_count: 'Number of Legacy Pull Requests'
21
+ last_release_date_int: 'Last Release Date'
22
+ releases_count: 'Number of Releases'
23
+ commits_count_since_last_release_count: 'Commits Since Last Release'
24
+ stale_branches_count: 'Number of Stale Branches'
25
+ life_period: 'Active Years Age'
26
+ last_changed: 'Last Changed At'
27
+ issues_actual_count: 'Number of Legacy Issues'
28
+ commits: 'Number of Commits Made'
29
+
30
+ # Community
31
+ dependants_count: 'Number of Dependants'
32
+ users_creating_issues_count: 'Number of Users Creating Issues'
33
+ users_commenting_issues_count: 'Number of Users Commenting Issues'
34
+ users_creating_pr_count: 'Number of Users Creating Pull Requests'
35
+ users_commenting_pr_count: 'Number of Users Commenting Pull Requests'
36
+ watchers_count: 'Number of Watchers'
37
+ stargazers_count: 'Number of Stargazers'
38
+ contributors_count: 'Number of Contributors'
39
+ forks_count: 'Number of Forks'
40
+ users_involved_count: 'Number of Total Users Involved'
41
+ users_involved_no_stars_count: 'Number of Users Involved Without Stargazers'
42
+ total_downloads_count: 'Number of Downloads'
43
+ delta_downloads: 'Number of Downloads Increased By'
44
+ download_divergence: 'Downloads Growth From Previous Period %'
data/db/backups/.keep ADDED
File without changes
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
# Creates the `projects` table: four mandatory string identifiers plus four
# JSON columns holding agility/community statistics.
Sequel.migration do
  up do
    # JSON column support for the PostgreSQL adapter.
    extension :pg_json

    create_table :projects do
      primary_key :id

      # Mandatory textual identifiers.
      %i(name github_name rubygems_name reference).each do |field|
        column field, String, null: false
      end

      # Statistics payloads, stored as JSON.
      %i(
        agility_total_data
        agility_quarters_data
        community_total_data
        community_quarters_data
      ).each do |field|
        column field, :json
      end
    end
  end

  down { drop_table :projects }
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
# Creates the `exceptions` table with two mandatory string columns:
# `name` and `github_name`.
Sequel.migration do
  up do
    create_table :exceptions do
      primary_key :id

      %i(name github_name).each do |field|
        column field, String, null: false
      end
    end
  end

  down { drop_table :exceptions }
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
# Adds a JSON `meta_data` column to the `projects` table.
Sequel.migration do
  up do
    extension :pg_json

    alter_table(:projects) do
      # Use the database type name (as the other migrations do) rather than
      # the Ruby `JSON` constant, so the migration does not depend on the
      # `json` library having been required beforehand.
      add_column :meta_data, :json
    end
  end

  down do
    drop_column :projects, :meta_data
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
# Adds NOT NULL `created_at` / `updated_at` timestamp columns (defaulting to
# NOW()) to the `projects` table.
Sequel.migration do
  up do
    %w(created_at updated_at).each do |timestamp_column|
      run "ALTER TABLE projects ADD COLUMN #{timestamp_column} TIMESTAMP DEFAULT NOW() NOT NULL;"
    end
  end

  down do
    %i(created_at updated_at).each do |timestamp_column|
      drop_column :projects, timestamp_column
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
# Creates the `classifiers` table: a mandatory `section` string, a JSON
# `reference_values` column and NOT NULL created/updated timestamps.
Sequel.migration do
  up do
    extension :pg_json

    create_table :classifiers do
      primary_key :id
      column :section, String, null: false
      column :reference_values, :json
    end

    # Timestamps are added with raw SQL so that NOW() is the column default.
    %w(created_at updated_at).each do |timestamp_column|
      run "ALTER TABLE classifiers ADD COLUMN #{timestamp_column} TIMESTAMP DEFAULT NOW() NOT NULL;"
    end
  end

  down { drop_table :classifiers }
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+ require 'graphr'
3
+ require 'decisiontree'
4
+
5
module Ossert
  module Classifiers
    # Classifies projects into grades (ClassA..ClassE) using one ID3 decision
    # tree per section: agility/community x total/last_year.
    #
    # Every instance registers itself in the class-level `all` list; the most
    # recently created one is available through `.current`.
    class DecisionTree
      GRADES = %w(
        ClassA
        ClassB
        ClassC
        ClassD
        ClassE
      ).freeze

      class << self
        # List of every classifier instantiated so far (nil until the first).
        attr_accessor :all

        # Builds a classifier whose training group is
        # `Project.projects_by_reference`.
        def for_current_projects
          new(Project.projects_by_reference)
        end

        # Most recently created classifier, or nil when none exists yet
        # (previously this raised NoMethodError on a nil `all`).
        def current
          Array(all).last
        end
      end

      # train_group - object indexed by grade name (see GRADES) that yields
      #               the projects belonging to that grade.
      def initialize(train_group)
        @train_group = train_group

        (self.class.all ||= []) << self
      end

      attr_reader :train_group
      attr_reader :agility_total_dec_tree,
                  :agility_last_year_dec_tree,
                  :community_total_dec_tree,
                  :community_last_year_dec_tree

      # Returns the predicted class of `project` for every section, as
      # { agility: { total:, last_year: }, community: { total:, last_year: } }.
      def check(project)
        {
          agility: {
            total: agility_total_check(project),
            last_year: agility_last_year_check(project)
          },
          community: {
            total: community_total_check(project),
            last_year: community_last_year_check(project)
          }
        }
      end

      # Section name => callable extracting that section's values from a project.
      SECTION_DATA = {
        agility_total: ->(project) { project.agility.total.metric_values },
        agility_last_year: ->(project) { project.agility.quarters.last_year_data },
        community_total: ->(project) { project.community.total.metric_values },
        community_last_year: ->(project) { project.community.quarters.last_year_data }
      }.freeze

      # Section name => metric names used as decision tree attributes.
      SECTION_METRICS = {
        agility_total: Stats::AgilityTotal.metrics,
        agility_last_year: Stats::AgilityQuarter.metrics,
        community_total: Stats::CommunityTotal.metrics,
        community_last_year: Stats::CommunityQuarter.metrics
      }.freeze

      # Defines <section>_<type>_check(project) for every section/type pair;
      # each one asks the matching trained tree for a prediction.
      [:community, :agility].each do |section|
        [:total, :last_year].each do |type|
          name = "#{section}_#{type}".to_sym
          define_method("#{name}_check") do |project|
            public_send("#{name}_dec_tree").predict(SECTION_DATA[name].call(project))
          end
        end
      end

      # Truthy once all four trees are present, i.e. after #train.
      def ready?
        agility_total_dec_tree.presence &&
          agility_last_year_dec_tree.presence &&
          community_total_dec_tree.presence &&
          community_last_year_dec_tree.presence
      end

      # Builds and trains one ID3 tree per section and stores them in the
      # corresponding *_dec_tree readers. Returns the array of trees.
      def train
        data = initialize_data

        trees = SECTION_METRICS.map do |section, metrics|
          ::DecisionTree::ID3Tree.new(metrics, data[section], 'ClassE', :continuous)
        end
        # Train every tree. The previous `.tap(&:train)` sent `train` to the
        # Array itself and raised NoMethodError.
        trees.each(&:train)

        # Relies on SECTION_METRICS keeping this exact key order.
        @agility_total_dec_tree,
        @agility_last_year_dec_tree,
        @community_total_dec_tree,
        @community_last_year_dec_tree = trees
      end

      # Collects training rows per section: each row is the section's values
      # for one project followed by that project's grade.
      def initialize_data
        result = { agility_total: [], agility_last_year: [], community_total: [], community_last_year: [] }

        GRADES.each do |grade|
          train_group[grade].each do |project|
            SECTION_DATA.each do |section, data_collector|
              # Copy before appending the label so the collection returned by
              # the project is never mutated.
              result[section] << (data_collector.call(project).dup << grade)
            end
          end
        end

        result
      end
    end
  end
end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
module Ossert
  module Classifiers
    class Growing
      # Runs the checks named in a config against a project and turns the
      # result of each check into a grade (A..E).
      class Check
        GRADES = %w(
          A
          B
          C
          D
          E
        ).freeze
        # Classifier class name => grade letter.
        KLASS_2_GRADE = {
          'ClassA' => 'A',
          'ClassB' => 'B',
          'ClassC' => 'C',
          'ClassD' => 'D',
          'ClassE' => 'E'
        }.freeze

        class << self
          # Runs `action` (:grade or :grade_as_hash) on every check listed in
          # config['checks'].
          #
          # Returns a Hash of check name (Symbol) => result of `action`.
          def process_using(action, config, project, classifiers, last_year_offset = 1)
            config['checks'].map do |check_name|
              checker = check_class_by(check_name).new(
                config, project, classifiers, last_year_offset
              )
              [check_name.to_sym, checker.public_send(action)]
            end.to_h
          end

          # Same arguments as process_using without `action`;
          # returns { check_name => { gain:, mark: } }.
          def check(*args)
            process_using(:grade_as_hash, *args)
          end

          # Same arguments as process_using without `action`;
          # returns { check_name => grade letter }.
          def grade(*args)
            process_using(:grade, *args)
          end

          # Resolves a check name such as "popularity" to its class
          # (Check::Popularity).
          def check_class_by(check_name)
            Kernel.const_get(
              "Ossert::Classifiers::Growing::Check::#{check_name.capitalize}"
            )
          end
        end

        # Shared implementation of a check. Subclasses provide #strategy, a
        # Hash of section type (:last_year / :total) => data type(s)
        # (:agility and/or :community).
        class Base
          def initialize(config, project, classifiers, last_year_offset = 1)
            @config = config
            @project = project
            @classifiers = classifiers
            @last_year_offset = last_year_offset
          end

          # Downcased class name without namespace, e.g. "popularity";
          # used as the key into config['metrics'].
          def metrics_type
            self.class.name.split('::').last.downcase
          end

          # Section => { metric => weight } for this check, with the weights
          # converted via #to_d. Memoized.
          def metrics
            return @metrics if defined? @metrics

            @metrics = @config['metrics'][metrics_type].map do |section, section_metrics|
              weights = section_metrics.map { |metric, weight| [metric, weight.to_d] }.to_h
              [section, weights]
            end.to_h
          end

          # Sum of all weights of the 'last_year' and 'total' sections.
          def max_gain
            @max_gain ||= (metrics['last_year'].values.sum + metrics['total'].values.sum).to_d
          end

          def community_last_year_data
            @community_last_year_data ||= @project.community.quarters.last_year_as_hash(@last_year_offset)
          end

          def community_total_data
            @community_total_data ||= @project.community.total.metrics_to_hash
          end

          def agility_last_year_data
            @agility_last_year_data ||= @project.agility.quarters.last_year_as_hash(@last_year_offset)
          end

          def agility_total_data
            @agility_total_data ||= @project.agility.total.metrics_to_hash
          end

          # Returns a Hash of grade letter => accumulated gain, built by
          # rating every (section type, data type) pair of the strategy
          # against the matching classifier.
          def check
            rates = GRADES.each_with_object({}) { |klass, res| res[klass] = 0.0.to_d }
            strategy.each do |(section_type, data_types)|
              Array(data_types).each do |data_type|
                rate(
                  rates, metrics[section_type.to_s], send("#{data_type}_#{section_type}_data"),
                  @classifiers.fetch("#{data_type}_#{section_type}".to_sym)
                )
              end
            end
            rates
          end

          # Grade letter for the project (the :mark of #grade_as_hash).
          def grade
            grade_as_hash[:mark]
          end

          # Returns { gain: total gain, mark: grade letter }.
          #
          # The total starts at -0.6, adds the gain of every grade and a 0.1
          # bonus for each gain above config['trusted_probability']. The mark
          # index is GRADES.count minus that total, truncated.
          def grade_as_hash
            sum = -0.6
            # Summation order does not affect the total, so no sorting is done.
            check.each_value do |gain|
              sum += gain
              sum += 0.1 if gain > trusted_probability
            end

            # Clamp into the valid index range: a total of 0 or less would
            # otherwise index past the last grade and yield nil.
            index = (GRADES.count - sum).to_i
            index = [[index, 0].max, GRADES.count - 1].min
            { gain: sum, mark: GRADES[index] }
          end

          protected

          def trusted_probability
            @trusted_probability ||= @config['trusted_probability']
          end

          # For every classifier class, adds weight / max_gain to that class's
          # grade in `rates` for each metric whose value in `data` is covered
          # by the class's range for the metric.
          def rate(rates, metrics, data, classifier)
            # Only metrics present in the data are rated (loop-invariant).
            rated_metrics = metrics.slice(*data.keys)

            classifier.each_pair do |klass, qualified_metrics|
              rated_metrics.each_pair do |metric, weight|
                range = qualified_metrics[metric.to_s][:range]
                rates[KLASS_2_GRADE[klass]] += weight / max_gain if range.cover? data[metric].to_f
              end
            end
          end
        end

        # Rated on community data only.
        class Popularity < Base
          def strategy
            {
              last_year: :community,
              total: :community
            }
          end
        end

        # Rated on agility data only.
        class Maintenance < Base
          def strategy
            {
              last_year: :agility,
              total: :agility
            }
          end
        end

        # Rated on both agility and community data.
        class Maturity < Base
          def strategy
            {
              last_year: [:agility, :community],
              total: [:agility, :community]
            }
          end
        end
      end
    end
  end
end