ossert 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.rubocop_todo.yml +44 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +16 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +199 -0
- data/Rakefile +12 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/config/classifiers.yml +153 -0
- data/config/descriptions.yml +45 -0
- data/config/sidekiq.rb +15 -0
- data/config/stats.yml +198 -0
- data/config/translations.yml +44 -0
- data/db/backups/.keep +0 -0
- data/db/migrate/001_create_projects.rb +22 -0
- data/db/migrate/002_create_exceptions.rb +14 -0
- data/db/migrate/003_add_meta_to_projects.rb +14 -0
- data/db/migrate/004_add_timestamps_to_projects.rb +12 -0
- data/db/migrate/005_create_classifiers.rb +19 -0
- data/lib/ossert/classifiers/decision_tree.rb +112 -0
- data/lib/ossert/classifiers/growing/check.rb +172 -0
- data/lib/ossert/classifiers/growing/classifier.rb +175 -0
- data/lib/ossert/classifiers/growing.rb +163 -0
- data/lib/ossert/classifiers.rb +14 -0
- data/lib/ossert/config.rb +24 -0
- data/lib/ossert/fetch/bestgems.rb +98 -0
- data/lib/ossert/fetch/github.rb +536 -0
- data/lib/ossert/fetch/rubygems.rb +80 -0
- data/lib/ossert/fetch.rb +142 -0
- data/lib/ossert/presenters/project.rb +202 -0
- data/lib/ossert/presenters/project_v2.rb +117 -0
- data/lib/ossert/presenters.rb +8 -0
- data/lib/ossert/project.rb +144 -0
- data/lib/ossert/quarters_store.rb +164 -0
- data/lib/ossert/rake_tasks.rb +6 -0
- data/lib/ossert/reference.rb +87 -0
- data/lib/ossert/repositories.rb +138 -0
- data/lib/ossert/saveable.rb +153 -0
- data/lib/ossert/stats/agility_quarter.rb +62 -0
- data/lib/ossert/stats/agility_total.rb +71 -0
- data/lib/ossert/stats/base.rb +113 -0
- data/lib/ossert/stats/community_quarter.rb +28 -0
- data/lib/ossert/stats/community_total.rb +24 -0
- data/lib/ossert/stats.rb +32 -0
- data/lib/ossert/tasks/database.rake +179 -0
- data/lib/ossert/tasks/ossert.rake +52 -0
- data/lib/ossert/version.rb +4 -0
- data/lib/ossert/workers/fetch.rb +21 -0
- data/lib/ossert/workers/fetch_bestgems_page.rb +32 -0
- data/lib/ossert/workers/refresh_fetch.rb +22 -0
- data/lib/ossert/workers/sync_rubygems.rb +0 -0
- data/lib/ossert/workers.rb +11 -0
- data/lib/ossert.rb +63 -0
- data/ossert.gemspec +47 -0
- metadata +396 -0
@@ -0,0 +1,175 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Ossert
|
3
|
+
module Classifiers
|
4
|
+
class Growing
|
5
|
+
# Converts the raw samples collected for every grade into per-grade
# thresholds and ranges.
#
# +classifier+ is a hash keyed by grade name; each value maps a metric name
# to the list of values gathered for that grade. #train rewrites this hash
# in place and returns it.
class Classifier
  GRADES = %w(
    ClassA
    ClassB
    ClassC
    ClassD
    ClassE
  ).freeze
  # Mirror image of every grade, used by #run_reverse when swapping entries.
  REVERSED_GRADE = {
    'ClassA' => 'ClassE',
    'ClassB' => 'ClassD',
    'ClassC' => 'ClassC',
    'ClassD' => 'ClassB',
    'ClassE' => 'ClassA'
  }.freeze

  attr_reader :classifier, :config

  # @param classifier [Hash] grade name => { metric name => values }
  # @param config [Hash] settings; read through the 'syntetics' and 'reversed' keys
  def initialize(classifier, config)
    @classifier = classifier
    @config = config
  end

  # Runs every training step in order.
  #
  # @return [Hash] the classifier hash, rewritten in place
  def train
    run_aggregation
    run_syntetics
    run_values_to_ranges
    run_reverse

    classifier
  end

  # Collapses the sample list of every metric into a single number.
  #
  # The samples of a grade are pooled with the samples of the next grade in
  # GRADES (the last grade has no next one). With more than two pooled
  # samples the result is their mean rounded to two digits; otherwise it is
  # the maximum of the grade's own samples, or 0 when it has none.
  def run_aggregation
    GRADES.each_with_index do |grade, idx|
      next_grade = GRADES[idx + 1]

      classifier[grade].each_pair do |metric, values|
        # The next grade may not know this metric at all; a missing entry
        # counts as "no samples" instead of blowing up on `nil + values`.
        sibling_class_values = next_grade ? Array(classifier[next_grade][metric]) : []
        all_values = sibling_class_values + values

        classifier[grade][metric] =
          if all_values.count <= 2
            values.max || 0
          else
            (all_values.sum / all_values.count).round(2)
          end
      end
    end
  end

  # Overwrites the thresholds of every metric listed under
  # config['syntetics'] with values derived from its configured range.
  # Metrics that no grade has a value for are left untouched.
  def run_syntetics
    config['syntetics'].each do |synt_metric, values_range|
      real_values = classifier.values.map { |metrics| metrics[synt_metric] }.compact
      next if real_values.empty?

      values_range = Array.wrap(values_range)
      values_range = values_range.reverse if reversed_metrics.include? synt_metric
      step_threshold = syntetic_step_threshold(values_range)

      GRADES.each_with_index do |grade, idx|
        classifier[grade][synt_metric] = step_threshold.call(idx)
      end
    end
  end

  # Builds a lambda mapping a grade index to its threshold.
  #
  # When the range lists exactly one value per grade, the value at that index
  # is used directly. Otherwise the first two elements are taken as the
  # starting and the ending value (both via to_i) and every grade moves one
  # fifth of that distance away from the starting value.
  #
  # @param values_range [Array]
  # @return [Proc] lambda taking the grade index
  def syntetic_step_threshold(values_range)
    return ->(idx) { values_range[idx] } if values_range.count == GRADES.count
    max_value, min_value = values_range
    min_value = min_value.to_i
    max_value = max_value.to_i
    step = ((max_value - min_value) / GRADES.count.to_f).round(2)
    ->(idx) { max_value - (step * idx).round(2) }
  end

  # Replaces every aggregated value with a hash holding the threshold itself
  # and the range of values that qualify for the grade.
  def run_values_to_ranges
    GRADES.each do |grade|
      classifier[grade].each_pair do |metric, value|
        classifier[grade][metric] = {
          threshold: value,
          range: ThresholdToRange.range_for(metric, value, grade)
        }
      end
    end
  end

  # Picks the range implementation for a metric: Reversed for metrics listed
  # as reversed, Base for all others.
  class ThresholdToRange
    def self.range_for(metric, value, grade)
      new(metric, value, grade).range
    end

    def initialize(metric, value, grade)
      @metric = metric
      @value = value
      @grade = grade
    end

    def range
      if reversed_metrics.include?(@metric)
        Reversed.new(@value, @grade).range
      else
        Base.new(@value, @grade).range
      end
    end

    # NOTE(review): this reads the class-level Growing config, not the config
    # handed to the enclosing Classifier instance - confirm they always match.
    def reversed_metrics
      @reversed_metrics ||= Ossert::Classifiers::Growing.config['reversed']
    end

    # Range starting at the threshold and open towards +infinity.
    # The last grade accepts any value.
    class Base
      def initialize(value, grade)
        @value = value
        @full_range = (grade == last_grade)
      end

      def range
        return full_range if full_range?
        start_value...end_value
      end

      private

      def full_range?
        @full_range
      end

      def last_grade
        GRADES.last
      end

      def full_range
        -Float::INFINITY...Float::INFINITY
      end

      def start_value
        @value
      end

      def end_value
        Float::INFINITY
      end
    end

    # Range open towards -infinity and ending (exclusively) at the threshold.
    # Here the first grade is the one that accepts any value.
    class Reversed < Base
      def last_grade
        GRADES.first
      end

      def start_value
        -Float::INFINITY
      end

      def end_value
        @value
      end
    end
  end

  # Names of the metrics listed under config['reversed'].
  def reversed_metrics
    @reversed_metrics ||= config['reversed']
  end

  # Swaps the entry of every reversed metric between each of the first
  # GRADES.count / 2 grades and its mirror grade from REVERSED_GRADE.
  # Grades without an entry for the metric are skipped.
  def run_reverse
    reversed_metrics.each do |reversed_metric|
      GRADES.first(GRADES.count / 2).each do |grade|
        grade_metrics = classifier[grade]
        next unless grade_metrics[reversed_metric].present?
        reversed_grade_metrics = classifier[REVERSED_GRADE[grade]]

        reversed_grade_metrics[reversed_metric], grade_metrics[reversed_metric] =
          grade_metrics[reversed_metric], reversed_grade_metrics[reversed_metric]
      end
    end
  end
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'ossert/classifiers/growing/classifier'
|
3
|
+
require 'ossert/classifiers/growing/check'
|
4
|
+
|
5
|
+
module Ossert
|
6
|
+
module Classifiers
|
7
|
+
# Grade-based classifier. Holds four trained references (agility/community,
# total/last year) and delegates grading of a project to Check.
class Growing
  GRADES = %w(
    ClassA
    ClassB
    ClassC
    ClassD
    ClassE
  ).freeze

  class << self
    # Every instance created so far, in creation order (nil until the first one).
    attr_accessor :all

    # @return [Growing, nil] the most recently created instance, nil when
    #   nothing has been instantiated yet
    def current
      (all || []).last
    end

    # Memoized 'classifiers' => 'growth' section of the global Settings.
    def config
      @config ||= Settings['classifiers']['growth']
    end
  end

  # Registers the new instance in Growing.all.
  def initialize
    (self.class.all ||= []) << self
  end

  attr_reader :train_group
  attr_reader :agility_total_classifier, :community_total_classifier,
              :agility_last_year_classifier, :community_last_year_classifier

  # @return [Boolean] true when both "total" classifiers hold exactly the
  #   keys listed in GRADES; false for an untrained instance
  def ready?
    [agility_total_classifier, community_total_classifier].all? do |classifier|
      classifier && classifier.keys == GRADES
    end
  end

  # Trained values regrouped as metric => grade => value for every section.
  # The quarter and year sections are both built from the last-year classifier.
  def reference_values_per_grade
    {
      agility_total: classifier_to_metrics_per_grade(agility_total_classifier),
      agility_quarter: classifier_to_metrics_per_grade(agility_last_year_classifier),
      agility_year: classifier_to_metrics_per_grade(agility_last_year_classifier),
      community_total: classifier_to_metrics_per_grade(community_total_classifier),
      community_quarter: classifier_to_metrics_per_grade(community_last_year_classifier),
      community_year: classifier_to_metrics_per_grade(community_last_year_classifier)
    }
  end

  # Inverts grade => metric => value into metric => grade => value.
  #
  # @param classifier [Hash] grade => { metric => value }
  # @return [Hash] metric => { grade => value }
  def classifier_to_metrics_per_grade(classifier)
    classifier.each_with_object({}) do |(grade, metrics), res|
      metrics.each do |metric, value|
        (res[metric] ||= {})[grade] = value
      end
    end
  end

  # Sends +action+ to Check together with the config, the project, the four
  # trained classifiers and the last-year offset.
  def process_using(action, project, last_year_offset = 1)
    Check.send(action,
               self.class.config,
               project,
               {
                 agility_total: agility_total_classifier,
                 community_total: community_total_classifier,
                 agility_last_year: agility_last_year_classifier,
                 community_last_year: community_last_year_classifier
               },
               last_year_offset)
  end

  # Shortcut for process_using(:grade, project, last_year_offset).
  def grade(*args)
    process_using(:grade, *args)
  end

  # Shortcut for process_using(:check, project, last_year_offset).
  def check(*args)
    process_using(:check, *args)
  end

  # Loads or collects the reference values, trains one Classifier per section
  # and stores each result in the matching @<section>_classifier variable.
  def train
    classifiers_initializer = ClassifiersInitializer.load_or_create
    classifiers_initializer.run
    classifiers_initializer.classifiers.each do |name, classifier|
      instance_variable_set(
        "@#{name}_classifier",
        Classifier.new(classifier, self.class.config).train
      )
    end
  end

  # Provides the untrained reference values per section, either restored from
  # the ::Classifier records or collected from the reference projects.
  class ClassifiersInitializer
    # Section name => lambda extracting that section's metrics from a project.
    CLASSIFIERS_METRICS = {
      agility_total: ->(project) { project.agility.total.metrics_to_hash },
      agility_last_year: ->(project) { project.agility.quarters.last_year_as_hash },
      community_total: ->(project) { project.community.total.metrics_to_hash },
      community_last_year: ->(project) { project.community.quarters.last_year_as_hash }
    }.freeze

    # Restores stored values when ::Classifier reports them as actual,
    # otherwise prepares a collection run over Project.projects_by_reference.
    def self.load_or_create
      if ::Classifier.actual?
        new.load
      else
        new(Project.projects_by_reference)
      end
    end

    attr_reader :classifiers

    # @param grouped_projects [Hash, nil] indexed by grade name in #run
    def initialize(grouped_projects = nil)
      @projects = grouped_projects
      # A Hash like everywhere else (#load, #run, #save), so readers never
      # have to deal with an Array before #run was called.
      @classifiers = {}
    end

    # Reads the stored reference values of every section.
    #
    # @return [ClassifiersInitializer] self
    def load
      @classifiers = {}
      CLASSIFIERS_METRICS.keys.each do |section|
        @classifiers[section] = JSON.parse(::Classifier[section.to_s].reference_values)
      end
      self
    end

    # Appends every metric value (as Float) to the list kept under the
    # metric's string name.
    #
    # @param storage [Hash] metric name => values; modified in place
    # @param metrics [Hash] metric => value
    # @return [Hash] storage
    def merge_metrics(storage, metrics)
      metrics.each do |metric, value|
        storage.store(
          metric.to_s,
          storage[metric.to_s].to_a << value.to_f
        )
      end
      storage
    end

    # Replaces all stored ::Classifier records with the current values.
    def save
      ::Classifier.dataset.delete

      @classifiers.each do |section, reference_values|
        ::Classifier.create(
          section: section.to_s,
          reference_values: JSON.generate(reference_values)
        )
      end
    end

    # @return [Hash] every section name mapped to an empty hash
    def new_classifiers
      CLASSIFIERS_METRICS.keys.map { |type| [type, {}] }.to_h
    end

    # Collects the metric values of every project per section and grade,
    # then persists them. Does nothing when values are already present.
    def run
      return if @classifiers.present?

      @classifiers = GRADES.each_with_object(new_classifiers) do |grade, classifiers|
        @projects[grade].each do |project|
          CLASSIFIERS_METRICS.each do |type, metrics|
            classifiers[type][grade] = merge_metrics(classifiers[type][grade].to_h, metrics.call(project))
          end
        end
      end

      save
    end
  end
end
|
162
|
+
end
|
163
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'ossert/classifiers/decision_tree'
|
3
|
+
require 'ossert/classifiers/growing'
|
4
|
+
|
5
|
+
module Ossert
|
6
|
+
module Classifiers
  module_function

  # Builds a fresh Growing classifier and trains it.
  def train
    # The DecisionTree classifier is stale and not trustworthy, so it stays off:
    # DecisionTree.new(projects_by_reference).train
    Growing.new.train
  end
end
|
14
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
module Ossert
|
5
|
+
# Loads YAML files from the gem's config directory and publishes the merged
# result as the Settings constant.
class Config
  CONFIG_ROOT = File.join(File.dirname(__FILE__), '..', '..', 'config')
  CONST_NAME = 'Settings'

  # Merges the named config files, in the given order, into one hash and
  # (re)defines Kernel::Settings with it. Names without a matching
  # "<name>.yml" under CONFIG_ROOT are skipped.
  #
  # TODO: use ERB, needed for Time syntetics
  #
  # @param configs [Array<#to_s>] config names without the ".yml" extension
  # @return [Hash] the merged configuration
  # @raise [RuntimeError] when a file is not valid YAML; the message names the file
  def self.load(*configs)
    config_data = configs.each_with_object({}) do |config, result|
      path = File.join(CONFIG_ROOT, "#{config}.yml")
      next unless File.exist?(path)

      result.merge!(parse_file(path))
    end

    Kernel.send(:remove_const, CONST_NAME) if Kernel.const_defined?(CONST_NAME)
    Kernel.const_set(CONST_NAME, config_data)
  end

  # Parses one YAML file. The rescue lives here so the failing path is still
  # in scope when the error message is built.
  def self.parse_file(path)
    # An empty document parses to a falsy value; treat it as "no settings".
    YAML.load(IO.read(path)) || {}
  rescue Psych::SyntaxError => e
    raise "YAML syntax error occurred while parsing #{path}. " \
          "Error: #{e.message}"
  end
  private_class_method :parse_file
end
|
24
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Ossert
|
3
|
+
module Fetch
|
4
|
+
# Shared scraper for the bestgems.org listing pages. Subclasses supply the
# page to scrape through .endpoint.
class BestgemsBase
  class << self
    # Downloads one listing page and yields rank, downloads and gem name for
    # every group of four table cells.
    #
    # NOTE(review): `open` with a URL relies on open-uri's Kernel#open hook,
    # which Ruby 3.0 removed in favour of URI.open - confirm the target Ruby.
    #
    # @param page [Integer, nil] page number; nil requests the first page
    # @yield [rank, downloads, name] Integer, Integer, String
    def process_page(page = nil)
      query = page ? "?page=#{page}" : ''
      html = Nokogiri::HTML(open("http://bestgems.org/#{endpoint}#{query}"))
      cells = html.css('table').xpath('//tr//td')

      cells.each_slice(4) do |rank_cell, downloads_cell, name_cell, _|
        # Numbers are rendered with thousands separators, e.g. "1,234".
        position = rank_cell.text.delete(',').to_i
        amount = downloads_cell.text.delete(',').to_i
        yield(position, amount, name_cell.text)
      end
    end
  end
end
|
14
|
+
|
15
|
+
# Scraper for the "daily" listing.
class BestgemsDailyStat < BestgemsBase
  class << self
    # Path segment requested by BestgemsBase.process_page.
    def endpoint
      :daily
    end
  end
end
|
20
|
+
|
21
|
+
# Scraper for the "total" listing.
class BestgemsTotalStat < BestgemsBase
  class << self
    # Path segment requested by BestgemsBase.process_page.
    def endpoint
      :total
    end
  end
end
|
26
|
+
|
27
|
+
# Fills a project's community download statistics from the bestgems.org API.
class Bestgems
  attr_reader :client, :project

  extend Forwardable
  def_delegators :project, :agility, :community, :meta

  # @param project [#rubygems_alias, #community] project whose stats are filled
  def initialize(project)
    @client = SimpleClient.new('http://bestgems.org/api/v1/')
    @project = project
  end

  # Raw "total_downloads" API response for the project's gem.
  def total_downloads
    gem_stat('total_downloads')
  end

  # Raw "daily_downloads" API response for the project's gem.
  def daily_downloads
    gem_stat('daily_downloads')
  end

  # Raw "total_ranking" API response for the project's gem.
  def total_ranking
    gem_stat('total_ranking')
  end

  # Raw "daily_ranking" API response for the project's gem.
  def daily_ranking
    gem_stat('daily_ranking')
  end

  # Runs all download processing steps in order.
  def process
    process_total_downloads
    process_daily_downloads
    process_delta_downloads
  end

  # Stores the highest total seen for every quarter and takes the first
  # response entry as the project's overall total. An empty response leaves
  # the overall total untouched.
  def process_total_downloads
    @downloads_till_now = nil
    total_downloads.each do |total|
      @downloads_till_now ||= total
      quarter_downloads = community.quarters[total['date']]

      quarter_downloads.total_downloads = [
        quarter_downloads.total_downloads.to_i,
        total['total_downloads']
      ].max
    end
    return unless @downloads_till_now

    community.total.total_downloads = @downloads_till_now['total_downloads']
  end

  # Adds every daily figure to the delta_downloads of its quarter.
  def process_daily_downloads
    daily_downloads.each do |daily|
      downloads_saved = community.quarters[daily['date']].delta_downloads.to_i
      community.quarters[daily['date']].delta_downloads = downloads_saved + daily['daily_downloads']
    end
  end

  # Stores a download divergence (percent of the overall total) for every
  # quarter, walking the quarters through each_sorted.
  def process_delta_downloads
    # Stays nil when process_total_downloads did not run or found nothing;
    # divergence then reports 0.0 instead of failing.
    overall_downloads = @downloads_till_now && @downloads_till_now['total_downloads']
    prev_downloads_delta = 0

    community.quarters.each_sorted do |start_date, stat|
      # NOTE(review): the running value is subtracted from the quarter's
      # delta, not the previous quarter's delta itself - confirm this is the
      # intended formula before changing it.
      prev_downloads_delta = stat.delta_downloads.to_i - prev_downloads_delta
      community.quarters[start_date].download_divergence = divergence(
        prev_downloads_delta, overall_downloads
      )
    end
  end

  private

  # Fetches one per-gem statistic from the API.
  def gem_stat(name)
    client.get("gems/#{project.rubygems_alias}/#{name}.json")
  end

  # Share of +delta+ in +total+ as a percentage rounded to two digits;
  # 0.0 when the total is zero or unknown, so NaN/Infinity are never stored.
  def divergence(delta, total)
    total = total.to_f
    return 0.0 if total.zero?

    (delta.to_f / total * 100.0).round(2)
  end
end
|
97
|
+
end
|
98
|
+
end
|