ossert 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rspec +2 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +16 -0
  8. data/Gemfile +8 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +199 -0
  11. data/Rakefile +12 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/config/classifiers.yml +153 -0
  15. data/config/descriptions.yml +45 -0
  16. data/config/sidekiq.rb +15 -0
  17. data/config/stats.yml +198 -0
  18. data/config/translations.yml +44 -0
  19. data/db/backups/.keep +0 -0
  20. data/db/migrate/001_create_projects.rb +22 -0
  21. data/db/migrate/002_create_exceptions.rb +14 -0
  22. data/db/migrate/003_add_meta_to_projects.rb +14 -0
  23. data/db/migrate/004_add_timestamps_to_projects.rb +12 -0
  24. data/db/migrate/005_create_classifiers.rb +19 -0
  25. data/lib/ossert/classifiers/decision_tree.rb +112 -0
  26. data/lib/ossert/classifiers/growing/check.rb +172 -0
  27. data/lib/ossert/classifiers/growing/classifier.rb +175 -0
  28. data/lib/ossert/classifiers/growing.rb +163 -0
  29. data/lib/ossert/classifiers.rb +14 -0
  30. data/lib/ossert/config.rb +24 -0
  31. data/lib/ossert/fetch/bestgems.rb +98 -0
  32. data/lib/ossert/fetch/github.rb +536 -0
  33. data/lib/ossert/fetch/rubygems.rb +80 -0
  34. data/lib/ossert/fetch.rb +142 -0
  35. data/lib/ossert/presenters/project.rb +202 -0
  36. data/lib/ossert/presenters/project_v2.rb +117 -0
  37. data/lib/ossert/presenters.rb +8 -0
  38. data/lib/ossert/project.rb +144 -0
  39. data/lib/ossert/quarters_store.rb +164 -0
  40. data/lib/ossert/rake_tasks.rb +6 -0
  41. data/lib/ossert/reference.rb +87 -0
  42. data/lib/ossert/repositories.rb +138 -0
  43. data/lib/ossert/saveable.rb +153 -0
  44. data/lib/ossert/stats/agility_quarter.rb +62 -0
  45. data/lib/ossert/stats/agility_total.rb +71 -0
  46. data/lib/ossert/stats/base.rb +113 -0
  47. data/lib/ossert/stats/community_quarter.rb +28 -0
  48. data/lib/ossert/stats/community_total.rb +24 -0
  49. data/lib/ossert/stats.rb +32 -0
  50. data/lib/ossert/tasks/database.rake +179 -0
  51. data/lib/ossert/tasks/ossert.rake +52 -0
  52. data/lib/ossert/version.rb +4 -0
  53. data/lib/ossert/workers/fetch.rb +21 -0
  54. data/lib/ossert/workers/fetch_bestgems_page.rb +32 -0
  55. data/lib/ossert/workers/refresh_fetch.rb +22 -0
  56. data/lib/ossert/workers/sync_rubygems.rb +0 -0
  57. data/lib/ossert/workers.rb +11 -0
  58. data/lib/ossert.rb +63 -0
  59. data/ossert.gemspec +47 -0
  60. metadata +396 -0
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+ module Ossert
3
+ module Classifiers
4
+ class Growing
5
# Turns raw per-grade metric samples into per-grade thresholds and ranges.
#
# +classifier+ is a Hash keyed by grade name (see GRADES). Each value is a
# Hash of metric name => Array of sampled values. #train rewrites that Hash
# in place, so every metric ends up as <tt>{ threshold:, range: }</tt>.
#
# +config+ is a Hash read through two keys:
# * <tt>'syntetics'</tt> - metric name => explicit thresholds or a
#   two-element pair of bounds (see #syntetic_step_threshold);
# * <tt>'reversed'</tt>  - list of metric names handled by the
#   "reversed" range/threshold logic.
class Classifier
  GRADES = %w(
    ClassA
    ClassB
    ClassC
    ClassD
    ClassE
  ).freeze
  REVERSED_GRADE = {
    'ClassA' => 'ClassE',
    'ClassB' => 'ClassD',
    'ClassC' => 'ClassC',
    'ClassD' => 'ClassB',
    'ClassE' => 'ClassA'
  }.freeze

  attr_reader :classifier, :config

  # @param classifier [Hash] grade => { metric => Array of values }
  # @param config [Hash] settings with 'syntetics' and 'reversed' keys
  def initialize(classifier, config)
    @classifier = classifier
    @config = config
  end

  # Runs all training steps in order and returns the (mutated) classifier.
  # The order matters: ranges are built from aggregated thresholds, and the
  # reversed-metric swap operates on the finished threshold/range pairs.
  def train
    run_aggregation
    run_syntetics
    run_values_to_ranges
    run_reverse

    classifier
  end

  # Collapses every metric's sample Array into a single threshold.
  #
  # Samples of the next grade are pooled with the grade's own samples.
  # With more than two pooled samples the threshold is their mean rounded
  # to two digits; otherwise it is the grade's own maximum (0 when the
  # grade has no samples).
  def run_aggregation
    GRADES.each_with_index do |grade, idx|
      # Grades are processed in order, so the next grade still holds raw
      # sample arrays at this point.
      sibling_metrics = classifier[GRADES[idx + 1]] if (idx + 1) < GRADES.count

      classifier[grade].each_pair do |metric, values|
        # The next grade may be missing entirely or may not know this
        # metric; treat both as "no extra samples" instead of crashing on
        # nil + Array.
        sibling_class_values = sibling_metrics ? Array(sibling_metrics[metric]) : []
        all_values = sibling_class_values + values

        classifier[grade][metric] =
          if all_values.count <= 2
            values.max || 0
          else
            (all_values.sum / all_values.count).round(2)
          end
      end
    end
  end

  # Overwrites thresholds of the metrics listed under config['syntetics'].
  # A metric is skipped when no grade currently holds a value for it.
  def run_syntetics
    config['syntetics'].each do |synt_metric, values_range|
      known = classifier.values.any? { |metrics| !metrics[synt_metric].nil? }
      next unless known

      values_range = Array.wrap(values_range)
      values_range = values_range.reverse if reversed_metrics.include? synt_metric
      step_threshold = syntetic_step_threshold(values_range)

      GRADES.each_with_index do |grade, idx|
        classifier[grade][synt_metric] = step_threshold.call(idx)
      end
    end
  end

  # Builds a lambda mapping a grade index to its threshold.
  #
  # When +values_range+ holds exactly one entry per grade the entries are
  # used verbatim. Otherwise its first two entries are taken as bounds
  # (converted with to_i) and thresholds start at the first bound, moving
  # towards the second in steps of (first - second) / GRADES.count.
  def syntetic_step_threshold(values_range)
    return ->(idx) { values_range[idx] } if values_range.count == GRADES.count
    max_value, min_value = values_range
    min_value = min_value.to_i
    max_value = max_value.to_i
    step = ((max_value - min_value) / GRADES.count.to_f).round(2)
    ->(idx) { max_value - (step * idx).round(2) }
  end

  # Replaces every threshold with a { threshold:, range: } Hash.
  def run_values_to_ranges
    GRADES.each do |grade|
      classifier[grade].each_pair do |metric, value|
        classifier[grade][metric] = {
          threshold: value,
          # Hand over this instance's reversed list so the range logic uses
          # the same config as the rest of the training steps.
          range: ThresholdToRange.range_for(metric, value, grade, reversed_metrics)
        }
      end
    end
  end

  # Converts a single threshold into the Range of values matching a grade.
  class ThresholdToRange
    # @param metric [String] metric name
    # @param value [Numeric] threshold of the metric for +grade+
    # @param grade [String] grade name
    # @param reversed_metrics [Array, nil] metrics using the reversed
    #   logic; when nil, Ossert::Classifiers::Growing.config['reversed']
    #   is used
    # @return [Range]
    def self.range_for(metric, value, grade, reversed_metrics = nil)
      new(metric, value, grade, reversed_metrics).range
    end

    def initialize(metric, value, grade, reversed_metrics = nil)
      @metric = metric
      @value = value
      @grade = grade
      @reversed_metrics = reversed_metrics
    end

    def range
      strategy = reversed_metrics.include?(@metric) ? Reversed : Base
      strategy.new(@value, @grade).range
    end

    def reversed_metrics
      @reversed_metrics ||= Ossert::Classifiers::Growing.config['reversed']
    end

    # Regular metric: value...Infinity, except for the last grade, which
    # accepts everything.
    class Base
      def initialize(value, grade)
        @value = value
        @full_range = (grade == last_grade)
      end

      def range
        return full_range if full_range?
        start_value...end_value
      end

      private

      def full_range?
        @full_range
      end

      def last_grade
        GRADES.last
      end

      def full_range
        -Float::INFINITY...Float::INFINITY
      end

      def start_value
        @value
      end

      def end_value
        Float::INFINITY
      end
    end

    # Reversed metric: -Infinity...value, with the catch-all range attached
    # to the first grade instead of the last one.
    class Reversed < Base
      def last_grade
        GRADES.first
      end

      def start_value
        -Float::INFINITY
      end

      def end_value
        @value
      end
    end
  end

  # Metric names listed under config['reversed'] (memoized).
  def reversed_metrics
    @reversed_metrics ||= config['reversed']
  end

  # Swaps the entries of every reversed metric between mirrored grades
  # (see REVERSED_GRADE). Only the first half of GRADES is walked, so each
  # pair is swapped exactly once and the middle grade is left alone.
  def run_reverse
    reversed_metrics.each do |reversed_metric|
      GRADES.first(GRADES.count / 2).each do |grade|
        grade_metrics = classifier[grade]
        next unless grade_metrics[reversed_metric].present?
        reversed_grade_metrics = classifier[REVERSED_GRADE[grade]]

        reversed_grade_metrics[reversed_metric], grade_metrics[reversed_metric] =
          grade_metrics[reversed_metric], reversed_grade_metrics[reversed_metric]
      end
    end
  end
end
173
+ end
174
+ end
175
+ end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+ require 'ossert/classifiers/growing/classifier'
3
+ require 'ossert/classifiers/growing/check'
4
+
5
+ module Ossert
6
+ module Classifiers
7
# Grade-based classifier built from reference projects.
#
# Every instance registers itself in Growing.all; Growing.current is the most
# recently created one. #train fills the four *_classifier readers.
class Growing
  GRADES = %w(
    ClassA
    ClassB
    ClassC
    ClassD
    ClassE
  ).freeze

  class << self
    attr_accessor :all

    # @return [Growing, nil] the most recently created instance, or nil when
    #   none has been created yet
    def current
      (all || []).last
    end

    # Settings for this classifier, read from Settings['classifiers']['growth'].
    def config
      @config ||= Settings['classifiers']['growth']
    end
  end

  def initialize
    (self.class.all ||= []) << self
  end

  attr_reader :train_group
  attr_reader :agility_total_classifier, :community_total_classifier,
              :agility_last_year_classifier, :community_last_year_classifier

  # @return [Boolean] true when both "total" classifiers exist and are keyed
  #   by exactly GRADES; false otherwise (including before #train has run)
  def ready?
    [agility_total_classifier, community_total_classifier].all? do |classifier|
      !classifier.nil? && classifier.keys == GRADES
    end
  end

  # Reference values regrouped as section => metric => grade => value.
  # The quarter and year sections are both derived from the last-year
  # classifiers.
  def reference_values_per_grade
    {
      agility_total: classifier_to_metrics_per_grade(agility_total_classifier),
      agility_quarter: classifier_to_metrics_per_grade(agility_last_year_classifier),
      agility_year: classifier_to_metrics_per_grade(agility_last_year_classifier),
      community_total: classifier_to_metrics_per_grade(community_total_classifier),
      community_quarter: classifier_to_metrics_per_grade(community_last_year_classifier),
      community_year: classifier_to_metrics_per_grade(community_last_year_classifier)
    }
  end

  # Inverts grade => metric => value into metric => grade => value.
  def classifier_to_metrics_per_grade(classifier)
    classifier.each_with_object({}) do |(grade, metrics), res|
      metrics.each do |metric, value|
        (res[metric] ||= {})[grade] = value
      end
    end
  end

  # Dispatches +action+ to Check, handing over the config, the project, the
  # four trained classifiers and +last_year_offset+.
  def process_using(action, project, last_year_offset = 1)
    Check.send(action,
               self.class.config,
               project,
               {
                 agility_total: agility_total_classifier,
                 community_total: community_total_classifier,
                 agility_last_year: agility_last_year_classifier,
                 community_last_year: community_last_year_classifier
               },
               last_year_offset)
  end

  # Shortcut for process_using(:grade, *args).
  def grade(*args)
    process_using(:grade, *args)
  end

  # Shortcut for process_using(:check, *args).
  def check(*args)
    process_using(:check, *args)
  end

  # Loads or rebuilds the raw reference values, trains one Classifier per
  # section and stores each result in the matching @<section>_classifier.
  def train
    classifiers_initializer = ClassifiersInitializer.load_or_create
    classifiers_initializer.run
    classifiers_initializer.classifiers.each do |name, classifier|
      instance_variable_set(
        "@#{name}_classifier",
        Classifier.new(classifier, self.class.config).train
      )
    end
  end

  # Collects raw reference values per section and grade, either from the
  # stored ::Classifier records or from the reference projects.
  class ClassifiersInitializer
    CLASSIFIERS_METRICS = {
      agility_total: ->(project) { project.agility.total.metrics_to_hash },
      agility_last_year: ->(project) { project.agility.quarters.last_year_as_hash },
      community_total: ->(project) { project.community.total.metrics_to_hash },
      community_last_year: ->(project) { project.community.quarters.last_year_as_hash }
    }.freeze

    # Uses the stored records when ::Classifier.actual? is true, otherwise
    # prepares an initializer over Project.projects_by_reference.
    def self.load_or_create
      if ::Classifier.actual?
        new.load
      else
        new(Project.projects_by_reference)
      end
    end

    attr_reader :classifiers

    # @param grouped_projects [Object, nil] projects indexed by grade name
    def initialize(grouped_projects = nil)
      @projects = grouped_projects
      # A Hash, like everywhere else this variable is assigned.
      @classifiers = {}
    end

    # Reads every section's reference values from the ::Classifier records.
    # @return [self]
    def load
      @classifiers = {}
      CLASSIFIERS_METRICS.keys.each do |section|
        @classifiers[section] = JSON.parse(::Classifier[section.to_s].reference_values)
      end
      self
    end

    # Appends each metric value (as Float) to the list kept in +storage+ under
    # the metric's string name. Returns +storage+.
    def merge_metrics(storage, metrics)
      metrics.each do |metric, value|
        storage.store(
          metric.to_s,
          storage[metric.to_s].to_a << value.to_f
        )
      end
      storage
    end

    # Replaces all stored ::Classifier records with the current values.
    def save
      ::Classifier.dataset.delete

      @classifiers.each do |section, reference_values|
        ::Classifier.create(
          section: section.to_s,
          reference_values: JSON.generate(reference_values)
        )
      end
    end

    # @return [Hash] an empty Hash for every section in CLASSIFIERS_METRICS
    def new_classifiers
      CLASSIFIERS_METRICS.keys.map { |type| [type, {}] }.to_h
    end

    # Builds the reference values from the projects and saves them.
    # Does nothing when values are already present (e.g. after #load).
    def run
      return if @classifiers.present?

      @classifiers = GRADES.each_with_object(new_classifiers) do |grade, classifiers|
        @projects[grade].each do |project|
          CLASSIFIERS_METRICS.each do |type, metrics|
            classifiers[type][grade] = merge_metrics(classifiers[type][grade].to_h, metrics.call(project))
          end
        end
      end

      save
    end
  end
end
162
+ end
163
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ require 'ossert/classifiers/decision_tree'
3
+ require 'ossert/classifiers/growing'
4
+
5
+ module Ossert
6
# Entry point for (re)building the project classifiers.
module Classifiers
  module_function

  # Trains a fresh Growing classifier and returns the result of Growing#train.
  #
  # The DecisionTree classifier is stale and not trustworthy, so it is left
  # disabled:
  #   DecisionTree.new(projects_by_reference).train
  def train
    Growing.new.train
  end
end
14
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+ require 'yaml'
3
+
4
+ module Ossert
5
# Loads YAML files from CONFIG_ROOT and publishes the merged result as the
# constant named by CONST_NAME (set on Kernel).
class Config
  CONFIG_ROOT = File.join(File.dirname(__FILE__), '..', '..', 'config')
  CONST_NAME = 'Settings'

  # TODO: use ERB, needed for Time syntetics
  #
  # Merges "<name>.yml" for every given name, in order; later files win on
  # key conflicts (shallow merge). Names without a matching file and files
  # that parse to nothing are skipped. Any previously published constant is
  # replaced.
  #
  # @param configs [Array<#to_s>] config file names without the extension
  # @return [Hash] the merged configuration
  # @raise [RuntimeError] when a file contains invalid YAML; the message
  #   names the offending file
  def self.load(*configs)
    config_data = configs.each_with_object({}) do |config, result|
      path = File.join(CONFIG_ROOT, "#{config}.yml")
      next unless File.exist?(path)

      loaded = parse_file(path)
      # An empty file parses to a falsy value; there is nothing to merge.
      result.merge!(loaded) if loaded
    end

    Kernel.send(:remove_const, CONST_NAME) if Kernel.const_defined?(CONST_NAME)
    Kernel.const_set(CONST_NAME, config_data)
  end

  # Parses a single YAML file. The rescue lives here, next to +path+, so the
  # error message can actually name the file that failed.
  def self.parse_file(path)
    YAML.load(File.read(path))
  rescue Psych::SyntaxError => e
    raise "YAML syntax error occurred while parsing #{path}. " \
          "Error: #{e.message}"
  end
  private_class_method :parse_file
end
24
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+ module Ossert
3
+ module Fetch
4
# Scrapes a ranking page of bestgems.org. Subclasses supply the page through
# a class-level +endpoint+ method.
class BestgemsBase
  # Downloads the page and yields one row at a time.
  #
  # Table cells are consumed four at a time; the first three are taken as
  # rank, downloads and gem name, the fourth is ignored.
  #
  # Relies on open-uri being loaded, exactly like the former Kernel#open
  # call did. URI::HTTP#open is used instead because Kernel#open no longer
  # accepts URLs on Ruby 3.0+, and the block form closes the handle.
  #
  # @param page [Integer, String, nil] page number; nil requests the
  #   endpoint without a page parameter
  # @yieldparam rank [Integer]
  # @yieldparam downloads [Integer]
  # @yieldparam name [String]
  def self.process_page(page = nil)
    url = "http://bestgems.org/#{endpoint}#{page ? "?page=#{page}" : ''}"
    doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }

    doc.css('table').xpath('//tr//td').each_slice(4) do |rank, downloads, name, _|
      rank = rank.text.delete(',').to_i
      downloads = downloads.text.delete(',').to_i
      yield(rank, downloads, name.text)
    end
  end
end
14
+
15
# Ranking scraper bound to the "daily" endpoint.
class BestgemsDailyStat < BestgemsBase
  class << self
    # Path segment interpolated into the page URL by the parent class.
    def endpoint
      :daily
    end
  end
end
20
+
21
# Ranking scraper bound to the "total" endpoint.
class BestgemsTotalStat < BestgemsBase
  class << self
    # Path segment interpolated into the page URL by the parent class.
    def endpoint
      :total
    end
  end
end
26
+
27
# Pulls download statistics for one project from the bestgems.org API and
# writes them into the project's community stats.
class Bestgems
  attr_reader :client, :project

  extend Forwardable
  def_delegators :project, :agility, :community, :meta

  # @param project [Object] must respond to rubygems_alias and community
  def initialize(project)
    @client = SimpleClient.new('http://bestgems.org/api/v1/')
    @project = project
  end

  def total_downloads
    client.get("gems/#{project.rubygems_alias}/total_downloads.json")
  end

  def daily_downloads
    client.get("gems/#{project.rubygems_alias}/daily_downloads.json")
  end

  def total_ranking
    client.get("gems/#{project.rubygems_alias}/total_ranking.json")
  end

  def daily_ranking
    client.get("gems/#{project.rubygems_alias}/daily_ranking.json")
  end

  # Runs the three download steps. The order matters: the divergence step
  # uses the overall total remembered by the first step.
  def process
    process_total_downloads
    process_daily_downloads
    process_delta_downloads
  end

  # Stores, per quarter, the largest total seen, and copies the first
  # reported entry's total into community.total.
  # NOTE(review): this presumes the API lists the newest entry first - confirm.
  def process_total_downloads
    @downloads_till_now = nil
    total_downloads.each do |total|
      @downloads_till_now ||= total
      quarter_downloads = community.quarters[total['date']]

      quarter_downloads.total_downloads = [
        quarter_downloads.total_downloads.to_i,
        total['total_downloads']
      ].max
    end
    # Nothing was reported (e.g. a gem unknown to the service): leave the
    # total untouched instead of failing on nil.
    return unless @downloads_till_now

    community.total.total_downloads = @downloads_till_now['total_downloads']
  end

  # Adds every reported daily figure to its quarter's delta_downloads.
  def process_daily_downloads
    daily_downloads.each do |daily|
      downloads_saved = community.quarters[daily['date']].delta_downloads.to_i
      community.quarters[daily['date']].delta_downloads = downloads_saved + daily['daily_downloads']
    end
  end

  # Sets download_divergence for every quarter, as a percentage of the
  # overall total remembered by #process_total_downloads.
  def process_delta_downloads
    total = @downloads_till_now ? @downloads_till_now['total_downloads'] : 0
    prev_downloads_delta = 0
    community.quarters.each_sorted do |start_date, stat|
      # NOTE(review): the accumulator is overwritten with the computed
      # difference, so from the third quarter on it no longer holds the
      # previous quarter's delta. Kept as is - confirm the intent.
      prev_downloads_delta = stat.delta_downloads.to_i - prev_downloads_delta
      community.quarters[start_date].download_divergence = divergence(
        prev_downloads_delta, total
      )
    end
  end

  private

  # Share of +delta+ in +total+ as a percentage with two digits.
  # A zero or missing total yields 0.0 rather than NaN or Infinity.
  def divergence(delta, total)
    return 0.0 if total.to_f.zero?

    (delta.to_f / total.to_f * 100.0).round(2)
  end
end
97
+ end
98
+ end