puppet-community-mvp 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8ed5308091443f5847159a6a481611fba281f4d5
4
- data.tar.gz: f7bb0dd50ea248c04b5809144d67355ad7c6c202
3
+ metadata.gz: '082fa42831056dfcb39a9cf587c9128728ede8af'
4
+ data.tar.gz: c401376ae86adfc9ffcf2631cd607bcd1bbe8c6e
5
5
  SHA512:
6
- metadata.gz: ececdc2a2121c4054fc49b16385892e78364b9ed197b2ac3e38a5542de2f5be94cf52ac9e6d6e1590c7e91b912fa54f4e9a70e71e60ae831fe545b98731021ee
7
- data.tar.gz: 5f87defac101d2105403c0b5b54d34c37cac1b552fc77a2008d7d62624df41c8d1d4813be9c2cf15ded62942d6e0cbc70f36862d39866caa2bd77d6d9528aba0
6
+ metadata.gz: 396050b127e436e2c020836a051426a43c025ed3452983e42450f186ee1b04486c0efb7adfbd010a658a43865c4173cd6262ae16308363fd9847fa2969269f25
7
+ data.tar.gz: 35af39cff28f02378e7b7da71130c76a01cb06e4ec73b884b0ada051b3414c580e5fa578d606096d613940da70b3d3feb416ed5c23f77aa3238fbdaa894a9ca3
data/bin/mvp CHANGED
@@ -8,7 +8,7 @@ require 'logger'
8
8
  require 'mvp'
9
9
 
10
10
  NAME = File.basename($PROGRAM_NAME)
11
- options = {:config => File.expand_path('~/.mvp.config.yaml')}
11
+ options = {:config => File.expand_path('~/.mvp/config.yaml')}
12
12
  optparse = OptionParser.new { |opts|
13
13
  opts.banner = "Usage : #{NAME} [command] [target] [options]
14
14
 
@@ -21,6 +21,9 @@ The following CLI commands are available.
21
21
  * upload | insert [target]
22
22
  * Uploads data to BigQuery
23
23
  * Optional targets: all, authors, modules, releases, mirrors
24
+ * mirror [target]
25
+ * Runs the download & then upload tasks.
26
+ * Optional targets: all, authors, modules, releases
24
27
  * stats
25
28
  * Print out a summary of interesting stats.
26
29
  "
@@ -80,6 +83,7 @@ options[:gcloud][:project] ||= 'puppet'
80
83
  options[:gcloud][:keyfile] ||= '~/.mvp/credentials.json'
81
84
 
82
85
  options[:cachedir] = File.expand_path(options[:cachedir])
86
+ options[:github_data] = File.expand_path(options[:github_data])
83
87
  options[:gcloud][:keyfile] = File.expand_path(options[:gcloud][:keyfile])
84
88
  FileUtils.mkdir_p(options[:cachedir])
85
89
 
@@ -7,15 +7,35 @@ require 'mvp/monkeypatches'
7
7
  class Mvp
8
8
  class Downloader
9
9
  def initialize(options = {})
10
- @cachedir = options[:cachedir]
11
- @forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
10
+ @useragent = 'Puppet Community Stats Monitor'
11
+ @cachedir = options[:cachedir]
12
+ @forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
13
+ end
14
+
15
+ def mirror(entity, uploader)
16
+ # using authors for git repo terminology consistency
17
+ item = (entity == :authors) ? 'users' : entity.to_s
18
+ download(item) do |data|
19
+ case entity
20
+ when :modules
21
+ uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
22
+ data = flatten_modules(data)
23
+ when :releases
24
+ data = flatten_releases(data)
25
+ end
26
+
27
+ uploader.insert(entity, data)
28
+ end
12
29
  end
13
30
 
14
31
  def retrieve(entity, download = true)
15
32
  if download
16
33
  # I am focusing on authorship rather than just users, so for now I'm using the word authors
17
34
  item = (entity == :authors) ? 'users' : entity.to_s
18
- data = download(item)
35
+ data = []
36
+ download(item) do |resp|
37
+ data.concat resp
38
+ end
19
39
  save_json(entity, data)
20
40
  else
21
41
  data = File.read("#{@cachedir}/#{entity}.json")
@@ -30,9 +50,35 @@ class Mvp
30
50
  save_nld_json(entity.to_s, data)
31
51
  end
32
52
 
33
- def validations()
53
+ def retrieve_validations(modules, period = 25)
34
54
  results = {}
35
- cache = "#{@cachedir}/modules.json"
55
+
56
+ begin
57
+ offset = 0
58
+ endpoint = "/private/validations/"
59
+ modules.each do |mod|
60
+ name = "#{mod['owner']['username']}-#{mod['name']}"
61
+ response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
62
+ raise "Forge Error: #{@response.body}" unless response.code == 200
63
+
64
+ results[name] = JSON.parse(response.body)
65
+ offset += 1
66
+
67
+ if block_given? and (offset % period == 0)
68
+ yield offset
69
+ GC.start
70
+ end
71
+ end
72
+ rescue => e
73
+ $logger.error e.message
74
+ $logger.debug e.backtrace.join("\n")
75
+ end
76
+
77
+ results
78
+ end
79
+
80
+ def validations()
81
+ cache = "#{@cachedir}/modules.json"
36
82
 
37
83
  if File.exist? cache
38
84
  module_data = JSON.parse(File.read(cache))
@@ -41,22 +87,12 @@ class Mvp
41
87
  end
42
88
 
43
89
  begin
44
- offset = 0
45
- endpoint = "/private/validations/"
46
- spinner = TTY::Spinner.new("[:spinner] :title")
90
+ spinner = TTY::Spinner.new("[:spinner] :title")
47
91
  spinner.update(title: "Downloading module validations ...")
48
92
  spinner.auto_spin
49
93
 
50
- module_data.each do |mod|
51
- name = "#{mod['owner']['username']}-#{mod['name']}"
52
- response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
53
- raise "Forge Error: #{@response.body}" unless response.code == 200
54
-
55
- data = JSON.parse(response.body)
56
- offset += 1
57
- results[name] = data
58
-
59
- spinner.update(title: "Downloading module validations [#{offset}]...") if (offset % 25 == 0)
94
+ results = retrieve_validations(module_data) do |offset|
95
+ spinner.update(title: "Downloading module validations [#{offset}]...")
60
96
  end
61
97
 
62
98
  spinner.success('(OK)')
@@ -72,7 +108,7 @@ class Mvp
72
108
  end
73
109
 
74
110
  def download(entity)
75
- results = []
111
+ raise 'Please process downloaded data by passing a block' unless block_given?
76
112
 
77
113
  begin
78
114
  offset = 0
@@ -82,15 +118,19 @@ class Mvp
82
118
  spinner.auto_spin
83
119
 
84
120
  while endpoint do
85
- response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
121
+ response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
86
122
  raise "Forge Error: #{@response.body}" unless response.code == 200
87
-
88
123
  data = JSON.parse(response.body)
124
+
89
125
  offset += 50
90
- results += data['results']
91
126
  endpoint = data['pagination']['next']
92
127
 
93
- spinner.update(title: "Downloading #{entity} [#{offset}]...") if (endpoint and (offset % 250 == 0))
128
+ yield munge_dates(data['results'])
129
+
130
+ if (endpoint and (offset % 250 == 0))
131
+ spinner.update(title: "Downloading #{entity} [#{offset}]...")
132
+ GC.start
133
+ end
94
134
  end
95
135
 
96
136
  spinner.success('(OK)')
@@ -100,7 +140,7 @@ class Mvp
100
140
  $logger.debug e.backtrace.join("\n")
101
141
  end
102
142
 
103
- munge_dates(results)
143
+ nil
104
144
  end
105
145
 
106
146
  # transform dates into a format that bigquery knows
@@ -138,7 +178,7 @@ class Mvp
138
178
  row['source'] = row['current_release']['metadata']['source']
139
179
  row['project_page'] = row['current_release']['metadata']['project_page']
140
180
  row['issues_url'] = row['current_release']['metadata']['issues_url']
141
- row['tasks'] = row['current_release']['tasks'].map{|task| task['name']}
181
+ row['tasks'] = row['current_release']['tasks'].map{|task| task['name']} rescue []
142
182
 
143
183
  row['release_count'] = row['releases'].count rescue 0
144
184
  row['releases'] = row['releases'].map{|r| r['version']} rescue []
@@ -152,12 +192,12 @@ class Mvp
152
192
  def flatten_releases(data)
153
193
  data.each do |row|
154
194
  row['name'] = row['module']['name']
155
- row['owner'] = row['module']['username']
195
+ row['owner'] = row['module']['owner']['username']
156
196
  row['license'] = row['metadata']['license']
157
197
  row['source'] = row['metadata']['source']
158
198
  row['project_page'] = row['metadata']['project_page']
159
199
  row['issues_url'] = row['metadata']['issues_url']
160
- row['tasks'] = row['tasks'].map{|task| task['name']}
200
+ row['tasks'] = row['tasks'].map{|task| task['name']} rescue []
161
201
 
162
202
  simplify_metadata(row, row['metadata'])
163
203
  row.delete('module')
@@ -176,8 +216,8 @@ class Mvp
176
216
  end
177
217
 
178
218
  def simplify_metadata(data, metadata)
179
- data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue nil
180
- data['dependencies'] = metadata['dependencies'].map{|i| i['name']} rescue nil
219
+ data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue []
220
+ data['dependencies'] = metadata['dependencies'].map{|i| i['name'].sub('/', '-')} rescue []
181
221
  data['puppet_range'] = metadata['requirements'].select{|r| r['name'] == 'puppet'}.first['version_requirement'] rescue nil
182
222
  data['metadata'] = metadata.to_json
183
223
 
@@ -26,15 +26,26 @@ class Mvp
26
26
  def upload(target = :all)
27
27
  uploader = Mvp::Uploader.new(@options)
28
28
 
29
- [:authors, :modules, :releases, :validations, :mirrors].each do |thing|
29
+ [:authors, :modules, :releases, :validations, :github_mirrors].each do |thing|
30
30
  next unless [:all, thing].include? target
31
31
  uploader.send(thing)
32
32
  end
33
33
  end
34
34
 
35
35
  def mirror(target = :all)
36
- retrieve(target)
37
- upload(target)
36
+ downloader = Mvp::Downloader.new(@options)
37
+ uploader = Mvp::Uploader.new(@options)
38
+
39
+ # validations are downloaded with modules
40
+ [:authors, :modules, :releases].each do |thing|
41
+ next unless [:all, thing].include? target
42
+ uploader.truncate(thing)
43
+ downloader.mirror(thing, uploader)
44
+ end
45
+
46
+ if [:all, :mirrors].include? target
47
+ uploader.github_mirrors()
48
+ end
38
49
  end
39
50
 
40
51
  def stats(target)
@@ -12,6 +12,139 @@ class Mvp
12
12
  :credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
13
13
  )
14
14
  @dataset = @bigquery.dataset(options[:gcloud][:dataset])
15
+
16
+ raise "\nThere is a problem with the gCloud configuration: \n #{JSON.pretty_generate(options)}" if @dataset.nil?
17
+ end
18
+
19
+ def truncate(entity)
20
+ begin
21
+ case entity
22
+ when :authors
23
+ @dataset.table('forge_authors').delete rescue nil
24
+ @dataset.create_table('forge_authors') do |table|
25
+ table.name = 'Forge Authors'
26
+ table.description = 'A list of all authors (users) on the Forge'
27
+ table.schema do |s|
28
+ s.integer "module_count", mode: :required
29
+ s.integer "release_count", mode: :required
30
+ s.timestamp "created_at", mode: :required
31
+ s.string "display_name", mode: :required
32
+ s.string "username", mode: :required
33
+ s.timestamp "updated_at", mode: :required
34
+ s.string "gravatar_id", mode: :required
35
+ s.string "slug", mode: :required
36
+ s.string "uri", mode: :required
37
+ end
38
+ end
39
+
40
+ when :modules
41
+ # both modules and validations
42
+ @dataset.table('forge_modules').delete rescue nil
43
+ @dataset.create_table('forge_modules') do |table|
44
+ table.name = 'Forge Modules'
45
+ table.description = 'All modules and their metadata on the Forge'
46
+ table.schema do |s|
47
+ s.string "name", mode: :required
48
+ s.string "owner", mode: :required
49
+ s.string "version", mode: :required
50
+ s.string "slug", mode: :required
51
+ s.string "uri", mode: :required
52
+ s.timestamp "created_at", mode: :required
53
+ s.timestamp "updated_at", mode: :required
54
+ s.string "tasks", mode: :repeated
55
+ s.string "homepage_url"
56
+ s.string "project_page"
57
+ s.string "issues_url"
58
+ s.string "source"
59
+ s.boolean "supported"
60
+ s.string "endorsement"
61
+ s.string "module_group"
62
+ s.boolean "pdk"
63
+ s.string "operatingsystem", mode: :repeated
64
+ s.integer "release_count", mode: :required
65
+ s.integer "downloads", mode: :required
66
+ s.integer "feedback_score"
67
+ s.integer "validation_score"
68
+ s.string "releases", mode: :repeated
69
+ s.string "puppet_range"
70
+ s.boolean "puppet_2x"
71
+ s.boolean "puppet_3x"
72
+ s.boolean "puppet_4x"
73
+ s.boolean "puppet_5x"
74
+ s.boolean "puppet_6x"
75
+ s.string "superseded_by"
76
+ s.string "deprecated_for"
77
+ s.timestamp "deprecated_at"
78
+ s.timestamp "deleted_at"
79
+ s.string "dependencies", mode: :repeated
80
+ s.string "license"
81
+ s.string "metadata", mode: :required
82
+ end
83
+ end
84
+
85
+ @dataset.table('forge_validations').delete rescue nil
86
+ @dataset.create_table('forge_validations') do |table|
87
+ table.name = 'Forge Module Validations'
88
+ table.description = 'Validation scores for all the modules on the Forge'
89
+ table.schema do |s|
90
+ s.integer "total"
91
+ s.integer "parser"
92
+ s.integer "metadata"
93
+ s.integer "lint"
94
+ s.string "name", mode: :required
95
+ end
96
+ end
97
+
98
+ when :releases
99
+ @dataset.table('forge_releases').delete rescue nil
100
+ @dataset.create_table('forge_releases') do |table|
101
+ table.name = 'Forge Releases'
102
+ table.description = 'Releases of all modules on the Forge'
103
+ table.schema do |s|
104
+ s.string "name", mode: :required
105
+ s.string "owner", mode: :required
106
+ s.string "version", mode: :required
107
+ s.string "slug", mode: :required
108
+ s.string "uri", mode: :required
109
+ s.timestamp "created_at", mode: :required
110
+ s.timestamp "updated_at", mode: :required
111
+ s.timestamp "deleted_at"
112
+ s.string "deleted_for"
113
+ s.string "tasks", mode: :repeated
114
+ s.string "project_page"
115
+ s.string "issues_url"
116
+ s.string "source"
117
+ s.boolean "supported"
118
+ s.boolean "pdk"
119
+ s.string "tags", mode: :repeated
120
+ s.string "operatingsystem", mode: :repeated
121
+ s.integer "downloads", mode: :required
122
+ s.integer "feedback_score"
123
+ s.integer "validation_score"
124
+ s.string "puppet_range"
125
+ s.boolean "puppet_2x"
126
+ s.boolean "puppet_3x"
127
+ s.boolean "puppet_4x"
128
+ s.boolean "puppet_5x"
129
+ s.boolean "puppet_6x"
130
+ s.string "dependencies", mode: :repeated
131
+ s.string "file_uri", mode: :required
132
+ s.string "file_md5", mode: :required
133
+ s.integer "file_size", mode: :required
134
+ s.string "changelog"
135
+ s.string "reference"
136
+ s.string "readme"
137
+ s.string "license"
138
+ s.string "metadata", mode: :required
139
+ end
140
+ end
141
+
142
+ end
143
+ rescue => e
144
+ $logger.error e.message
145
+ $logger.debug e.backtrace.join("\n")
146
+ @channels = @dataset.table('slack_channels')
147
+ end
15
148
  end
16
149
 
17
150
  def authors()
@@ -30,7 +163,7 @@ class Mvp
30
163
  upload('validations')
31
164
  end
32
165
 
33
- def mirrors()
166
+ def github_mirrors()
34
167
  @mirrors.each do |entity|
35
168
  begin
36
169
  spinner = TTY::Spinner.new("[:spinner] :title")
@@ -62,6 +195,19 @@ class Mvp
62
195
  end
63
196
  end
64
197
 
198
+ def insert(entity, data)
199
+ table = @dataset.table("forge_#{entity}")
200
+ response = table.insert(data)
201
+
202
+ unless response.success?
203
+ errors = {}
204
+ response.insert_errors.each do |err|
205
+ errors[err.row['slug']] = err.errors
206
+ end
207
+ $logger.error JSON.pretty_generate(errors)
208
+ end
209
+ end
210
+
65
211
  def upload(entity)
66
212
  begin
67
213
  spinner = TTY::Spinner.new("[:spinner] :title")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: puppet-community-mvp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Ford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-27 00:00:00.000000000 Z
11
+ date: 2018-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -163,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
163
  version: '0'
164
164
  requirements: []
165
165
  rubyforge_project:
166
- rubygems_version: 2.5.2.3
166
+ rubygems_version: 2.6.10
167
167
  signing_key:
168
168
  specification_version: 4
169
169
  summary: Generate some stats about the Puppet Community.