puppet-community-mvp 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8ed5308091443f5847159a6a481611fba281f4d5
4
- data.tar.gz: f7bb0dd50ea248c04b5809144d67355ad7c6c202
3
+ metadata.gz: '082fa42831056dfcb39a9cf587c9128728ede8af'
4
+ data.tar.gz: c401376ae86adfc9ffcf2631cd607bcd1bbe8c6e
5
5
  SHA512:
6
- metadata.gz: ececdc2a2121c4054fc49b16385892e78364b9ed197b2ac3e38a5542de2f5be94cf52ac9e6d6e1590c7e91b912fa54f4e9a70e71e60ae831fe545b98731021ee
7
- data.tar.gz: 5f87defac101d2105403c0b5b54d34c37cac1b552fc77a2008d7d62624df41c8d1d4813be9c2cf15ded62942d6e0cbc70f36862d39866caa2bd77d6d9528aba0
6
+ metadata.gz: 396050b127e436e2c020836a051426a43c025ed3452983e42450f186ee1b04486c0efb7adfbd010a658a43865c4173cd6262ae16308363fd9847fa2969269f25
7
+ data.tar.gz: 35af39cff28f02378e7b7da71130c76a01cb06e4ec73b884b0ada051b3414c580e5fa578d606096d613940da70b3d3feb416ed5c23f77aa3238fbdaa894a9ca3
data/bin/mvp CHANGED
@@ -8,7 +8,7 @@ require 'logger'
8
8
  require 'mvp'
9
9
 
10
10
  NAME = File.basename($PROGRAM_NAME)
11
- options = {:config => File.expand_path('~/.mvp.config.yaml')}
11
+ options = {:config => File.expand_path('~/.mvp/config.yaml')}
12
12
  optparse = OptionParser.new { |opts|
13
13
  opts.banner = "Usage : #{NAME} [command] [target] [options]
14
14
 
@@ -21,6 +21,9 @@ The following CLI commands are available.
21
21
  * upload | insert [target]
22
22
  * Uploads data to BigQuery
23
23
  * Optional targets: all, authors, modules, releases, mirrors
24
+ * mirror [target]
25
+ * Runs the download & then upload tasks.
26
+ * Optional targets: all, authors, modules, releases
24
27
  * stats
25
28
  * Print out a summary of interesting stats.
26
29
  "
@@ -80,6 +83,7 @@ options[:gcloud][:project] ||= 'puppet'
80
83
  options[:gcloud][:keyfile] ||= '~/.mvp/credentials.json'
81
84
 
82
85
  options[:cachedir] = File.expand_path(options[:cachedir])
86
+ options[:github_data] = File.expand_path(options[:github_data])
83
87
  options[:gcloud][:keyfile] = File.expand_path(options[:gcloud][:keyfile])
84
88
  FileUtils.mkdir_p(options[:cachedir])
85
89
 
@@ -7,15 +7,35 @@ require 'mvp/monkeypatches'
7
7
  class Mvp
8
8
  class Downloader
9
9
  def initialize(options = {})
10
- @cachedir = options[:cachedir]
11
- @forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
10
+ @useragent = 'Puppet Community Stats Monitor'
11
+ @cachedir = options[:cachedir]
12
+ @forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
13
+ end
14
+
15
+ def mirror(entity, uploader)
16
+ # using authors for git repo terminology consistency
17
+ item = (entity == :authors) ? 'users' : entity.to_s
18
+ download(item) do |data|
19
+ case entity
20
+ when :modules
21
+ uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
22
+ data = flatten_modules(data)
23
+ when :releases
24
+ data = flatten_releases(data)
25
+ end
26
+
27
+ uploader.insert(entity, data)
28
+ end
12
29
  end
13
30
 
14
31
  def retrieve(entity, download = true)
15
32
  if download
16
33
  # I am focusing on authorship rather than just users, so for now I'm using the word authors
17
34
  item = (entity == :authors) ? 'users' : entity.to_s
18
- data = download(item)
35
+ data = []
36
+ download(item) do |resp|
37
+ data.concat resp
38
+ end
19
39
  save_json(entity, data)
20
40
  else
21
41
  data = File.read("#{@cachedir}/#{entity}.json")
@@ -30,9 +50,35 @@ class Mvp
30
50
  save_nld_json(entity.to_s, data)
31
51
  end
32
52
 
33
- def validations()
53
+ def retrieve_validations(modules, period = 25)
34
54
  results = {}
35
- cache = "#{@cachedir}/modules.json"
55
+
56
+ begin
57
+ offset = 0
58
+ endpoint = "/private/validations/"
59
+ modules.each do |mod|
60
+ name = "#{mod['owner']['username']}-#{mod['name']}"
61
+ response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
62
+ raise "Forge Error: #{@response.body}" unless response.code == 200
63
+
64
+ results[name] = JSON.parse(response.body)
65
+ offset += 1
66
+
67
+ if block_given? and (offset % period == 0)
68
+ yield offset
69
+ GC.start
70
+ end
71
+ end
72
+ rescue => e
73
+ $logger.error e.message
74
+ $logger.debug e.backtrace.join("\n")
75
+ end
76
+
77
+ results
78
+ end
79
+
80
+ def validations()
81
+ cache = "#{@cachedir}/modules.json"
36
82
 
37
83
  if File.exist? cache
38
84
  module_data = JSON.parse(File.read(cache))
@@ -41,22 +87,12 @@ class Mvp
41
87
  end
42
88
 
43
89
  begin
44
- offset = 0
45
- endpoint = "/private/validations/"
46
- spinner = TTY::Spinner.new("[:spinner] :title")
90
+ spinner = TTY::Spinner.new("[:spinner] :title")
47
91
  spinner.update(title: "Downloading module validations ...")
48
92
  spinner.auto_spin
49
93
 
50
- module_data.each do |mod|
51
- name = "#{mod['owner']['username']}-#{mod['name']}"
52
- response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
53
- raise "Forge Error: #{@response.body}" unless response.code == 200
54
-
55
- data = JSON.parse(response.body)
56
- offset += 1
57
- results[name] = data
58
-
59
- spinner.update(title: "Downloading module validations [#{offset}]...") if (offset % 25 == 0)
94
+ results = retrieve_validations(module_data) do |offset|
95
+ spinner.update(title: "Downloading module validations [#{offset}]...")
60
96
  end
61
97
 
62
98
  spinner.success('(OK)')
@@ -72,7 +108,7 @@ class Mvp
72
108
  end
73
109
 
74
110
  def download(entity)
75
- results = []
111
+ raise 'Please process downloaded data by passing a block' unless block_given?
76
112
 
77
113
  begin
78
114
  offset = 0
@@ -82,15 +118,19 @@ class Mvp
82
118
  spinner.auto_spin
83
119
 
84
120
  while endpoint do
85
- response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
121
+ response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
86
122
  raise "Forge Error: #{@response.body}" unless response.code == 200
87
-
88
123
  data = JSON.parse(response.body)
124
+
89
125
  offset += 50
90
- results += data['results']
91
126
  endpoint = data['pagination']['next']
92
127
 
93
- spinner.update(title: "Downloading #{entity} [#{offset}]...") if (endpoint and (offset % 250 == 0))
128
+ yield munge_dates(data['results'])
129
+
130
+ if (endpoint and (offset % 250 == 0))
131
+ spinner.update(title: "Downloading #{entity} [#{offset}]...")
132
+ GC.start
133
+ end
94
134
  end
95
135
 
96
136
  spinner.success('(OK)')
@@ -100,7 +140,7 @@ class Mvp
100
140
  $logger.debug e.backtrace.join("\n")
101
141
  end
102
142
 
103
- munge_dates(results)
143
+ nil
104
144
  end
105
145
 
106
146
  # transform dates into a format that bigquery knows
@@ -138,7 +178,7 @@ class Mvp
138
178
  row['source'] = row['current_release']['metadata']['source']
139
179
  row['project_page'] = row['current_release']['metadata']['project_page']
140
180
  row['issues_url'] = row['current_release']['metadata']['issues_url']
141
- row['tasks'] = row['current_release']['tasks'].map{|task| task['name']}
181
+ row['tasks'] = row['current_release']['tasks'].map{|task| task['name']} rescue []
142
182
 
143
183
  row['release_count'] = row['releases'].count rescue 0
144
184
  row['releases'] = row['releases'].map{|r| r['version']} rescue []
@@ -152,12 +192,12 @@ class Mvp
152
192
  def flatten_releases(data)
153
193
  data.each do |row|
154
194
  row['name'] = row['module']['name']
155
- row['owner'] = row['module']['username']
195
+ row['owner'] = row['module']['owner']['username']
156
196
  row['license'] = row['metadata']['license']
157
197
  row['source'] = row['metadata']['source']
158
198
  row['project_page'] = row['metadata']['project_page']
159
199
  row['issues_url'] = row['metadata']['issues_url']
160
- row['tasks'] = row['tasks'].map{|task| task['name']}
200
+ row['tasks'] = row['tasks'].map{|task| task['name']} rescue []
161
201
 
162
202
  simplify_metadata(row, row['metadata'])
163
203
  row.delete('module')
@@ -176,8 +216,8 @@ class Mvp
176
216
  end
177
217
 
178
218
  def simplify_metadata(data, metadata)
179
- data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue nil
180
- data['dependencies'] = metadata['dependencies'].map{|i| i['name']} rescue nil
219
+ data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue []
220
+ data['dependencies'] = metadata['dependencies'].map{|i| i['name'].sub('/', '-')} rescue []
181
221
  data['puppet_range'] = metadata['requirements'].select{|r| r['name'] == 'puppet'}.first['version_requirement'] rescue nil
182
222
  data['metadata'] = metadata.to_json
183
223
 
@@ -26,15 +26,26 @@ class Mvp
26
26
  def upload(target = :all)
27
27
  uploader = Mvp::Uploader.new(@options)
28
28
 
29
- [:authors, :modules, :releases, :validations, :mirrors].each do |thing|
29
+ [:authors, :modules, :releases, :validations, :github_mirrors].each do |thing|
30
30
  next unless [:all, thing].include? target
31
31
  uploader.send(thing)
32
32
  end
33
33
  end
34
34
 
35
35
  def mirror(target = :all)
36
- retrieve(target)
37
- upload(target)
36
+ downloader = Mvp::Downloader.new(@options)
37
+ uploader = Mvp::Uploader.new(@options)
38
+
39
+ # validations are downloaded with modules
40
+ [:authors, :modules, :releases].each do |thing|
41
+ next unless [:all, thing].include? target
42
+ uploader.truncate(thing)
43
+ downloader.mirror(thing, uploader)
44
+ end
45
+
46
+ if [:all, :mirrors].include? target
47
+ uploader.github_mirrors()
48
+ end
38
49
  end
39
50
 
40
51
  def stats(target)
@@ -12,6 +12,139 @@ class Mvp
12
12
  :credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
13
13
  )
14
14
  @dataset = @bigquery.dataset(options[:gcloud][:dataset])
15
+
16
+ raise "\nThere is a problem with the gCloud configuration: \n #{JSON.pretty_generate(options)}" if @dataset.nil?
17
+ end
18
+
19
+ def truncate(entity)
20
+ begin
21
+ case entity
22
+ when :authors
23
+ @dataset.table('forge_authors').delete rescue nil
24
+ @dataset.create_table('forge_authors') do |table|
25
+ table.name = 'Forge Authors'
26
+ table.description = 'A list of all authors (users) on the Forge'
27
+ table.schema do |s|
28
+ s.integer "module_count", mode: :required
29
+ s.integer "release_count", mode: :required
30
+ s.timestamp "created_at", mode: :required
31
+ s.string "display_name", mode: :required
32
+ s.string "username", mode: :required
33
+ s.timestamp "updated_at", mode: :required
34
+ s.string "gravatar_id", mode: :required
35
+ s.string "slug", mode: :required
36
+ s.string "uri", mode: :required
37
+ end
38
+ end
39
+
40
+ when :modules
41
+ # both modules and validations
42
+ @dataset.table('forge_modules').delete rescue nil
43
+ @dataset.create_table('forge_modules') do |table|
44
+ table.name = 'Forge Modules'
45
+ table.description = 'All modules and their metadata on the Forge'
46
+ table.schema do |s|
47
+ s.string "name", mode: :required
48
+ s.string "owner", mode: :required
49
+ s.string "version", mode: :required
50
+ s.string "slug", mode: :required
51
+ s.string "uri", mode: :required
52
+ s.timestamp "created_at", mode: :required
53
+ s.timestamp "updated_at", mode: :required
54
+ s.string "tasks", mode: :repeated
55
+ s.string "homepage_url"
56
+ s.string "project_page"
57
+ s.string "issues_url"
58
+ s.string "source"
59
+ s.boolean "supported"
60
+ s.string "endorsement"
61
+ s.string "module_group"
62
+ s.boolean "pdk"
63
+ s.string "operatingsystem", mode: :repeated
64
+ s.integer "release_count", mode: :required
65
+ s.integer "downloads", mode: :required
66
+ s.integer "feedback_score"
67
+ s.integer "validation_score"
68
+ s.string "releases", mode: :repeated
69
+ s.string "puppet_range"
70
+ s.boolean "puppet_2x"
71
+ s.boolean "puppet_3x"
72
+ s.boolean "puppet_4x"
73
+ s.boolean "puppet_5x"
74
+ s.boolean "puppet_6x"
75
+ s.string "superseded_by"
76
+ s.string "deprecated_for"
77
+ s.timestamp "deprecated_at"
78
+ s.timestamp "deleted_at"
79
+ s.string "dependencies", mode: :repeated
80
+ s.string "license"
81
+ s.string "metadata", mode: :required
82
+ end
83
+ end
84
+
85
+ @dataset.table('forge_validations').delete rescue nil
86
+ @dataset.create_table('forge_validations') do |table|
87
+ table.name = 'Forge Module Validations'
88
+ table.description = 'Validation scores for all the modules on the Forge'
89
+ table.schema do |s|
90
+ s.integer "total"
91
+ s.integer "parser"
92
+ s.integer "metadata"
93
+ s.integer "lint"
94
+ s.string "name", mode: :required
95
+ end
96
+ end
97
+
98
+ when :releases
99
+ @dataset.table('forge_releases').delete rescue nil
100
+ @dataset.create_table('forge_releases') do |table|
101
+ table.name = 'Forge Releases'
102
+ table.description = 'Releases of all modules on the Forge'
103
+ table.schema do |s|
104
+ s.string "name", mode: :required
105
+ s.string "owner", mode: :required
106
+ s.string "version", mode: :required
107
+ s.string "slug", mode: :required
108
+ s.string "uri", mode: :required
109
+ s.timestamp "created_at", mode: :required
110
+ s.timestamp "updated_at", mode: :required
111
+ s.timestamp "deleted_at"
112
+ s.string "deleted_for"
113
+ s.string "tasks", mode: :repeated
114
+ s.string "project_page"
115
+ s.string "issues_url"
116
+ s.string "source"
117
+ s.boolean "supported"
118
+ s.boolean "pdk"
119
+ s.string "tags", mode: :repeated
120
+ s.string "operatingsystem", mode: :repeated
121
+ s.integer "downloads", mode: :required
122
+ s.integer "feedback_score"
123
+ s.integer "validation_score"
124
+ s.string "puppet_range"
125
+ s.boolean "puppet_2x"
126
+ s.boolean "puppet_3x"
127
+ s.boolean "puppet_4x"
128
+ s.boolean "puppet_5x"
129
+ s.boolean "puppet_6x"
130
+ s.string "dependencies", mode: :repeated
131
+ s.string "file_uri", mode: :required
132
+ s.string "file_md5", mode: :required
133
+ s.integer "file_size", mode: :required
134
+ s.string "changelog"
135
+ s.string "reference"
136
+ s.string "readme"
137
+ s.string "license"
138
+ s.string "metadata", mode: :required
139
+ end
140
+ end
141
+
142
+ end
143
+ rescue => e
144
+ $logger.error e.message
145
+ $logger.debug e.backtrace.join("\n")
146
+ @channels = @dataset.table('slack_channels')
147
+ end
15
148
  end
16
149
 
17
150
  def authors()
@@ -30,7 +163,7 @@ class Mvp
30
163
  upload('validations')
31
164
  end
32
165
 
33
- def mirrors()
166
+ def github_mirrors()
34
167
  @mirrors.each do |entity|
35
168
  begin
36
169
  spinner = TTY::Spinner.new("[:spinner] :title")
@@ -62,6 +195,19 @@ class Mvp
62
195
  end
63
196
  end
64
197
 
198
+ def insert(entity, data)
199
+ table = @dataset.table("forge_#{entity}")
200
+ response = table.insert(data)
201
+
202
+ unless response.success?
203
+ errors = {}
204
+ response.insert_errors.each do |err|
205
+ errors[err.row['slug']] = err.errors
206
+ end
207
+ $logger.error JSON.pretty_generate(errors)
208
+ end
209
+ end
210
+
65
211
  def upload(entity)
66
212
  begin
67
213
  spinner = TTY::Spinner.new("[:spinner] :title")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: puppet-community-mvp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Ford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-27 00:00:00.000000000 Z
11
+ date: 2018-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -163,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
163
  version: '0'
164
164
  requirements: []
165
165
  rubyforge_project:
166
- rubygems_version: 2.5.2.3
166
+ rubygems_version: 2.6.10
167
167
  signing_key:
168
168
  specification_version: 4
169
169
  summary: Generate some stats about the Puppet Community.