puppet-community-mvp 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/mvp +5 -1
- data/lib/mvp/downloader.rb +69 -29
- data/lib/mvp/runner.rb +14 -3
- data/lib/mvp/uploader.rb +147 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '082fa42831056dfcb39a9cf587c9128728ede8af'
|
4
|
+
data.tar.gz: c401376ae86adfc9ffcf2631cd607bcd1bbe8c6e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 396050b127e436e2c020836a051426a43c025ed3452983e42450f186ee1b04486c0efb7adfbd010a658a43865c4173cd6262ae16308363fd9847fa2969269f25
|
7
|
+
data.tar.gz: 35af39cff28f02378e7b7da71130c76a01cb06e4ec73b884b0ada051b3414c580e5fa578d606096d613940da70b3d3feb416ed5c23f77aa3238fbdaa894a9ca3
|
data/bin/mvp
CHANGED
@@ -8,7 +8,7 @@ require 'logger'
|
|
8
8
|
require 'mvp'
|
9
9
|
|
10
10
|
NAME = File.basename($PROGRAM_NAME)
|
11
|
-
options = {:config => File.expand_path('~/.mvp
|
11
|
+
options = {:config => File.expand_path('~/.mvp/config.yaml')}
|
12
12
|
optparse = OptionParser.new { |opts|
|
13
13
|
opts.banner = "Usage : #{NAME} [command] [target] [options]
|
14
14
|
|
@@ -21,6 +21,9 @@ The following CLI commands are available.
|
|
21
21
|
* upload | insert [target]
|
22
22
|
* Uploads data to BigQuery
|
23
23
|
* Optional targets: all, authors, modules, releases, mirrors
|
24
|
+
* mirror [target]
|
25
|
+
* Runs the download & then upload tasks.
|
26
|
+
* Optional targets: all, authors, modules, releases
|
24
27
|
* stats
|
25
28
|
* Print out a summary of interesting stats.
|
26
29
|
"
|
@@ -80,6 +83,7 @@ options[:gcloud][:project] ||= 'puppet'
|
|
80
83
|
options[:gcloud][:keyfile] ||= '~/.mvp/credentials.json'
|
81
84
|
|
82
85
|
options[:cachedir] = File.expand_path(options[:cachedir])
|
86
|
+
options[:github_data] = File.expand_path(options[:github_data])
|
83
87
|
options[:gcloud][:keyfile] = File.expand_path(options[:gcloud][:keyfile])
|
84
88
|
FileUtils.mkdir_p(options[:cachedir])
|
85
89
|
|
data/lib/mvp/downloader.rb
CHANGED
@@ -7,15 +7,35 @@ require 'mvp/monkeypatches'
|
|
7
7
|
class Mvp
|
8
8
|
class Downloader
|
9
9
|
def initialize(options = {})
|
10
|
-
@
|
11
|
-
@
|
10
|
+
@useragent = 'Puppet Community Stats Monitor'
|
11
|
+
@cachedir = options[:cachedir]
|
12
|
+
@forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
|
13
|
+
end
|
14
|
+
|
15
|
+
def mirror(entity, uploader)
|
16
|
+
# using authors for git repo terminology consistency
|
17
|
+
item = (entity == :authors) ? 'users' : entity.to_s
|
18
|
+
download(item) do |data|
|
19
|
+
case entity
|
20
|
+
when :modules
|
21
|
+
uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
|
22
|
+
data = flatten_modules(data)
|
23
|
+
when :releases
|
24
|
+
data = flatten_releases(data)
|
25
|
+
end
|
26
|
+
|
27
|
+
uploader.insert(entity, data)
|
28
|
+
end
|
12
29
|
end
|
13
30
|
|
14
31
|
def retrieve(entity, download = true)
|
15
32
|
if download
|
16
33
|
# I am focusing on authorship rather than just users, so for now I'm using the word authors
|
17
34
|
item = (entity == :authors) ? 'users' : entity.to_s
|
18
|
-
data =
|
35
|
+
data = []
|
36
|
+
download(item) do |resp|
|
37
|
+
data.concat resp
|
38
|
+
end
|
19
39
|
save_json(entity, data)
|
20
40
|
else
|
21
41
|
data = File.read("#{@cachedir}/#{entity}.json")
|
@@ -30,9 +50,35 @@ class Mvp
|
|
30
50
|
save_nld_json(entity.to_s, data)
|
31
51
|
end
|
32
52
|
|
33
|
-
def
|
53
|
+
def retrieve_validations(modules, period = 25)
|
34
54
|
results = {}
|
35
|
-
|
55
|
+
|
56
|
+
begin
|
57
|
+
offset = 0
|
58
|
+
endpoint = "/private/validations/"
|
59
|
+
modules.each do |mod|
|
60
|
+
name = "#{mod['owner']['username']}-#{mod['name']}"
|
61
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
|
62
|
+
raise "Forge Error: #{@response.body}" unless response.code == 200
|
63
|
+
|
64
|
+
results[name] = JSON.parse(response.body)
|
65
|
+
offset += 1
|
66
|
+
|
67
|
+
if block_given? and (offset % period == 0)
|
68
|
+
yield offset
|
69
|
+
GC.start
|
70
|
+
end
|
71
|
+
end
|
72
|
+
rescue => e
|
73
|
+
$logger.error e.message
|
74
|
+
$logger.debug e.backtrace.join("\n")
|
75
|
+
end
|
76
|
+
|
77
|
+
results
|
78
|
+
end
|
79
|
+
|
80
|
+
def validations()
|
81
|
+
cache = "#{@cachedir}/modules.json"
|
36
82
|
|
37
83
|
if File.exist? cache
|
38
84
|
module_data = JSON.parse(File.read(cache))
|
@@ -41,22 +87,12 @@ class Mvp
|
|
41
87
|
end
|
42
88
|
|
43
89
|
begin
|
44
|
-
|
45
|
-
endpoint = "/private/validations/"
|
46
|
-
spinner = TTY::Spinner.new("[:spinner] :title")
|
90
|
+
spinner = TTY::Spinner.new("[:spinner] :title")
|
47
91
|
spinner.update(title: "Downloading module validations ...")
|
48
92
|
spinner.auto_spin
|
49
93
|
|
50
|
-
module_data
|
51
|
-
|
52
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
|
53
|
-
raise "Forge Error: #{@response.body}" unless response.code == 200
|
54
|
-
|
55
|
-
data = JSON.parse(response.body)
|
56
|
-
offset += 1
|
57
|
-
results[name] = data
|
58
|
-
|
59
|
-
spinner.update(title: "Downloading module validations [#{offset}]...") if (offset % 25 == 0)
|
94
|
+
results = retrieve_validations(module_data) do |offset|
|
95
|
+
spinner.update(title: "Downloading module validations [#{offset}]...")
|
60
96
|
end
|
61
97
|
|
62
98
|
spinner.success('(OK)')
|
@@ -72,7 +108,7 @@ class Mvp
|
|
72
108
|
end
|
73
109
|
|
74
110
|
def download(entity)
|
75
|
-
|
111
|
+
raise 'Please process downloaded data by passing a block' unless block_given?
|
76
112
|
|
77
113
|
begin
|
78
114
|
offset = 0
|
@@ -82,15 +118,19 @@ class Mvp
|
|
82
118
|
spinner.auto_spin
|
83
119
|
|
84
120
|
while endpoint do
|
85
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" =>
|
121
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
|
86
122
|
raise "Forge Error: #{@response.body}" unless response.code == 200
|
87
|
-
|
88
123
|
data = JSON.parse(response.body)
|
124
|
+
|
89
125
|
offset += 50
|
90
|
-
results += data['results']
|
91
126
|
endpoint = data['pagination']['next']
|
92
127
|
|
93
|
-
|
128
|
+
yield munge_dates(data['results'])
|
129
|
+
|
130
|
+
if (endpoint and (offset % 250 == 0))
|
131
|
+
spinner.update(title: "Downloading #{entity} [#{offset}]...")
|
132
|
+
GC.start
|
133
|
+
end
|
94
134
|
end
|
95
135
|
|
96
136
|
spinner.success('(OK)')
|
@@ -100,7 +140,7 @@ class Mvp
|
|
100
140
|
$logger.debug e.backtrace.join("\n")
|
101
141
|
end
|
102
142
|
|
103
|
-
|
143
|
+
nil
|
104
144
|
end
|
105
145
|
|
106
146
|
# transform dates into a format that bigquery knows
|
@@ -138,7 +178,7 @@ class Mvp
|
|
138
178
|
row['source'] = row['current_release']['metadata']['source']
|
139
179
|
row['project_page'] = row['current_release']['metadata']['project_page']
|
140
180
|
row['issues_url'] = row['current_release']['metadata']['issues_url']
|
141
|
-
row['tasks'] = row['current_release']['tasks'].map{|task| task['name']}
|
181
|
+
row['tasks'] = row['current_release']['tasks'].map{|task| task['name']} rescue []
|
142
182
|
|
143
183
|
row['release_count'] = row['releases'].count rescue 0
|
144
184
|
row['releases'] = row['releases'].map{|r| r['version']} rescue []
|
@@ -152,12 +192,12 @@ class Mvp
|
|
152
192
|
def flatten_releases(data)
|
153
193
|
data.each do |row|
|
154
194
|
row['name'] = row['module']['name']
|
155
|
-
row['owner'] = row['module']['username']
|
195
|
+
row['owner'] = row['module']['owner']['username']
|
156
196
|
row['license'] = row['metadata']['license']
|
157
197
|
row['source'] = row['metadata']['source']
|
158
198
|
row['project_page'] = row['metadata']['project_page']
|
159
199
|
row['issues_url'] = row['metadata']['issues_url']
|
160
|
-
row['tasks'] = row['tasks'].map{|task| task['name']}
|
200
|
+
row['tasks'] = row['tasks'].map{|task| task['name']} rescue []
|
161
201
|
|
162
202
|
simplify_metadata(row, row['metadata'])
|
163
203
|
row.delete('module')
|
@@ -176,8 +216,8 @@ class Mvp
|
|
176
216
|
end
|
177
217
|
|
178
218
|
def simplify_metadata(data, metadata)
|
179
|
-
data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue
|
180
|
-
data['dependencies'] = metadata['dependencies'].map{|i| i['name']}
|
219
|
+
data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue []
|
220
|
+
data['dependencies'] = metadata['dependencies'].map{|i| i['name'].sub('/', '-')} rescue []
|
181
221
|
data['puppet_range'] = metadata['requirements'].select{|r| r['name'] == 'puppet'}.first['version_requirement'] rescue nil
|
182
222
|
data['metadata'] = metadata.to_json
|
183
223
|
|
data/lib/mvp/runner.rb
CHANGED
@@ -26,15 +26,26 @@ class Mvp
|
|
26
26
|
def upload(target = :all)
|
27
27
|
uploader = Mvp::Uploader.new(@options)
|
28
28
|
|
29
|
-
[:authors, :modules, :releases, :validations, :
|
29
|
+
[:authors, :modules, :releases, :validations, :github_mirrors].each do |thing|
|
30
30
|
next unless [:all, thing].include? target
|
31
31
|
uploader.send(thing)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
35
|
def mirror(target = :all)
|
36
|
-
|
37
|
-
|
36
|
+
downloader = Mvp::Downloader.new(@options)
|
37
|
+
uploader = Mvp::Uploader.new(@options)
|
38
|
+
|
39
|
+
# validations are downloaded with modules
|
40
|
+
[:authors, :modules, :releases].each do |thing|
|
41
|
+
next unless [:all, thing].include? target
|
42
|
+
uploader.truncate(thing)
|
43
|
+
downloader.mirror(thing, uploader)
|
44
|
+
end
|
45
|
+
|
46
|
+
if [:all, :mirrors].include? target
|
47
|
+
uploader.github_mirrors()
|
48
|
+
end
|
38
49
|
end
|
39
50
|
|
40
51
|
def stats(target)
|
data/lib/mvp/uploader.rb
CHANGED
@@ -12,6 +12,139 @@ class Mvp
|
|
12
12
|
:credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
|
13
13
|
)
|
14
14
|
@dataset = @bigquery.dataset(options[:gcloud][:dataset])
|
15
|
+
|
16
|
+
raise "\nThere is a problem with the gCloud configuration: \n #{JSON.pretty_generate(options)}" if @dataset.nil?
|
17
|
+
end
|
18
|
+
|
19
|
+
def truncate(entity)
|
20
|
+
begin
|
21
|
+
case entity
|
22
|
+
when :authors
|
23
|
+
@dataset.table('forge_authors').delete rescue nil
|
24
|
+
@dataset.create_table('forge_authors') do |table|
|
25
|
+
table.name = 'Forge Authors'
|
26
|
+
table.description = 'A list of all authors (users) on the Forge'
|
27
|
+
table.schema do |s|
|
28
|
+
s.integer "module_count", mode: :required
|
29
|
+
s.integer "release_count", mode: :required
|
30
|
+
s.timestamp "created_at", mode: :required
|
31
|
+
s.string "display_name", mode: :required
|
32
|
+
s.string "username", mode: :required
|
33
|
+
s.timestamp "updated_at", mode: :required
|
34
|
+
s.string "gravatar_id", mode: :required
|
35
|
+
s.string "slug", mode: :required
|
36
|
+
s.string "uri", mode: :required
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
when :modules
|
41
|
+
# both modules and validations
|
42
|
+
@dataset.table('forge_modules').delete rescue nil
|
43
|
+
@dataset.create_table('forge_modules') do |table|
|
44
|
+
table.name = 'Forge Modules'
|
45
|
+
table.description = 'All modules and their metadata on the Forge'
|
46
|
+
table.schema do |s|
|
47
|
+
s.string "name", mode: :required
|
48
|
+
s.string "owner", mode: :required
|
49
|
+
s.string "version", mode: :required
|
50
|
+
s.string "slug", mode: :required
|
51
|
+
s.string "uri", mode: :required
|
52
|
+
s.timestamp "created_at", mode: :required
|
53
|
+
s.timestamp "updated_at", mode: :required
|
54
|
+
s.string "tasks", mode: :repeated
|
55
|
+
s.string "homepage_url"
|
56
|
+
s.string "project_page"
|
57
|
+
s.string "issues_url"
|
58
|
+
s.string "source"
|
59
|
+
s.boolean "supported"
|
60
|
+
s.string "endorsement"
|
61
|
+
s.string "module_group"
|
62
|
+
s.boolean "pdk"
|
63
|
+
s.string "operatingsystem", mode: :repeated
|
64
|
+
s.integer "release_count", mode: :required
|
65
|
+
s.integer "downloads", mode: :required
|
66
|
+
s.integer "feedback_score"
|
67
|
+
s.integer "validation_score"
|
68
|
+
s.string "releases", mode: :repeated
|
69
|
+
s.string "puppet_range"
|
70
|
+
s.boolean "puppet_2x"
|
71
|
+
s.boolean "puppet_3x"
|
72
|
+
s.boolean "puppet_4x"
|
73
|
+
s.boolean "puppet_5x"
|
74
|
+
s.boolean "puppet_6x"
|
75
|
+
s.string "superseded_by"
|
76
|
+
s.string "deprecated_for"
|
77
|
+
s.timestamp "deprecated_at"
|
78
|
+
s.timestamp "deleted_at"
|
79
|
+
s.string "dependencies", mode: :repeated
|
80
|
+
s.string "license"
|
81
|
+
s.string "metadata", mode: :required
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
@dataset.table('forge_validations').delete rescue nil
|
86
|
+
@dataset.create_table('forge_validations') do |table|
|
87
|
+
table.name = 'Forge Module Validations'
|
88
|
+
table.description = 'Validation scores for all the modules on the Forge'
|
89
|
+
table.schema do |s|
|
90
|
+
s.integer "total"
|
91
|
+
s.integer "parser"
|
92
|
+
s.integer "metadata"
|
93
|
+
s.integer "lint"
|
94
|
+
s.string "name", mode: :required
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
when :releases
|
99
|
+
@dataset.table('forge_releases').delete rescue nil
|
100
|
+
@dataset.create_table('forge_releases') do |table|
|
101
|
+
table.name = 'Forge Releases'
|
102
|
+
table.description = 'Releases of all modules on the Forge'
|
103
|
+
table.schema do |s|
|
104
|
+
s.string "name", mode: :required
|
105
|
+
s.string "owner", mode: :required
|
106
|
+
s.string "version", mode: :required
|
107
|
+
s.string "slug", mode: :required
|
108
|
+
s.string "uri", mode: :required
|
109
|
+
s.timestamp "created_at", mode: :required
|
110
|
+
s.timestamp "updated_at", mode: :required
|
111
|
+
s.timestamp "deleted_at"
|
112
|
+
s.string "deleted_for"
|
113
|
+
s.string "tasks", mode: :repeated
|
114
|
+
s.string "project_page"
|
115
|
+
s.string "issues_url"
|
116
|
+
s.string "source"
|
117
|
+
s.boolean "supported"
|
118
|
+
s.boolean "pdk"
|
119
|
+
s.string "tags", mode: :repeated
|
120
|
+
s.string "operatingsystem", mode: :repeated
|
121
|
+
s.integer "downloads", mode: :required
|
122
|
+
s.integer "feedback_score"
|
123
|
+
s.integer "validation_score"
|
124
|
+
s.string "puppet_range"
|
125
|
+
s.boolean "puppet_2x"
|
126
|
+
s.boolean "puppet_3x"
|
127
|
+
s.boolean "puppet_4x"
|
128
|
+
s.boolean "puppet_5x"
|
129
|
+
s.boolean "puppet_6x"
|
130
|
+
s.string "dependencies", mode: :repeated
|
131
|
+
s.string "file_uri", mode: :required
|
132
|
+
s.string "file_md5", mode: :required
|
133
|
+
s.integer "file_size", mode: :required
|
134
|
+
s.string "changelog"
|
135
|
+
s.string "reference"
|
136
|
+
s.string "readme"
|
137
|
+
s.string "license"
|
138
|
+
s.string "metadata", mode: :required
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
rescue => e
|
144
|
+
$logger.error e.message
|
145
|
+
$logger.debug e.backtrace.join("\n")
|
146
|
+
@channels = @dataset.table('slack_channels')
|
147
|
+
end
|
15
148
|
end
|
16
149
|
|
17
150
|
def authors()
|
@@ -30,7 +163,7 @@ class Mvp
|
|
30
163
|
upload('validations')
|
31
164
|
end
|
32
165
|
|
33
|
-
def
|
166
|
+
def github_mirrors()
|
34
167
|
@mirrors.each do |entity|
|
35
168
|
begin
|
36
169
|
spinner = TTY::Spinner.new("[:spinner] :title")
|
@@ -62,6 +195,19 @@ class Mvp
|
|
62
195
|
end
|
63
196
|
end
|
64
197
|
|
198
|
+
def insert(entity, data)
|
199
|
+
table = @dataset.table("forge_#{entity}")
|
200
|
+
response = table.insert(data)
|
201
|
+
|
202
|
+
unless response.success?
|
203
|
+
errors = {}
|
204
|
+
response.insert_errors.each do |err|
|
205
|
+
errors[err.row['slug']] = err.errors
|
206
|
+
end
|
207
|
+
$logger.error JSON.pretty_generate(errors)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
65
211
|
def upload(entity)
|
66
212
|
begin
|
67
213
|
spinner = TTY::Spinner.new("[:spinner] :title")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puppet-community-mvp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Ford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06
|
11
|
+
date: 2018-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -163,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
163
|
version: '0'
|
164
164
|
requirements: []
|
165
165
|
rubyforge_project:
|
166
|
-
rubygems_version: 2.
|
166
|
+
rubygems_version: 2.6.10
|
167
167
|
signing_key:
|
168
168
|
specification_version: 4
|
169
169
|
summary: Generate some stats about the Puppet Community.
|