puppet-community-mvp 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/mvp +5 -1
- data/lib/mvp/downloader.rb +69 -29
- data/lib/mvp/runner.rb +14 -3
- data/lib/mvp/uploader.rb +147 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '082fa42831056dfcb39a9cf587c9128728ede8af'
|
4
|
+
data.tar.gz: c401376ae86adfc9ffcf2631cd607bcd1bbe8c6e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 396050b127e436e2c020836a051426a43c025ed3452983e42450f186ee1b04486c0efb7adfbd010a658a43865c4173cd6262ae16308363fd9847fa2969269f25
|
7
|
+
data.tar.gz: 35af39cff28f02378e7b7da71130c76a01cb06e4ec73b884b0ada051b3414c580e5fa578d606096d613940da70b3d3feb416ed5c23f77aa3238fbdaa894a9ca3
|
data/bin/mvp
CHANGED
@@ -8,7 +8,7 @@ require 'logger'
|
|
8
8
|
require 'mvp'
|
9
9
|
|
10
10
|
NAME = File.basename($PROGRAM_NAME)
|
11
|
-
options = {:config => File.expand_path('~/.mvp
|
11
|
+
options = {:config => File.expand_path('~/.mvp/config.yaml')}
|
12
12
|
optparse = OptionParser.new { |opts|
|
13
13
|
opts.banner = "Usage : #{NAME} [command] [target] [options]
|
14
14
|
|
@@ -21,6 +21,9 @@ The following CLI commands are available.
|
|
21
21
|
* upload | insert [target]
|
22
22
|
* Uploads data to BigQuery
|
23
23
|
* Optional targets: all, authors, modules, releases, mirrors
|
24
|
+
* mirror [target]
|
25
|
+
* Runs the download & then upload tasks.
|
26
|
+
* Optional targets: all, authors, modules, releases
|
24
27
|
* stats
|
25
28
|
* Print out a summary of interesting stats.
|
26
29
|
"
|
@@ -80,6 +83,7 @@ options[:gcloud][:project] ||= 'puppet'
|
|
80
83
|
options[:gcloud][:keyfile] ||= '~/.mvp/credentials.json'
|
81
84
|
|
82
85
|
options[:cachedir] = File.expand_path(options[:cachedir])
|
86
|
+
options[:github_data] = File.expand_path(options[:github_data])
|
83
87
|
options[:gcloud][:keyfile] = File.expand_path(options[:gcloud][:keyfile])
|
84
88
|
FileUtils.mkdir_p(options[:cachedir])
|
85
89
|
|
data/lib/mvp/downloader.rb
CHANGED
@@ -7,15 +7,35 @@ require 'mvp/monkeypatches'
|
|
7
7
|
class Mvp
|
8
8
|
class Downloader
|
9
9
|
def initialize(options = {})
|
10
|
-
@
|
11
|
-
@
|
10
|
+
@useragent = 'Puppet Community Stats Monitor'
|
11
|
+
@cachedir = options[:cachedir]
|
12
|
+
@forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
|
13
|
+
end
|
14
|
+
|
15
|
+
def mirror(entity, uploader)
|
16
|
+
# using authors for git repo terminology consistency
|
17
|
+
item = (entity == :authors) ? 'users' : entity.to_s
|
18
|
+
download(item) do |data|
|
19
|
+
case entity
|
20
|
+
when :modules
|
21
|
+
uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
|
22
|
+
data = flatten_modules(data)
|
23
|
+
when :releases
|
24
|
+
data = flatten_releases(data)
|
25
|
+
end
|
26
|
+
|
27
|
+
uploader.insert(entity, data)
|
28
|
+
end
|
12
29
|
end
|
13
30
|
|
14
31
|
def retrieve(entity, download = true)
|
15
32
|
if download
|
16
33
|
# I am focusing on authorship rather than just users, so for now I'm using the word authors
|
17
34
|
item = (entity == :authors) ? 'users' : entity.to_s
|
18
|
-
data =
|
35
|
+
data = []
|
36
|
+
download(item) do |resp|
|
37
|
+
data.concat resp
|
38
|
+
end
|
19
39
|
save_json(entity, data)
|
20
40
|
else
|
21
41
|
data = File.read("#{@cachedir}/#{entity}.json")
|
@@ -30,9 +50,35 @@ class Mvp
|
|
30
50
|
save_nld_json(entity.to_s, data)
|
31
51
|
end
|
32
52
|
|
33
|
-
def
|
53
|
+
def retrieve_validations(modules, period = 25)
|
34
54
|
results = {}
|
35
|
-
|
55
|
+
|
56
|
+
begin
|
57
|
+
offset = 0
|
58
|
+
endpoint = "/private/validations/"
|
59
|
+
modules.each do |mod|
|
60
|
+
name = "#{mod['owner']['username']}-#{mod['name']}"
|
61
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
|
62
|
+
raise "Forge Error: #{@response.body}" unless response.code == 200
|
63
|
+
|
64
|
+
results[name] = JSON.parse(response.body)
|
65
|
+
offset += 1
|
66
|
+
|
67
|
+
if block_given? and (offset % period == 0)
|
68
|
+
yield offset
|
69
|
+
GC.start
|
70
|
+
end
|
71
|
+
end
|
72
|
+
rescue => e
|
73
|
+
$logger.error e.message
|
74
|
+
$logger.debug e.backtrace.join("\n")
|
75
|
+
end
|
76
|
+
|
77
|
+
results
|
78
|
+
end
|
79
|
+
|
80
|
+
def validations()
|
81
|
+
cache = "#{@cachedir}/modules.json"
|
36
82
|
|
37
83
|
if File.exist? cache
|
38
84
|
module_data = JSON.parse(File.read(cache))
|
@@ -41,22 +87,12 @@ class Mvp
|
|
41
87
|
end
|
42
88
|
|
43
89
|
begin
|
44
|
-
|
45
|
-
endpoint = "/private/validations/"
|
46
|
-
spinner = TTY::Spinner.new("[:spinner] :title")
|
90
|
+
spinner = TTY::Spinner.new("[:spinner] :title")
|
47
91
|
spinner.update(title: "Downloading module validations ...")
|
48
92
|
spinner.auto_spin
|
49
93
|
|
50
|
-
module_data
|
51
|
-
|
52
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
|
53
|
-
raise "Forge Error: #{@response.body}" unless response.code == 200
|
54
|
-
|
55
|
-
data = JSON.parse(response.body)
|
56
|
-
offset += 1
|
57
|
-
results[name] = data
|
58
|
-
|
59
|
-
spinner.update(title: "Downloading module validations [#{offset}]...") if (offset % 25 == 0)
|
94
|
+
results = retrieve_validations(module_data) do |offset|
|
95
|
+
spinner.update(title: "Downloading module validations [#{offset}]...")
|
60
96
|
end
|
61
97
|
|
62
98
|
spinner.success('(OK)')
|
@@ -72,7 +108,7 @@ class Mvp
|
|
72
108
|
end
|
73
109
|
|
74
110
|
def download(entity)
|
75
|
-
|
111
|
+
raise 'Please process downloaded data by passing a block' unless block_given?
|
76
112
|
|
77
113
|
begin
|
78
114
|
offset = 0
|
@@ -82,15 +118,19 @@ class Mvp
|
|
82
118
|
spinner.auto_spin
|
83
119
|
|
84
120
|
while endpoint do
|
85
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" =>
|
121
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
|
86
122
|
raise "Forge Error: #{@response.body}" unless response.code == 200
|
87
|
-
|
88
123
|
data = JSON.parse(response.body)
|
124
|
+
|
89
125
|
offset += 50
|
90
|
-
results += data['results']
|
91
126
|
endpoint = data['pagination']['next']
|
92
127
|
|
93
|
-
|
128
|
+
yield munge_dates(data['results'])
|
129
|
+
|
130
|
+
if (endpoint and (offset % 250 == 0))
|
131
|
+
spinner.update(title: "Downloading #{entity} [#{offset}]...")
|
132
|
+
GC.start
|
133
|
+
end
|
94
134
|
end
|
95
135
|
|
96
136
|
spinner.success('(OK)')
|
@@ -100,7 +140,7 @@ class Mvp
|
|
100
140
|
$logger.debug e.backtrace.join("\n")
|
101
141
|
end
|
102
142
|
|
103
|
-
|
143
|
+
nil
|
104
144
|
end
|
105
145
|
|
106
146
|
# transform dates into a format that bigquery knows
|
@@ -138,7 +178,7 @@ class Mvp
|
|
138
178
|
row['source'] = row['current_release']['metadata']['source']
|
139
179
|
row['project_page'] = row['current_release']['metadata']['project_page']
|
140
180
|
row['issues_url'] = row['current_release']['metadata']['issues_url']
|
141
|
-
row['tasks'] = row['current_release']['tasks'].map{|task| task['name']}
|
181
|
+
row['tasks'] = row['current_release']['tasks'].map{|task| task['name']} rescue []
|
142
182
|
|
143
183
|
row['release_count'] = row['releases'].count rescue 0
|
144
184
|
row['releases'] = row['releases'].map{|r| r['version']} rescue []
|
@@ -152,12 +192,12 @@ class Mvp
|
|
152
192
|
def flatten_releases(data)
|
153
193
|
data.each do |row|
|
154
194
|
row['name'] = row['module']['name']
|
155
|
-
row['owner'] = row['module']['username']
|
195
|
+
row['owner'] = row['module']['owner']['username']
|
156
196
|
row['license'] = row['metadata']['license']
|
157
197
|
row['source'] = row['metadata']['source']
|
158
198
|
row['project_page'] = row['metadata']['project_page']
|
159
199
|
row['issues_url'] = row['metadata']['issues_url']
|
160
|
-
row['tasks'] = row['tasks'].map{|task| task['name']}
|
200
|
+
row['tasks'] = row['tasks'].map{|task| task['name']} rescue []
|
161
201
|
|
162
202
|
simplify_metadata(row, row['metadata'])
|
163
203
|
row.delete('module')
|
@@ -176,8 +216,8 @@ class Mvp
|
|
176
216
|
end
|
177
217
|
|
178
218
|
def simplify_metadata(data, metadata)
|
179
|
-
data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue
|
180
|
-
data['dependencies'] = metadata['dependencies'].map{|i| i['name']}
|
219
|
+
data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue []
|
220
|
+
data['dependencies'] = metadata['dependencies'].map{|i| i['name'].sub('/', '-')} rescue []
|
181
221
|
data['puppet_range'] = metadata['requirements'].select{|r| r['name'] == 'puppet'}.first['version_requirement'] rescue nil
|
182
222
|
data['metadata'] = metadata.to_json
|
183
223
|
|
data/lib/mvp/runner.rb
CHANGED
@@ -26,15 +26,26 @@ class Mvp
|
|
26
26
|
def upload(target = :all)
|
27
27
|
uploader = Mvp::Uploader.new(@options)
|
28
28
|
|
29
|
-
[:authors, :modules, :releases, :validations, :
|
29
|
+
[:authors, :modules, :releases, :validations, :github_mirrors].each do |thing|
|
30
30
|
next unless [:all, thing].include? target
|
31
31
|
uploader.send(thing)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
35
|
def mirror(target = :all)
|
36
|
-
|
37
|
-
|
36
|
+
downloader = Mvp::Downloader.new(@options)
|
37
|
+
uploader = Mvp::Uploader.new(@options)
|
38
|
+
|
39
|
+
# validations are downloaded with modules
|
40
|
+
[:authors, :modules, :releases].each do |thing|
|
41
|
+
next unless [:all, thing].include? target
|
42
|
+
uploader.truncate(thing)
|
43
|
+
downloader.mirror(thing, uploader)
|
44
|
+
end
|
45
|
+
|
46
|
+
if [:all, :mirrors].include? target
|
47
|
+
uploader.github_mirrors()
|
48
|
+
end
|
38
49
|
end
|
39
50
|
|
40
51
|
def stats(target)
|
data/lib/mvp/uploader.rb
CHANGED
@@ -12,6 +12,139 @@ class Mvp
|
|
12
12
|
:credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
|
13
13
|
)
|
14
14
|
@dataset = @bigquery.dataset(options[:gcloud][:dataset])
|
15
|
+
|
16
|
+
raise "\nThere is a problem with the gCloud configuration: \n #{JSON.pretty_generate(options)}" if @dataset.nil?
|
17
|
+
end
|
18
|
+
|
19
|
+
def truncate(entity)
|
20
|
+
begin
|
21
|
+
case entity
|
22
|
+
when :authors
|
23
|
+
@dataset.table('forge_authors').delete rescue nil
|
24
|
+
@dataset.create_table('forge_authors') do |table|
|
25
|
+
table.name = 'Forge Authors'
|
26
|
+
table.description = 'A list of all authors (users) on the Forge'
|
27
|
+
table.schema do |s|
|
28
|
+
s.integer "module_count", mode: :required
|
29
|
+
s.integer "release_count", mode: :required
|
30
|
+
s.timestamp "created_at", mode: :required
|
31
|
+
s.string "display_name", mode: :required
|
32
|
+
s.string "username", mode: :required
|
33
|
+
s.timestamp "updated_at", mode: :required
|
34
|
+
s.string "gravatar_id", mode: :required
|
35
|
+
s.string "slug", mode: :required
|
36
|
+
s.string "uri", mode: :required
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
when :modules
|
41
|
+
# both modules and validations
|
42
|
+
@dataset.table('forge_modules').delete rescue nil
|
43
|
+
@dataset.create_table('forge_modules') do |table|
|
44
|
+
table.name = 'Forge Modules'
|
45
|
+
table.description = 'All modules and their metadata on the Forge'
|
46
|
+
table.schema do |s|
|
47
|
+
s.string "name", mode: :required
|
48
|
+
s.string "owner", mode: :required
|
49
|
+
s.string "version", mode: :required
|
50
|
+
s.string "slug", mode: :required
|
51
|
+
s.string "uri", mode: :required
|
52
|
+
s.timestamp "created_at", mode: :required
|
53
|
+
s.timestamp "updated_at", mode: :required
|
54
|
+
s.string "tasks", mode: :repeated
|
55
|
+
s.string "homepage_url"
|
56
|
+
s.string "project_page"
|
57
|
+
s.string "issues_url"
|
58
|
+
s.string "source"
|
59
|
+
s.boolean "supported"
|
60
|
+
s.string "endorsement"
|
61
|
+
s.string "module_group"
|
62
|
+
s.boolean "pdk"
|
63
|
+
s.string "operatingsystem", mode: :repeated
|
64
|
+
s.integer "release_count", mode: :required
|
65
|
+
s.integer "downloads", mode: :required
|
66
|
+
s.integer "feedback_score"
|
67
|
+
s.integer "validation_score"
|
68
|
+
s.string "releases", mode: :repeated
|
69
|
+
s.string "puppet_range"
|
70
|
+
s.boolean "puppet_2x"
|
71
|
+
s.boolean "puppet_3x"
|
72
|
+
s.boolean "puppet_4x"
|
73
|
+
s.boolean "puppet_5x"
|
74
|
+
s.boolean "puppet_6x"
|
75
|
+
s.string "superseded_by"
|
76
|
+
s.string "deprecated_for"
|
77
|
+
s.timestamp "deprecated_at"
|
78
|
+
s.timestamp "deleted_at"
|
79
|
+
s.string "dependencies", mode: :repeated
|
80
|
+
s.string "license"
|
81
|
+
s.string "metadata", mode: :required
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
@dataset.table('forge_validations').delete rescue nil
|
86
|
+
@dataset.create_table('forge_validations') do |table|
|
87
|
+
table.name = 'Forge Module Validations'
|
88
|
+
table.description = 'Validation scores for all the modules on the Forge'
|
89
|
+
table.schema do |s|
|
90
|
+
s.integer "total"
|
91
|
+
s.integer "parser"
|
92
|
+
s.integer "metadata"
|
93
|
+
s.integer "lint"
|
94
|
+
s.string "name", mode: :required
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
when :releases
|
99
|
+
@dataset.table('forge_releases').delete rescue nil
|
100
|
+
@dataset.create_table('forge_releases') do |table|
|
101
|
+
table.name = 'Forge Releases'
|
102
|
+
table.description = 'Releases of all modules on the Forge'
|
103
|
+
table.schema do |s|
|
104
|
+
s.string "name", mode: :required
|
105
|
+
s.string "owner", mode: :required
|
106
|
+
s.string "version", mode: :required
|
107
|
+
s.string "slug", mode: :required
|
108
|
+
s.string "uri", mode: :required
|
109
|
+
s.timestamp "created_at", mode: :required
|
110
|
+
s.timestamp "updated_at", mode: :required
|
111
|
+
s.timestamp "deleted_at"
|
112
|
+
s.string "deleted_for"
|
113
|
+
s.string "tasks", mode: :repeated
|
114
|
+
s.string "project_page"
|
115
|
+
s.string "issues_url"
|
116
|
+
s.string "source"
|
117
|
+
s.boolean "supported"
|
118
|
+
s.boolean "pdk"
|
119
|
+
s.string "tags", mode: :repeated
|
120
|
+
s.string "operatingsystem", mode: :repeated
|
121
|
+
s.integer "downloads", mode: :required
|
122
|
+
s.integer "feedback_score"
|
123
|
+
s.integer "validation_score"
|
124
|
+
s.string "puppet_range"
|
125
|
+
s.boolean "puppet_2x"
|
126
|
+
s.boolean "puppet_3x"
|
127
|
+
s.boolean "puppet_4x"
|
128
|
+
s.boolean "puppet_5x"
|
129
|
+
s.boolean "puppet_6x"
|
130
|
+
s.string "dependencies", mode: :repeated
|
131
|
+
s.string "file_uri", mode: :required
|
132
|
+
s.string "file_md5", mode: :required
|
133
|
+
s.integer "file_size", mode: :required
|
134
|
+
s.string "changelog"
|
135
|
+
s.string "reference"
|
136
|
+
s.string "readme"
|
137
|
+
s.string "license"
|
138
|
+
s.string "metadata", mode: :required
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
rescue => e
|
144
|
+
$logger.error e.message
|
145
|
+
$logger.debug e.backtrace.join("\n")
|
146
|
+
@channels = @dataset.table('slack_channels')
|
147
|
+
end
|
15
148
|
end
|
16
149
|
|
17
150
|
def authors()
|
@@ -30,7 +163,7 @@ class Mvp
|
|
30
163
|
upload('validations')
|
31
164
|
end
|
32
165
|
|
33
|
-
def
|
166
|
+
def github_mirrors()
|
34
167
|
@mirrors.each do |entity|
|
35
168
|
begin
|
36
169
|
spinner = TTY::Spinner.new("[:spinner] :title")
|
@@ -62,6 +195,19 @@ class Mvp
|
|
62
195
|
end
|
63
196
|
end
|
64
197
|
|
198
|
+
def insert(entity, data)
|
199
|
+
table = @dataset.table("forge_#{entity}")
|
200
|
+
response = table.insert(data)
|
201
|
+
|
202
|
+
unless response.success?
|
203
|
+
errors = {}
|
204
|
+
response.insert_errors.each do |err|
|
205
|
+
errors[err.row['slug']] = err.errors
|
206
|
+
end
|
207
|
+
$logger.error JSON.pretty_generate(errors)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
65
211
|
def upload(entity)
|
66
212
|
begin
|
67
213
|
spinner = TTY::Spinner.new("[:spinner] :title")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puppet-community-mvp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Ford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06
|
11
|
+
date: 2018-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -163,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
163
|
version: '0'
|
164
164
|
requirements: []
|
165
165
|
rubyforge_project:
|
166
|
-
rubygems_version: 2.
|
166
|
+
rubygems_version: 2.6.10
|
167
167
|
signing_key:
|
168
168
|
specification_version: 4
|
169
169
|
summary: Generate some stats about the Puppet Community.
|