puppet-community-mvp 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/mvp +9 -15
- data/bin/pftest.rb +22 -0
- data/lib/mvp.rb +1 -3
- data/lib/mvp/{uploader.rb → bigquery.rb} +75 -70
- data/lib/mvp/{downloader.rb → forge.rb} +44 -125
- data/lib/mvp/itemizer.rb +12 -4
- data/lib/mvp/puppetfile_parser.rb +171 -0
- data/lib/mvp/runner.rb +96 -27
- metadata +6 -5
- data/lib/mvp/monkeypatches.rb +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f5ed8c2978cad58a0ce52346eb854b3b4e82a9a
|
4
|
+
data.tar.gz: a5637f505ed5fe8c74a22f9633723b9211ad3c4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3ad31ba42a0e2f96ae8254bf383e523c74890fa219cb1b27946cae7718aba76b2d3532149dc98dfc1d59b5e47c4d31c70156c96203659425b55607c6a0d948d1
|
7
|
+
data.tar.gz: f8970bb1a709f807e3e5d2f45e2c74f7adbea034fc5d9293024e76f4dfb8b2e9eb59ecf636e38b12d69ce224867bbc96570f5e9ed1ddb5092783888e145ceec9
|
data/bin/mvp
CHANGED
@@ -13,16 +13,14 @@ optparse = OptionParser.new { |opts|
|
|
13
13
|
opts.banner = "Usage : #{NAME} [command] [target] [options]
|
14
14
|
|
15
15
|
This tool will scrape the Puppet Forge API for interesting module & author stats.
|
16
|
-
|
16
|
+
It can also mirror public BigQuery tables or views into our dataset for efficiency,
|
17
|
+
or download and itemize each Forge module.
|
17
18
|
|
18
|
-
* get | retrieve | download [target]
|
19
|
-
* Downloads and caches all Forge metadata.
|
20
|
-
* Optional targets: all, authors, modules, releases
|
21
|
-
* upload | insert [target]
|
22
|
-
* Uploads data to BigQuery
|
23
|
-
* Optional targets: all, authors, modules, releases, mirrors
|
24
19
|
* mirror [target]
|
25
20
|
* Runs the download & then upload tasks.
|
21
|
+
* Optional targets: all, authors, modules, releases, validations, itemizations, puppetfiles, tables
|
22
|
+
* get | retrieve | download [target]
|
23
|
+
* Downloads and caches data locally so you can run the stats task.
|
26
24
|
* Optional targets: all, authors, modules, releases
|
27
25
|
* stats
|
28
26
|
* Print out a summary of interesting stats.
|
@@ -64,6 +62,10 @@ The following CLI commands are available.
|
|
64
62
|
options[:debug] = true
|
65
63
|
end
|
66
64
|
|
65
|
+
opts.on("-n", "--noop", "Don't actually upload data.") do
|
66
|
+
options[:noop] = true
|
67
|
+
end
|
68
|
+
|
67
69
|
opts.separator('')
|
68
70
|
|
69
71
|
opts.on("-h", "--help", "Displays this help") do
|
@@ -100,14 +102,6 @@ when 'get', 'retrieve', 'download'
|
|
100
102
|
target ||= :all
|
101
103
|
runner.retrieve(target.to_sym)
|
102
104
|
|
103
|
-
when 'transform'
|
104
|
-
target ||= :all
|
105
|
-
runner.retrieve(target.to_sym, false)
|
106
|
-
|
107
|
-
when 'insert', 'upload'
|
108
|
-
target ||= :all
|
109
|
-
runner.upload(target.to_sym)
|
110
|
-
|
111
105
|
when 'mirror'
|
112
106
|
target ||= :all
|
113
107
|
runner.mirror(target.to_sym)
|
data/bin/pftest.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mvp/puppetfile_parser'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'json'
|
6
|
+
require 'logger'
|
7
|
+
|
8
|
+
$logger = Logger::new(STDOUT)
|
9
|
+
$logger.level = Logger::INFO
|
10
|
+
$logger.formatter = proc { |severity,datetime,progname,msg| "#{severity}: #{msg}\n" }
|
11
|
+
|
12
|
+
pf = open(ARGV.first)
|
13
|
+
parser = Mvp::PuppetfileParser.new()
|
14
|
+
|
15
|
+
|
16
|
+
repo = {
|
17
|
+
:repo_name => 'testing',
|
18
|
+
:md5 => 'wakka wakka',
|
19
|
+
:content => pf.read,
|
20
|
+
}
|
21
|
+
|
22
|
+
puts JSON.pretty_generate(parser.parse(repo))
|
data/lib/mvp.rb
CHANGED
@@ -3,10 +3,10 @@ require 'tty-spinner'
|
|
3
3
|
require "google/cloud/bigquery"
|
4
4
|
|
5
5
|
class Mvp
|
6
|
-
class
|
6
|
+
class Bigquery
|
7
7
|
def initialize(options = {})
|
8
|
+
@options = options
|
8
9
|
@cachedir = options[:cachedir]
|
9
|
-
@mirrors = options[:gcloud][:mirrors]
|
10
10
|
@bigquery = Google::Cloud::Bigquery.new(
|
11
11
|
:project_id => options[:gcloud][:project],
|
12
12
|
:credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
|
@@ -27,9 +27,24 @@ class Mvp
|
|
27
27
|
s.integer "count", mode: :required
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
@puppetfile_usage = @dataset.table('github_puppetfile_usage') || @dataset.create_table('github_puppetfile_usage') do |table|
|
32
|
+
table.name = 'Puppetfile Module Usage'
|
33
|
+
table.description = 'A list of all modules referenced in public Puppetfiles'
|
34
|
+
table.schema do |s|
|
35
|
+
s.string "repo_name", mode: :required
|
36
|
+
s.string "module", mode: :required
|
37
|
+
s.string "type", mode: :required
|
38
|
+
s.string "source"
|
39
|
+
s.string "version"
|
40
|
+
s.string "md5", mode: :required
|
41
|
+
end
|
42
|
+
end
|
30
43
|
end
|
31
44
|
|
32
45
|
def truncate(entity)
|
46
|
+
return if @options[:noop]
|
47
|
+
|
33
48
|
begin
|
34
49
|
case entity
|
35
50
|
when :authors
|
@@ -163,95 +178,85 @@ class Mvp
|
|
163
178
|
end
|
164
179
|
end
|
165
180
|
|
166
|
-
def
|
167
|
-
|
168
|
-
end
|
169
|
-
|
170
|
-
def modules()
|
171
|
-
upload('modules')
|
181
|
+
def retrieve(entity)
|
182
|
+
get(entity, ['*'])
|
172
183
|
end
|
173
184
|
|
174
|
-
def
|
175
|
-
|
176
|
-
end
|
185
|
+
def mirror_table(entity)
|
186
|
+
return if @options[:noop]
|
177
187
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
@mirrors.each do |entity|
|
184
|
-
begin
|
185
|
-
spinner = TTY::Spinner.new("[:spinner] :title")
|
186
|
-
spinner.update(title: "Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
|
187
|
-
spinner.auto_spin
|
188
|
-
|
189
|
-
case entity[:type]
|
190
|
-
when :view
|
191
|
-
@dataset.table(entity[:name]).delete rescue nil # delete if exists
|
192
|
-
@dataset.create_view(entity[:name], entity[:query],
|
193
|
-
:legacy_sql => true)
|
194
|
-
|
195
|
-
when :table
|
196
|
-
job = @dataset.query_job(entity[:query],
|
197
|
-
:legacy_sql => true,
|
198
|
-
:write => 'truncate',
|
199
|
-
:table => @dataset.table(entity[:name], :skip_lookup => true))
|
200
|
-
job.wait_until_done!
|
188
|
+
begin
|
189
|
+
case entity[:type]
|
190
|
+
when :view
|
191
|
+
@dataset.table(entity[:name]).delete rescue nil # delete if exists
|
192
|
+
@dataset.create_view(entity[:name], entity[:query])
|
201
193
|
|
202
|
-
|
203
|
-
|
204
|
-
|
194
|
+
when :table
|
195
|
+
job = @dataset.query_job(entity[:query],
|
196
|
+
:write => 'truncate',
|
197
|
+
:table => @dataset.table(entity[:name], :skip_lookup => true))
|
198
|
+
job.wait_until_done!
|
205
199
|
|
206
|
-
|
207
|
-
|
208
|
-
spinner.error("(Google Cloud error: #{e.message})")
|
209
|
-
$logger.error e.backtrace.join("\n")
|
200
|
+
else
|
201
|
+
$logger.error "Unknown mirror type: #{entity[:type]}"
|
210
202
|
end
|
203
|
+
rescue => e
|
204
|
+
$logger.error("(Google Cloud error: #{e.message})")
|
205
|
+
$logger.debug e.backtrace.join("\n")
|
211
206
|
end
|
212
207
|
end
|
213
208
|
|
214
|
-
def insert(entity, data)
|
215
|
-
|
209
|
+
def insert(entity, data, suite = 'forge')
|
210
|
+
return if @options[:noop]
|
211
|
+
return if data.empty?
|
212
|
+
|
213
|
+
table = @dataset.table("#{suite}_#{entity}")
|
216
214
|
response = table.insert(data)
|
217
215
|
|
218
216
|
unless response.success?
|
219
|
-
errors = {}
|
220
217
|
response.insert_errors.each do |err|
|
221
|
-
|
218
|
+
$logger.error JSON.pretty_generate(err.row)
|
219
|
+
$logger.error JSON.pretty_generate(err.errors)
|
222
220
|
end
|
223
|
-
$logger.error JSON.pretty_generate(errors)
|
224
221
|
end
|
225
222
|
end
|
226
223
|
|
227
|
-
def
|
228
|
-
|
229
|
-
|
230
|
-
spinner.update(title: "Uploading #{entity} to BigQuery ...")
|
231
|
-
spinner.auto_spin
|
224
|
+
def delete(entity, field, match, suite = 'forge')
|
225
|
+
@dataset.query("DELETE FROM #{suite}_#{entity} WHERE #{field} = '#{match}'")
|
226
|
+
end
|
232
227
|
|
233
|
-
|
234
|
-
|
235
|
-
|
228
|
+
def get(entity, fields, suite = 'forge')
|
229
|
+
raise 'pass fields as an array' unless fields.is_a? Array
|
230
|
+
@dataset.query("SELECT #{fields.join(', ')} FROM #{suite}_#{entity}")
|
231
|
+
end
|
236
232
|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
#
|
241
|
-
# begin
|
242
|
-
# table.insert data
|
243
|
-
# rescue
|
244
|
-
# require 'pry'
|
245
|
-
# binding.pry
|
246
|
-
# end
|
247
|
-
# end
|
233
|
+
def module_sources()
|
234
|
+
get('modules', ['slug', 'source'])
|
235
|
+
end
|
248
236
|
|
237
|
+
def puppetfiles()
|
238
|
+
sql = 'SELECT f.repo_name, f.path, c.content, c.md5
|
239
|
+
FROM github_puppetfile_files AS f
|
240
|
+
JOIN github_puppetfile_contents AS c
|
241
|
+
ON c.id = f.id
|
249
242
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
243
|
+
WHERE c.md5 NOT IN (
|
244
|
+
SELECT u.md5
|
245
|
+
FROM github_puppetfile_usage AS u
|
246
|
+
WHERE u.repo_name = f.repo_name
|
247
|
+
) AND LOWER(repo_name) NOT LIKE "%boxen%"'
|
248
|
+
@dataset.query(sql)
|
249
|
+
end
|
250
|
+
|
251
|
+
def unitemized()
|
252
|
+
sql = 'SELECT m.name, m.slug, m.version, m.dependencies
|
253
|
+
FROM forge_modules AS m
|
254
|
+
WHERE m.version NOT IN (
|
255
|
+
SELECT i.version
|
256
|
+
FROM forge_itemized AS i
|
257
|
+
WHERE module = m.slug
|
258
|
+
)'
|
259
|
+
@dataset.query(sql)
|
255
260
|
end
|
256
261
|
|
257
262
|
def version_itemized?(mod, version)
|
@@ -2,151 +2,82 @@ require 'json'
|
|
2
2
|
require 'httparty'
|
3
3
|
require 'tty-spinner'
|
4
4
|
require 'semantic_puppet'
|
5
|
-
require 'mvp/monkeypatches'
|
6
|
-
require 'mvp/itemizer'
|
7
5
|
|
8
6
|
class Mvp
|
9
|
-
class
|
7
|
+
class Forge
|
10
8
|
def initialize(options = {})
|
11
9
|
@useragent = 'Puppet Community Stats Monitor'
|
12
|
-
@cachedir = options[:cachedir]
|
13
10
|
@forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
|
14
|
-
@itemizer = Mvp::Itemizer.new(options)
|
15
11
|
end
|
16
12
|
|
17
|
-
def
|
18
|
-
|
19
|
-
item = (entity == :authors) ? 'users' : entity.to_s
|
20
|
-
download(item) do |data|
|
21
|
-
case entity
|
22
|
-
when :modules
|
23
|
-
uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
|
24
|
-
data = flatten_modules(data)
|
25
|
-
|
26
|
-
@itemizer.run!(data, uploader)
|
27
|
-
when :releases
|
28
|
-
data = flatten_releases(data)
|
29
|
-
end
|
30
|
-
|
31
|
-
uploader.insert(entity, data)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def retrieve(entity, download = true)
|
36
|
-
if download
|
37
|
-
# I am focusing on authorship rather than just users, so for now I'm using the word authors
|
38
|
-
item = (entity == :authors) ? 'users' : entity.to_s
|
39
|
-
data = []
|
40
|
-
download(item) do |resp|
|
41
|
-
data.concat resp
|
42
|
-
end
|
43
|
-
save_json(entity, data)
|
44
|
-
else
|
45
|
-
data = File.read("#{@cachedir}/#{entity}.json")
|
46
|
-
end
|
47
|
-
|
48
|
-
case entity
|
49
|
-
when :modules
|
50
|
-
data = flatten_modules(data)
|
51
|
-
when :releases
|
52
|
-
data = flatten_releases(data)
|
53
|
-
end
|
54
|
-
save_nld_json(entity.to_s, data)
|
55
|
-
end
|
56
|
-
|
57
|
-
def retrieve_validations(modules, period = 25)
|
58
|
-
results = {}
|
13
|
+
def retrieve(entity)
|
14
|
+
raise 'Please process downloaded data by passing a block' unless block_given?
|
59
15
|
|
16
|
+
# using authors for git repo terminology consistency
|
17
|
+
entity = :users if entity == :authors
|
60
18
|
begin
|
61
19
|
offset = 0
|
62
|
-
endpoint = "/
|
63
|
-
|
64
|
-
|
65
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}
|
20
|
+
endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
|
21
|
+
|
22
|
+
while endpoint do
|
23
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
|
66
24
|
raise "Forge Error: #{@response.body}" unless response.code == 200
|
25
|
+
data = JSON.parse(response.body)
|
26
|
+
results = munge_dates(data['results'])
|
27
|
+
|
28
|
+
case entity
|
29
|
+
when :modules
|
30
|
+
results = flatten_modules(results)
|
31
|
+
when :releases
|
32
|
+
results = flatten_releases(results)
|
33
|
+
end
|
67
34
|
|
68
|
-
results
|
69
|
-
offset += 1
|
35
|
+
yield results, offset
|
70
36
|
|
71
|
-
|
72
|
-
|
37
|
+
offset += 50
|
38
|
+
endpoint = data['pagination']['next']
|
39
|
+
if (endpoint and (offset % 250 == 0))
|
73
40
|
GC.start
|
74
41
|
end
|
75
42
|
end
|
43
|
+
|
76
44
|
rescue => e
|
77
45
|
$logger.error e.message
|
78
46
|
$logger.debug e.backtrace.join("\n")
|
79
47
|
end
|
80
48
|
|
81
|
-
|
49
|
+
nil
|
82
50
|
end
|
83
51
|
|
84
|
-
def
|
85
|
-
|
86
|
-
|
87
|
-
if File.exist? cache
|
88
|
-
module_data = JSON.parse(File.read(cache))
|
89
|
-
else
|
90
|
-
module_data = retrieve(:modules)
|
91
|
-
end
|
52
|
+
def retrieve_validations(modules, period = 25)
|
53
|
+
raise 'Please process validations by passing a block' unless block_given?
|
92
54
|
|
55
|
+
offset = 0
|
93
56
|
begin
|
94
|
-
|
95
|
-
|
96
|
-
|
57
|
+
modules.each_slice(period) do |group|
|
58
|
+
offset += period
|
59
|
+
results = group.map { |mod| validations(mod[:slug]) }
|
97
60
|
|
98
|
-
|
99
|
-
|
61
|
+
yield results, offset
|
62
|
+
GC.start
|
100
63
|
end
|
101
|
-
|
102
|
-
spinner.success('(OK)')
|
103
64
|
rescue => e
|
104
|
-
spinner.error('API error')
|
105
65
|
$logger.error e.message
|
106
66
|
$logger.debug e.backtrace.join("\n")
|
107
67
|
end
|
108
68
|
|
109
|
-
|
110
|
-
save_nld_json('validations', flatten_validations(results))
|
111
|
-
results
|
69
|
+
nil
|
112
70
|
end
|
113
71
|
|
114
|
-
def
|
115
|
-
|
72
|
+
def validations(name)
|
73
|
+
endpoint = "/private/validations/"
|
74
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
|
75
|
+
raise "Forge Error: #{@response.body}" unless response.code == 200
|
116
76
|
|
117
|
-
|
118
|
-
offset = 0
|
119
|
-
endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
|
120
|
-
spinner = TTY::Spinner.new("[:spinner] :title")
|
121
|
-
spinner.update(title: "Downloading #{entity} ...")
|
122
|
-
spinner.auto_spin
|
123
|
-
|
124
|
-
while endpoint do
|
125
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
|
126
|
-
raise "Forge Error: #{@response.body}" unless response.code == 200
|
127
|
-
data = JSON.parse(response.body)
|
128
|
-
|
129
|
-
offset += 50
|
130
|
-
endpoint = data['pagination']['next']
|
131
|
-
|
132
|
-
yield munge_dates(data['results'])
|
133
|
-
|
134
|
-
if (endpoint and (offset % 250 == 0))
|
135
|
-
spinner.update(title: "Downloading #{entity} [#{offset}]...")
|
136
|
-
GC.start
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
spinner.success('(OK)')
|
141
|
-
rescue => e
|
142
|
-
spinner.error('API error')
|
143
|
-
$logger.error e.message
|
144
|
-
$logger.debug e.backtrace.join("\n")
|
145
|
-
end
|
146
|
-
|
147
|
-
nil
|
77
|
+
flatten_validations(name, JSON.parse(response.body))
|
148
78
|
end
|
149
79
|
|
80
|
+
|
150
81
|
# transform dates into a format that bigquery knows
|
151
82
|
def munge_dates(object)
|
152
83
|
["created_at", "updated_at", "deprecated_at", "deleted_at"].each do |field|
|
@@ -160,16 +91,6 @@ class Mvp
|
|
160
91
|
object
|
161
92
|
end
|
162
93
|
|
163
|
-
def save_json(thing, data)
|
164
|
-
File.write("#{@cachedir}/#{thing}.json", data.to_json)
|
165
|
-
end
|
166
|
-
|
167
|
-
# store data in a way that bigquery can grok
|
168
|
-
# uploading files is far easier than streaming data, when replacing a dataset
|
169
|
-
def save_nld_json(thing, data)
|
170
|
-
File.write("#{@cachedir}/nld_#{thing}.json", data.to_newline_delimited_json)
|
171
|
-
end
|
172
|
-
|
173
94
|
def flatten_modules(data)
|
174
95
|
data.each do |row|
|
175
96
|
row['owner'] = row['owner']['username']
|
@@ -209,14 +130,12 @@ class Mvp
|
|
209
130
|
data
|
210
131
|
end
|
211
132
|
|
212
|
-
def flatten_validations(
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
row[entry['name']] = entry['score']
|
217
|
-
end
|
218
|
-
row
|
133
|
+
def flatten_validations(name, scores)
|
134
|
+
row = { 'name' => name }
|
135
|
+
scores.each do |entry|
|
136
|
+
row[entry['name']] = entry['score']
|
219
137
|
end
|
138
|
+
row
|
220
139
|
end
|
221
140
|
|
222
141
|
def simplify_metadata(data, metadata)
|
data/lib/mvp/itemizer.rb
CHANGED
@@ -27,6 +27,14 @@ class Mvp
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
+
def itemized(mod)
|
31
|
+
modname = mod[:slug]
|
32
|
+
version = mod[:version]
|
33
|
+
baserow = { :module => modname, :version => version, :kind => 'admin', :element => 'version', :count => 0}
|
34
|
+
|
35
|
+
table(itemize(modname, version), mod) << baserow
|
36
|
+
end
|
37
|
+
|
30
38
|
def download(path, modname, version)
|
31
39
|
filename = "#{modname}-#{version}.tar.gz"
|
32
40
|
Dir.chdir(path) do
|
@@ -58,10 +66,10 @@ class Mvp
|
|
58
66
|
# Build a table with this schema
|
59
67
|
# module | version | source | kind | element | count
|
60
68
|
def table(itemized, data)
|
61
|
-
modname = data[
|
62
|
-
slug = data[
|
63
|
-
version = data[
|
64
|
-
dependencies = data[
|
69
|
+
modname = data[:name]
|
70
|
+
slug = data[:slug]
|
71
|
+
version = data[:version]
|
72
|
+
dependencies = data[:dependencies]
|
65
73
|
|
66
74
|
itemized.map do |kind, elements|
|
67
75
|
# the kind of element comes pluralized from puppet-itemize
|
@@ -0,0 +1,171 @@
|
|
1
|
+
class Mvp
|
2
|
+
class PuppetfileParser
|
3
|
+
def initialize(options = {})
|
4
|
+
@sources = {}
|
5
|
+
@modules = []
|
6
|
+
@repo = nil
|
7
|
+
end
|
8
|
+
|
9
|
+
def suitable?
|
10
|
+
defined?(RubyVM::AbstractSyntaxTree)
|
11
|
+
end
|
12
|
+
|
13
|
+
def sources=(modules)
|
14
|
+
modules.each do |row|
|
15
|
+
next unless row[:source]
|
16
|
+
next if row[:source] == 'UNKNOWN'
|
17
|
+
|
18
|
+
@sources[canonical_git_repo(row[:source])] = row[:slug]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse(repo)
|
23
|
+
# This only works on Ruby 2.6+
|
24
|
+
return unless suitable?
|
25
|
+
|
26
|
+
begin
|
27
|
+
root = RubyVM::AbstractSyntaxTree.parse(repo[:content])
|
28
|
+
rescue SyntaxError => e
|
29
|
+
$logger.warn "Syntax error in #{repo[:repo_name]}/Puppetfile"
|
30
|
+
$logger.warn e.message
|
31
|
+
end
|
32
|
+
|
33
|
+
@repo = repo
|
34
|
+
@modules = []
|
35
|
+
traverse(root)
|
36
|
+
@modules.compact.map do |row|
|
37
|
+
row[:repo_name] = repo[:repo_name]
|
38
|
+
row[:md5] = repo[:md5]
|
39
|
+
row[:module] = canonical_name(row[:module], row[:source])
|
40
|
+
stringify(row)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def stringify(row)
|
45
|
+
row.each do |key, value|
|
46
|
+
if value.is_a? RubyVM::AbstractSyntaxTree::Node
|
47
|
+
row[key] = :'#<programmatically generated via ruby code>'
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def canonical_name(name, repo)
|
53
|
+
return name if name.include?('-')
|
54
|
+
repo = canonical_git_repo(repo)
|
55
|
+
|
56
|
+
return @sources[repo] if @sources.include?(repo)
|
57
|
+
name
|
58
|
+
end
|
59
|
+
|
60
|
+
def canonical_git_repo(repo)
|
61
|
+
return unless repo
|
62
|
+
return unless repo.is_a? String
|
63
|
+
repo.sub(/^git@github.com\:/, 'github.com/')
|
64
|
+
.sub(/^(git|https?)\:\/\//, '')
|
65
|
+
.sub(/\.git$/, '')
|
66
|
+
end
|
67
|
+
|
68
|
+
def add_module(name, args)
|
69
|
+
unless name.is_a? String
|
70
|
+
$logger.warn "Non string module name in #{@repo[:repo_name]}/Puppetfile"
|
71
|
+
return nil
|
72
|
+
end
|
73
|
+
name.gsub!('/', '-')
|
74
|
+
case args
|
75
|
+
when String, Symbol, NilClass
|
76
|
+
@modules << {
|
77
|
+
:module => name,
|
78
|
+
:type => :forge,
|
79
|
+
:source => :forge,
|
80
|
+
:version => args,
|
81
|
+
}
|
82
|
+
when Hash
|
83
|
+
@modules << parse_args(name, args)
|
84
|
+
else
|
85
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown format: mod('#{name}', #{args.inspect})"
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def parse_args(name, args)
|
90
|
+
data = {:module => name}
|
91
|
+
|
92
|
+
if args.include? :git
|
93
|
+
data[:type] = :git
|
94
|
+
data[:source] = args[:git]
|
95
|
+
data[:version] = args[:ref] || args[:tag] || args[:commit] || args[:branch] || :latest
|
96
|
+
elsif args.include? :svn
|
97
|
+
data[:type] = :svn
|
98
|
+
data[:source] = args[:svn]
|
99
|
+
data[:version] = args[:rev] || args[:revision] || :latest
|
100
|
+
elsif args.include? :boxen
|
101
|
+
data[:type] = :boxen
|
102
|
+
data[:source] = args[:repo]
|
103
|
+
data[:version] = args[:version] || :latest
|
104
|
+
else
|
105
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown args format: mod('#{name}', #{args.inspect})"
|
106
|
+
return nil
|
107
|
+
end
|
108
|
+
|
109
|
+
data
|
110
|
+
end
|
111
|
+
|
112
|
+
def traverse(node)
|
113
|
+
begin
|
114
|
+
if node.type == :FCALL
|
115
|
+
name = node.children.first
|
116
|
+
args = node.children.last.children.map do |item|
|
117
|
+
next if item.nil?
|
118
|
+
|
119
|
+
case item.type
|
120
|
+
when :HASH
|
121
|
+
Hash[*item.children.first.children.compact.map {|n| n.children.first }]
|
122
|
+
else
|
123
|
+
item.children.first
|
124
|
+
end
|
125
|
+
end.compact
|
126
|
+
|
127
|
+
case name
|
128
|
+
when :mod
|
129
|
+
add_module(args.shift, args.shift)
|
130
|
+
when :forge
|
131
|
+
# noop
|
132
|
+
when :moduledir
|
133
|
+
# noop
|
134
|
+
when :github
|
135
|
+
# oh boxen, you so silly.
|
136
|
+
# The order of the unpacking below *is* important.
|
137
|
+
modname = args.shift
|
138
|
+
version = args.shift
|
139
|
+
data = args.shift || {}
|
140
|
+
|
141
|
+
# this is gross but I'm not sure I actually care right now.
|
142
|
+
if (modname.is_a? String and [String, NilClass].include? version.class and data.is_a? Hash)
|
143
|
+
data[:boxen] = :boxen
|
144
|
+
data[:version] = version
|
145
|
+
add_module(modname, data)
|
146
|
+
else
|
147
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: malformed boxen"
|
148
|
+
end
|
149
|
+
else
|
150
|
+
# Should we record unexpected Ruby code or just log it to stdout?
|
151
|
+
args = args.map {|a| a.is_a?(String) ? "'#{a}'" : a}.join(', ')
|
152
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: Unexpected invocation of #{name}(#{args})"
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
node.children.each do |n|
|
157
|
+
next unless n.is_a? RubyVM::AbstractSyntaxTree::Node
|
158
|
+
|
159
|
+
traverse(n)
|
160
|
+
end
|
161
|
+
rescue => e
|
162
|
+
puts e.message
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
def test()
|
167
|
+
require 'pry'
|
168
|
+
binding.pry
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
data/lib/mvp/runner.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
require 'mvp/
|
2
|
-
require 'mvp/
|
1
|
+
require 'mvp/forge'
|
2
|
+
require 'mvp/bigquery'
|
3
3
|
require 'mvp/stats'
|
4
|
+
require 'mvp/itemizer'
|
5
|
+
require 'mvp/puppetfile_parser'
|
6
|
+
|
7
|
+
require 'tty-spinner'
|
4
8
|
|
5
9
|
class Mvp
|
6
10
|
class Runner
|
@@ -11,40 +15,94 @@ class Mvp
|
|
11
15
|
end
|
12
16
|
|
13
17
|
def retrieve(target = :all, download = true)
|
14
|
-
|
18
|
+
bigquery = Mvp::Bigquery.new(@options)
|
15
19
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
+
begin
|
21
|
+
[:authors, :modules, :releases, :validations].each do |thing|
|
22
|
+
next unless [:all, thing].include? target
|
23
|
+
spinner = mkspinner("Retrieving #{thing} ...")
|
24
|
+
data = bigquery.retrieve(thing)
|
25
|
+
save_json(thing, data)
|
26
|
+
spinner.success('(OK)')
|
27
|
+
end
|
20
28
|
|
21
|
-
|
22
|
-
|
29
|
+
rescue => e
|
30
|
+
spinner.error("API error: #{e.message}")
|
31
|
+
$logger.error "API error: #{e.message}"
|
32
|
+
$logger.debug e.backtrace.join("\n")
|
33
|
+
sleep 10
|
23
34
|
end
|
24
35
|
end
|
25
36
|
|
26
|
-
def
|
27
|
-
|
37
|
+
def mirror(target = :all)
|
38
|
+
forge = Mvp::Forge.new(@options)
|
39
|
+
bigquery = Mvp::Bigquery.new(@options)
|
40
|
+
itemizer = Mvp::Itemizer.new(@options)
|
41
|
+
pfparser = Mvp::PuppetfileParser.new(@options)
|
28
42
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
43
|
+
begin
|
44
|
+
[:authors, :modules, :releases].each do |thing|
|
45
|
+
next unless [:all, thing].include? target
|
46
|
+
spinner = mkspinner("Mirroring #{thing}...")
|
47
|
+
bigquery.truncate(thing)
|
48
|
+
forge.retrieve(thing) do |data, offset|
|
49
|
+
spinner.update(title: "Mirroring #{thing} [#{offset}]...")
|
50
|
+
bigquery.insert(thing, data)
|
51
|
+
end
|
52
|
+
spinner.success('(OK)')
|
53
|
+
end
|
34
54
|
|
35
|
-
|
36
|
-
|
37
|
-
|
55
|
+
if [:all, :validations].include? target
|
56
|
+
spinner = mkspinner("Mirroring validations...")
|
57
|
+
modules = bigquery.get(:modules, [:slug])
|
58
|
+
bigquery.truncate(:validations)
|
59
|
+
forge.retrieve_validations(modules) do |data, offset|
|
60
|
+
spinner.update(title: "Mirroring validations [#{offset}]...")
|
61
|
+
bigquery.insert(:validations, data)
|
62
|
+
end
|
63
|
+
spinner.success('(OK)')
|
64
|
+
end
|
38
65
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
66
|
+
if [:all, :itemizations].include? target
|
67
|
+
spinner = mkspinner("Itemizing modules...")
|
68
|
+
bigquery.unitemized.each do |mod|
|
69
|
+
spinner.update(title: "Itemizing [#{mod[:slug]}]...")
|
70
|
+
rows = itemizer.itemized(mod)
|
71
|
+
bigquery.delete(:itemized, :module, mod[:slug])
|
72
|
+
bigquery.insert(:itemized, rows)
|
73
|
+
end
|
74
|
+
spinner.success('(OK)')
|
75
|
+
end
|
76
|
+
|
77
|
+
if [:all, :mirrors, :tables].include? target
|
78
|
+
@options[:gcloud][:mirrors].each do |entity|
|
79
|
+
spinner = mkspinner("Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
|
80
|
+
bigquery.mirror_table(entity)
|
81
|
+
spinner.success('(OK)')
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
if [:all, :puppetfiles].include? target
|
86
|
+
spinner = mkspinner("Analyzing Puppetfile module references...")
|
87
|
+
if pfparser.suitable?
|
88
|
+
pfparser.sources = bigquery.module_sources
|
89
|
+
bigquery.puppetfiles.each do |repo|
|
90
|
+
spinner.update(title: "Analyzing [#{repo[:repo_name]}/Puppetfile]...")
|
91
|
+
rows = pfparser.parse(repo)
|
92
|
+
bigquery.delete(:puppetfile_usage, :repo_name, repo[:repo_name], :github)
|
93
|
+
bigquery.insert(:puppetfile_usage, rows, :github)
|
94
|
+
end
|
95
|
+
spinner.success('(OK)')
|
96
|
+
else
|
97
|
+
spinner.error("(Not functional on Ruby #{RUBY_VERSION})")
|
98
|
+
end
|
99
|
+
end
|
45
100
|
|
46
|
-
|
47
|
-
|
101
|
+
rescue => e
|
102
|
+
spinner.error("API error: #{e.message}")
|
103
|
+
$logger.error "API error: #{e.message}"
|
104
|
+
$logger.debug e.backtrace.join("\n")
|
105
|
+
sleep 10
|
48
106
|
end
|
49
107
|
end
|
50
108
|
|
@@ -57,6 +115,17 @@ class Mvp
|
|
57
115
|
end
|
58
116
|
end
|
59
117
|
|
118
|
+
def mkspinner(title)
|
119
|
+
spinner = TTY::Spinner.new("[:spinner] :title")
|
120
|
+
spinner.update(title: title)
|
121
|
+
spinner.auto_spin
|
122
|
+
spinner
|
123
|
+
end
|
124
|
+
|
125
|
+
def save_json(thing, data)
|
126
|
+
File.write("#{@cachedir}/#{thing}.json", data.to_json)
|
127
|
+
end
|
128
|
+
|
60
129
|
def test()
|
61
130
|
require 'pry'
|
62
131
|
binding.pry
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puppet-community-mvp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Ford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -151,13 +151,14 @@ files:
|
|
151
151
|
- LICENSE
|
152
152
|
- README.md
|
153
153
|
- bin/mvp
|
154
|
+
- bin/pftest.rb
|
154
155
|
- lib/mvp.rb
|
155
|
-
- lib/mvp/
|
156
|
+
- lib/mvp/bigquery.rb
|
157
|
+
- lib/mvp/forge.rb
|
156
158
|
- lib/mvp/itemizer.rb
|
157
|
-
- lib/mvp/
|
159
|
+
- lib/mvp/puppetfile_parser.rb
|
158
160
|
- lib/mvp/runner.rb
|
159
161
|
- lib/mvp/stats.rb
|
160
|
-
- lib/mvp/uploader.rb
|
161
162
|
homepage:
|
162
163
|
licenses:
|
163
164
|
- Apache 2
|