puppet-community-mvp 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/mvp +9 -15
- data/bin/pftest.rb +22 -0
- data/lib/mvp.rb +1 -3
- data/lib/mvp/{uploader.rb → bigquery.rb} +75 -70
- data/lib/mvp/{downloader.rb → forge.rb} +44 -125
- data/lib/mvp/itemizer.rb +12 -4
- data/lib/mvp/puppetfile_parser.rb +171 -0
- data/lib/mvp/runner.rb +96 -27
- metadata +6 -5
- data/lib/mvp/monkeypatches.rb +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f5ed8c2978cad58a0ce52346eb854b3b4e82a9a
|
4
|
+
data.tar.gz: a5637f505ed5fe8c74a22f9633723b9211ad3c4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3ad31ba42a0e2f96ae8254bf383e523c74890fa219cb1b27946cae7718aba76b2d3532149dc98dfc1d59b5e47c4d31c70156c96203659425b55607c6a0d948d1
|
7
|
+
data.tar.gz: f8970bb1a709f807e3e5d2f45e2c74f7adbea034fc5d9293024e76f4dfb8b2e9eb59ecf636e38b12d69ce224867bbc96570f5e9ed1ddb5092783888e145ceec9
|
data/bin/mvp
CHANGED
@@ -13,16 +13,14 @@ optparse = OptionParser.new { |opts|
|
|
13
13
|
opts.banner = "Usage : #{NAME} [command] [target] [options]
|
14
14
|
|
15
15
|
This tool will scrape the Puppet Forge API for interesting module & author stats.
|
16
|
-
|
16
|
+
It can also mirror public BigQuery tables or views into our dataset for efficiency,
|
17
|
+
or download and itemize each Forge module.
|
17
18
|
|
18
|
-
* get | retrieve | download [target]
|
19
|
-
* Downloads and caches all Forge metadata.
|
20
|
-
* Optional targets: all, authors, modules, releases
|
21
|
-
* upload | insert [target]
|
22
|
-
* Uploads data to BigQuery
|
23
|
-
* Optional targets: all, authors, modules, releases, mirrors
|
24
19
|
* mirror [target]
|
25
20
|
* Runs the download & then upload tasks.
|
21
|
+
* Optional targets: all, authors, modules, releases, validations, itemizations, puppetfiles, tables
|
22
|
+
* get | retrieve | download [target]
|
23
|
+
* Downloads and caches data locally so you can run the stats task.
|
26
24
|
* Optional targets: all, authors, modules, releases
|
27
25
|
* stats
|
28
26
|
* Print out a summary of interesting stats.
|
@@ -64,6 +62,10 @@ The following CLI commands are available.
|
|
64
62
|
options[:debug] = true
|
65
63
|
end
|
66
64
|
|
65
|
+
opts.on("-n", "--noop", "Don't actually upload data.") do
|
66
|
+
options[:noop] = true
|
67
|
+
end
|
68
|
+
|
67
69
|
opts.separator('')
|
68
70
|
|
69
71
|
opts.on("-h", "--help", "Displays this help") do
|
@@ -100,14 +102,6 @@ when 'get', 'retrieve', 'download'
|
|
100
102
|
target ||= :all
|
101
103
|
runner.retrieve(target.to_sym)
|
102
104
|
|
103
|
-
when 'transform'
|
104
|
-
target ||= :all
|
105
|
-
runner.retrieve(target.to_sym, false)
|
106
|
-
|
107
|
-
when 'insert', 'upload'
|
108
|
-
target ||= :all
|
109
|
-
runner.upload(target.to_sym)
|
110
|
-
|
111
105
|
when 'mirror'
|
112
106
|
target ||= :all
|
113
107
|
runner.mirror(target.to_sym)
|
data/bin/pftest.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mvp/puppetfile_parser'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'json'
|
6
|
+
require 'logger'
|
7
|
+
|
8
|
+
$logger = Logger::new(STDOUT)
|
9
|
+
$logger.level = Logger::INFO
|
10
|
+
$logger.formatter = proc { |severity,datetime,progname,msg| "#{severity}: #{msg}\n" }
|
11
|
+
|
12
|
+
pf = open(ARGV.first)
|
13
|
+
parser = Mvp::PuppetfileParser.new()
|
14
|
+
|
15
|
+
|
16
|
+
repo = {
|
17
|
+
:repo_name => 'testing',
|
18
|
+
:md5 => 'wakka wakka',
|
19
|
+
:content => pf.read,
|
20
|
+
}
|
21
|
+
|
22
|
+
puts JSON.pretty_generate(parser.parse(repo))
|
data/lib/mvp.rb
CHANGED
@@ -3,10 +3,10 @@ require 'tty-spinner'
|
|
3
3
|
require "google/cloud/bigquery"
|
4
4
|
|
5
5
|
class Mvp
|
6
|
-
class
|
6
|
+
class Bigquery
|
7
7
|
def initialize(options = {})
|
8
|
+
@options = options
|
8
9
|
@cachedir = options[:cachedir]
|
9
|
-
@mirrors = options[:gcloud][:mirrors]
|
10
10
|
@bigquery = Google::Cloud::Bigquery.new(
|
11
11
|
:project_id => options[:gcloud][:project],
|
12
12
|
:credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
|
@@ -27,9 +27,24 @@ class Mvp
|
|
27
27
|
s.integer "count", mode: :required
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
@puppetfile_usage = @dataset.table('github_puppetfile_usage') || @dataset.create_table('github_puppetfile_usage') do |table|
|
32
|
+
table.name = 'Puppetfile Module Usage'
|
33
|
+
table.description = 'A list of all modules referenced in public Puppetfiles'
|
34
|
+
table.schema do |s|
|
35
|
+
s.string "repo_name", mode: :required
|
36
|
+
s.string "module", mode: :required
|
37
|
+
s.string "type", mode: :required
|
38
|
+
s.string "source"
|
39
|
+
s.string "version"
|
40
|
+
s.string "md5", mode: :required
|
41
|
+
end
|
42
|
+
end
|
30
43
|
end
|
31
44
|
|
32
45
|
def truncate(entity)
|
46
|
+
return if @options[:noop]
|
47
|
+
|
33
48
|
begin
|
34
49
|
case entity
|
35
50
|
when :authors
|
@@ -163,95 +178,85 @@ class Mvp
|
|
163
178
|
end
|
164
179
|
end
|
165
180
|
|
166
|
-
def
|
167
|
-
|
168
|
-
end
|
169
|
-
|
170
|
-
def modules()
|
171
|
-
upload('modules')
|
181
|
+
def retrieve(entity)
|
182
|
+
get(entity, ['*'])
|
172
183
|
end
|
173
184
|
|
174
|
-
def
|
175
|
-
|
176
|
-
end
|
185
|
+
def mirror_table(entity)
|
186
|
+
return if @options[:noop]
|
177
187
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
@mirrors.each do |entity|
|
184
|
-
begin
|
185
|
-
spinner = TTY::Spinner.new("[:spinner] :title")
|
186
|
-
spinner.update(title: "Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
|
187
|
-
spinner.auto_spin
|
188
|
-
|
189
|
-
case entity[:type]
|
190
|
-
when :view
|
191
|
-
@dataset.table(entity[:name]).delete rescue nil # delete if exists
|
192
|
-
@dataset.create_view(entity[:name], entity[:query],
|
193
|
-
:legacy_sql => true)
|
194
|
-
|
195
|
-
when :table
|
196
|
-
job = @dataset.query_job(entity[:query],
|
197
|
-
:legacy_sql => true,
|
198
|
-
:write => 'truncate',
|
199
|
-
:table => @dataset.table(entity[:name], :skip_lookup => true))
|
200
|
-
job.wait_until_done!
|
188
|
+
begin
|
189
|
+
case entity[:type]
|
190
|
+
when :view
|
191
|
+
@dataset.table(entity[:name]).delete rescue nil # delete if exists
|
192
|
+
@dataset.create_view(entity[:name], entity[:query])
|
201
193
|
|
202
|
-
|
203
|
-
|
204
|
-
|
194
|
+
when :table
|
195
|
+
job = @dataset.query_job(entity[:query],
|
196
|
+
:write => 'truncate',
|
197
|
+
:table => @dataset.table(entity[:name], :skip_lookup => true))
|
198
|
+
job.wait_until_done!
|
205
199
|
|
206
|
-
|
207
|
-
|
208
|
-
spinner.error("(Google Cloud error: #{e.message})")
|
209
|
-
$logger.error e.backtrace.join("\n")
|
200
|
+
else
|
201
|
+
$logger.error "Unknown mirror type: #{entity[:type]}"
|
210
202
|
end
|
203
|
+
rescue => e
|
204
|
+
$logger.error("(Google Cloud error: #{e.message})")
|
205
|
+
$logger.debug e.backtrace.join("\n")
|
211
206
|
end
|
212
207
|
end
|
213
208
|
|
214
|
-
def insert(entity, data)
|
215
|
-
|
209
|
+
def insert(entity, data, suite = 'forge')
|
210
|
+
return if @options[:noop]
|
211
|
+
return if data.empty?
|
212
|
+
|
213
|
+
table = @dataset.table("#{suite}_#{entity}")
|
216
214
|
response = table.insert(data)
|
217
215
|
|
218
216
|
unless response.success?
|
219
|
-
errors = {}
|
220
217
|
response.insert_errors.each do |err|
|
221
|
-
|
218
|
+
$logger.error JSON.pretty_generate(err.row)
|
219
|
+
$logger.error JSON.pretty_generate(err.errors)
|
222
220
|
end
|
223
|
-
$logger.error JSON.pretty_generate(errors)
|
224
221
|
end
|
225
222
|
end
|
226
223
|
|
227
|
-
def
|
228
|
-
|
229
|
-
|
230
|
-
spinner.update(title: "Uploading #{entity} to BigQuery ...")
|
231
|
-
spinner.auto_spin
|
224
|
+
def delete(entity, field, match, suite = 'forge')
|
225
|
+
@dataset.query("DELETE FROM #{suite}_#{entity} WHERE #{field} = '#{match}'")
|
226
|
+
end
|
232
227
|
|
233
|
-
|
234
|
-
|
235
|
-
|
228
|
+
def get(entity, fields, suite = 'forge')
|
229
|
+
raise 'pass fields as an array' unless fields.is_a? Array
|
230
|
+
@dataset.query("SELECT #{fields.join(', ')} FROM #{suite}_#{entity}")
|
231
|
+
end
|
236
232
|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
#
|
241
|
-
# begin
|
242
|
-
# table.insert data
|
243
|
-
# rescue
|
244
|
-
# require 'pry'
|
245
|
-
# binding.pry
|
246
|
-
# end
|
247
|
-
# end
|
233
|
+
def module_sources()
|
234
|
+
get('modules', ['slug', 'source'])
|
235
|
+
end
|
248
236
|
|
237
|
+
def puppetfiles()
|
238
|
+
sql = 'SELECT f.repo_name, f.path, c.content, c.md5
|
239
|
+
FROM github_puppetfile_files AS f
|
240
|
+
JOIN github_puppetfile_contents AS c
|
241
|
+
ON c.id = f.id
|
249
242
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
243
|
+
WHERE c.md5 NOT IN (
|
244
|
+
SELECT u.md5
|
245
|
+
FROM github_puppetfile_usage AS u
|
246
|
+
WHERE u.repo_name = f.repo_name
|
247
|
+
) AND LOWER(repo_name) NOT LIKE "%boxen%"'
|
248
|
+
@dataset.query(sql)
|
249
|
+
end
|
250
|
+
|
251
|
+
def unitemized()
|
252
|
+
sql = 'SELECT m.name, m.slug, m.version, m.dependencies
|
253
|
+
FROM forge_modules AS m
|
254
|
+
WHERE m.version NOT IN (
|
255
|
+
SELECT i.version
|
256
|
+
FROM forge_itemized AS i
|
257
|
+
WHERE module = m.slug
|
258
|
+
)'
|
259
|
+
@dataset.query(sql)
|
255
260
|
end
|
256
261
|
|
257
262
|
def version_itemized?(mod, version)
|
@@ -2,151 +2,82 @@ require 'json'
|
|
2
2
|
require 'httparty'
|
3
3
|
require 'tty-spinner'
|
4
4
|
require 'semantic_puppet'
|
5
|
-
require 'mvp/monkeypatches'
|
6
|
-
require 'mvp/itemizer'
|
7
5
|
|
8
6
|
class Mvp
|
9
|
-
class
|
7
|
+
class Forge
|
10
8
|
def initialize(options = {})
|
11
9
|
@useragent = 'Puppet Community Stats Monitor'
|
12
|
-
@cachedir = options[:cachedir]
|
13
10
|
@forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
|
14
|
-
@itemizer = Mvp::Itemizer.new(options)
|
15
11
|
end
|
16
12
|
|
17
|
-
def
|
18
|
-
|
19
|
-
item = (entity == :authors) ? 'users' : entity.to_s
|
20
|
-
download(item) do |data|
|
21
|
-
case entity
|
22
|
-
when :modules
|
23
|
-
uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
|
24
|
-
data = flatten_modules(data)
|
25
|
-
|
26
|
-
@itemizer.run!(data, uploader)
|
27
|
-
when :releases
|
28
|
-
data = flatten_releases(data)
|
29
|
-
end
|
30
|
-
|
31
|
-
uploader.insert(entity, data)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def retrieve(entity, download = true)
|
36
|
-
if download
|
37
|
-
# I am focusing on authorship rather than just users, so for now I'm using the word authors
|
38
|
-
item = (entity == :authors) ? 'users' : entity.to_s
|
39
|
-
data = []
|
40
|
-
download(item) do |resp|
|
41
|
-
data.concat resp
|
42
|
-
end
|
43
|
-
save_json(entity, data)
|
44
|
-
else
|
45
|
-
data = File.read("#{@cachedir}/#{entity}.json")
|
46
|
-
end
|
47
|
-
|
48
|
-
case entity
|
49
|
-
when :modules
|
50
|
-
data = flatten_modules(data)
|
51
|
-
when :releases
|
52
|
-
data = flatten_releases(data)
|
53
|
-
end
|
54
|
-
save_nld_json(entity.to_s, data)
|
55
|
-
end
|
56
|
-
|
57
|
-
def retrieve_validations(modules, period = 25)
|
58
|
-
results = {}
|
13
|
+
def retrieve(entity)
|
14
|
+
raise 'Please process downloaded data by passing a block' unless block_given?
|
59
15
|
|
16
|
+
# using authors for git repo terminology consistency
|
17
|
+
entity = :users if entity == :authors
|
60
18
|
begin
|
61
19
|
offset = 0
|
62
|
-
endpoint = "/
|
63
|
-
|
64
|
-
|
65
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}
|
20
|
+
endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
|
21
|
+
|
22
|
+
while endpoint do
|
23
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
|
66
24
|
raise "Forge Error: #{@response.body}" unless response.code == 200
|
25
|
+
data = JSON.parse(response.body)
|
26
|
+
results = munge_dates(data['results'])
|
27
|
+
|
28
|
+
case entity
|
29
|
+
when :modules
|
30
|
+
results = flatten_modules(results)
|
31
|
+
when :releases
|
32
|
+
results = flatten_releases(results)
|
33
|
+
end
|
67
34
|
|
68
|
-
results
|
69
|
-
offset += 1
|
35
|
+
yield results, offset
|
70
36
|
|
71
|
-
|
72
|
-
|
37
|
+
offset += 50
|
38
|
+
endpoint = data['pagination']['next']
|
39
|
+
if (endpoint and (offset % 250 == 0))
|
73
40
|
GC.start
|
74
41
|
end
|
75
42
|
end
|
43
|
+
|
76
44
|
rescue => e
|
77
45
|
$logger.error e.message
|
78
46
|
$logger.debug e.backtrace.join("\n")
|
79
47
|
end
|
80
48
|
|
81
|
-
|
49
|
+
nil
|
82
50
|
end
|
83
51
|
|
84
|
-
def
|
85
|
-
|
86
|
-
|
87
|
-
if File.exist? cache
|
88
|
-
module_data = JSON.parse(File.read(cache))
|
89
|
-
else
|
90
|
-
module_data = retrieve(:modules)
|
91
|
-
end
|
52
|
+
def retrieve_validations(modules, period = 25)
|
53
|
+
raise 'Please process validations by passing a block' unless block_given?
|
92
54
|
|
55
|
+
offset = 0
|
93
56
|
begin
|
94
|
-
|
95
|
-
|
96
|
-
|
57
|
+
modules.each_slice(period) do |group|
|
58
|
+
offset += period
|
59
|
+
results = group.map { |mod| validations(mod[:slug]) }
|
97
60
|
|
98
|
-
|
99
|
-
|
61
|
+
yield results, offset
|
62
|
+
GC.start
|
100
63
|
end
|
101
|
-
|
102
|
-
spinner.success('(OK)')
|
103
64
|
rescue => e
|
104
|
-
spinner.error('API error')
|
105
65
|
$logger.error e.message
|
106
66
|
$logger.debug e.backtrace.join("\n")
|
107
67
|
end
|
108
68
|
|
109
|
-
|
110
|
-
save_nld_json('validations', flatten_validations(results))
|
111
|
-
results
|
69
|
+
nil
|
112
70
|
end
|
113
71
|
|
114
|
-
def
|
115
|
-
|
72
|
+
def validations(name)
|
73
|
+
endpoint = "/private/validations/"
|
74
|
+
response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
|
75
|
+
raise "Forge Error: #{@response.body}" unless response.code == 200
|
116
76
|
|
117
|
-
|
118
|
-
offset = 0
|
119
|
-
endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
|
120
|
-
spinner = TTY::Spinner.new("[:spinner] :title")
|
121
|
-
spinner.update(title: "Downloading #{entity} ...")
|
122
|
-
spinner.auto_spin
|
123
|
-
|
124
|
-
while endpoint do
|
125
|
-
response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
|
126
|
-
raise "Forge Error: #{@response.body}" unless response.code == 200
|
127
|
-
data = JSON.parse(response.body)
|
128
|
-
|
129
|
-
offset += 50
|
130
|
-
endpoint = data['pagination']['next']
|
131
|
-
|
132
|
-
yield munge_dates(data['results'])
|
133
|
-
|
134
|
-
if (endpoint and (offset % 250 == 0))
|
135
|
-
spinner.update(title: "Downloading #{entity} [#{offset}]...")
|
136
|
-
GC.start
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
spinner.success('(OK)')
|
141
|
-
rescue => e
|
142
|
-
spinner.error('API error')
|
143
|
-
$logger.error e.message
|
144
|
-
$logger.debug e.backtrace.join("\n")
|
145
|
-
end
|
146
|
-
|
147
|
-
nil
|
77
|
+
flatten_validations(name, JSON.parse(response.body))
|
148
78
|
end
|
149
79
|
|
80
|
+
|
150
81
|
# transform dates into a format that bigquery knows
|
151
82
|
def munge_dates(object)
|
152
83
|
["created_at", "updated_at", "deprecated_at", "deleted_at"].each do |field|
|
@@ -160,16 +91,6 @@ class Mvp
|
|
160
91
|
object
|
161
92
|
end
|
162
93
|
|
163
|
-
def save_json(thing, data)
|
164
|
-
File.write("#{@cachedir}/#{thing}.json", data.to_json)
|
165
|
-
end
|
166
|
-
|
167
|
-
# store data in a way that bigquery can grok
|
168
|
-
# uploading files is far easier than streaming data, when replacing a dataset
|
169
|
-
def save_nld_json(thing, data)
|
170
|
-
File.write("#{@cachedir}/nld_#{thing}.json", data.to_newline_delimited_json)
|
171
|
-
end
|
172
|
-
|
173
94
|
def flatten_modules(data)
|
174
95
|
data.each do |row|
|
175
96
|
row['owner'] = row['owner']['username']
|
@@ -209,14 +130,12 @@ class Mvp
|
|
209
130
|
data
|
210
131
|
end
|
211
132
|
|
212
|
-
def flatten_validations(
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
row[entry['name']] = entry['score']
|
217
|
-
end
|
218
|
-
row
|
133
|
+
def flatten_validations(name, scores)
|
134
|
+
row = { 'name' => name }
|
135
|
+
scores.each do |entry|
|
136
|
+
row[entry['name']] = entry['score']
|
219
137
|
end
|
138
|
+
row
|
220
139
|
end
|
221
140
|
|
222
141
|
def simplify_metadata(data, metadata)
|
data/lib/mvp/itemizer.rb
CHANGED
@@ -27,6 +27,14 @@ class Mvp
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
+
def itemized(mod)
|
31
|
+
modname = mod[:slug]
|
32
|
+
version = mod[:version]
|
33
|
+
baserow = { :module => modname, :version => version, :kind => 'admin', :element => 'version', :count => 0}
|
34
|
+
|
35
|
+
table(itemize(modname, version), mod) << baserow
|
36
|
+
end
|
37
|
+
|
30
38
|
def download(path, modname, version)
|
31
39
|
filename = "#{modname}-#{version}.tar.gz"
|
32
40
|
Dir.chdir(path) do
|
@@ -58,10 +66,10 @@ class Mvp
|
|
58
66
|
# Build a table with this schema
|
59
67
|
# module | version | source | kind | element | count
|
60
68
|
def table(itemized, data)
|
61
|
-
modname = data[
|
62
|
-
slug = data[
|
63
|
-
version = data[
|
64
|
-
dependencies = data[
|
69
|
+
modname = data[:name]
|
70
|
+
slug = data[:slug]
|
71
|
+
version = data[:version]
|
72
|
+
dependencies = data[:dependencies]
|
65
73
|
|
66
74
|
itemized.map do |kind, elements|
|
67
75
|
# the kind of element comes pluralized from puppet-itemize
|
@@ -0,0 +1,171 @@
|
|
1
|
+
class Mvp
|
2
|
+
class PuppetfileParser
|
3
|
+
def initialize(options = {})
|
4
|
+
@sources = {}
|
5
|
+
@modules = []
|
6
|
+
@repo = nil
|
7
|
+
end
|
8
|
+
|
9
|
+
def suitable?
|
10
|
+
defined?(RubyVM::AbstractSyntaxTree)
|
11
|
+
end
|
12
|
+
|
13
|
+
def sources=(modules)
|
14
|
+
modules.each do |row|
|
15
|
+
next unless row[:source]
|
16
|
+
next if row[:source] == 'UNKNOWN'
|
17
|
+
|
18
|
+
@sources[canonical_git_repo(row[:source])] = row[:slug]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse(repo)
|
23
|
+
# This only works on Ruby 2.6+
|
24
|
+
return unless suitable?
|
25
|
+
|
26
|
+
begin
|
27
|
+
root = RubyVM::AbstractSyntaxTree.parse(repo[:content])
|
28
|
+
rescue SyntaxError => e
|
29
|
+
$logger.warn "Syntax error in #{repo[:repo_name]}/Puppetfile"
|
30
|
+
$logger.warn e.message
|
31
|
+
end
|
32
|
+
|
33
|
+
@repo = repo
|
34
|
+
@modules = []
|
35
|
+
traverse(root)
|
36
|
+
@modules.compact.map do |row|
|
37
|
+
row[:repo_name] = repo[:repo_name]
|
38
|
+
row[:md5] = repo[:md5]
|
39
|
+
row[:module] = canonical_name(row[:module], row[:source])
|
40
|
+
stringify(row)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def stringify(row)
|
45
|
+
row.each do |key, value|
|
46
|
+
if value.is_a? RubyVM::AbstractSyntaxTree::Node
|
47
|
+
row[key] = :'#<programmatically generated via ruby code>'
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def canonical_name(name, repo)
|
53
|
+
return name if name.include?('-')
|
54
|
+
repo = canonical_git_repo(repo)
|
55
|
+
|
56
|
+
return @sources[repo] if @sources.include?(repo)
|
57
|
+
name
|
58
|
+
end
|
59
|
+
|
60
|
+
def canonical_git_repo(repo)
|
61
|
+
return unless repo
|
62
|
+
return unless repo.is_a? String
|
63
|
+
repo.sub(/^git@github.com\:/, 'github.com/')
|
64
|
+
.sub(/^(git|https?)\:\/\//, '')
|
65
|
+
.sub(/\.git$/, '')
|
66
|
+
end
|
67
|
+
|
68
|
+
def add_module(name, args)
|
69
|
+
unless name.is_a? String
|
70
|
+
$logger.warn "Non string module name in #{@repo[:repo_name]}/Puppetfile"
|
71
|
+
return nil
|
72
|
+
end
|
73
|
+
name.gsub!('/', '-')
|
74
|
+
case args
|
75
|
+
when String, Symbol, NilClass
|
76
|
+
@modules << {
|
77
|
+
:module => name,
|
78
|
+
:type => :forge,
|
79
|
+
:source => :forge,
|
80
|
+
:version => args,
|
81
|
+
}
|
82
|
+
when Hash
|
83
|
+
@modules << parse_args(name, args)
|
84
|
+
else
|
85
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown format: mod('#{name}', #{args.inspect})"
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def parse_args(name, args)
|
90
|
+
data = {:module => name}
|
91
|
+
|
92
|
+
if args.include? :git
|
93
|
+
data[:type] = :git
|
94
|
+
data[:source] = args[:git]
|
95
|
+
data[:version] = args[:ref] || args[:tag] || args[:commit] || args[:branch] || :latest
|
96
|
+
elsif args.include? :svn
|
97
|
+
data[:type] = :svn
|
98
|
+
data[:source] = args[:svn]
|
99
|
+
data[:version] = args[:rev] || args[:revision] || :latest
|
100
|
+
elsif args.include? :boxen
|
101
|
+
data[:type] = :boxen
|
102
|
+
data[:source] = args[:repo]
|
103
|
+
data[:version] = args[:version] || :latest
|
104
|
+
else
|
105
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown args format: mod('#{name}', #{args.inspect})"
|
106
|
+
return nil
|
107
|
+
end
|
108
|
+
|
109
|
+
data
|
110
|
+
end
|
111
|
+
|
112
|
+
def traverse(node)
|
113
|
+
begin
|
114
|
+
if node.type == :FCALL
|
115
|
+
name = node.children.first
|
116
|
+
args = node.children.last.children.map do |item|
|
117
|
+
next if item.nil?
|
118
|
+
|
119
|
+
case item.type
|
120
|
+
when :HASH
|
121
|
+
Hash[*item.children.first.children.compact.map {|n| n.children.first }]
|
122
|
+
else
|
123
|
+
item.children.first
|
124
|
+
end
|
125
|
+
end.compact
|
126
|
+
|
127
|
+
case name
|
128
|
+
when :mod
|
129
|
+
add_module(args.shift, args.shift)
|
130
|
+
when :forge
|
131
|
+
# noop
|
132
|
+
when :moduledir
|
133
|
+
# noop
|
134
|
+
when :github
|
135
|
+
# oh boxen, you so silly.
|
136
|
+
# The order of the unpacking below *is* important.
|
137
|
+
modname = args.shift
|
138
|
+
version = args.shift
|
139
|
+
data = args.shift || {}
|
140
|
+
|
141
|
+
# this is gross but I'm not sure I actually care right now.
|
142
|
+
if (modname.is_a? String and [String, NilClass].include? version.class and data.is_a? Hash)
|
143
|
+
data[:boxen] = :boxen
|
144
|
+
data[:version] = version
|
145
|
+
add_module(modname, data)
|
146
|
+
else
|
147
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: malformed boxen"
|
148
|
+
end
|
149
|
+
else
|
150
|
+
# Should we record unexpected Ruby code or just log it to stdout?
|
151
|
+
args = args.map {|a| a.is_a?(String) ? "'#{a}'" : a}.join(', ')
|
152
|
+
$logger.warn "#{@repo[:repo_name]}/Puppetfile: Unexpected invocation of #{name}(#{args})"
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
node.children.each do |n|
|
157
|
+
next unless n.is_a? RubyVM::AbstractSyntaxTree::Node
|
158
|
+
|
159
|
+
traverse(n)
|
160
|
+
end
|
161
|
+
rescue => e
|
162
|
+
puts e.message
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
def test()
|
167
|
+
require 'pry'
|
168
|
+
binding.pry
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
data/lib/mvp/runner.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
require 'mvp/
|
2
|
-
require 'mvp/
|
1
|
+
require 'mvp/forge'
|
2
|
+
require 'mvp/bigquery'
|
3
3
|
require 'mvp/stats'
|
4
|
+
require 'mvp/itemizer'
|
5
|
+
require 'mvp/puppetfile_parser'
|
6
|
+
|
7
|
+
require 'tty-spinner'
|
4
8
|
|
5
9
|
class Mvp
|
6
10
|
class Runner
|
@@ -11,40 +15,94 @@ class Mvp
|
|
11
15
|
end
|
12
16
|
|
13
17
|
def retrieve(target = :all, download = true)
|
14
|
-
|
18
|
+
bigquery = Mvp::Bigquery.new(@options)
|
15
19
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
+
begin
|
21
|
+
[:authors, :modules, :releases, :validations].each do |thing|
|
22
|
+
next unless [:all, thing].include? target
|
23
|
+
spinner = mkspinner("Retrieving #{thing} ...")
|
24
|
+
data = bigquery.retrieve(thing)
|
25
|
+
save_json(thing, data)
|
26
|
+
spinner.success('(OK)')
|
27
|
+
end
|
20
28
|
|
21
|
-
|
22
|
-
|
29
|
+
rescue => e
|
30
|
+
spinner.error("API error: #{e.message}")
|
31
|
+
$logger.error "API error: #{e.message}"
|
32
|
+
$logger.debug e.backtrace.join("\n")
|
33
|
+
sleep 10
|
23
34
|
end
|
24
35
|
end
|
25
36
|
|
26
|
-
def
|
27
|
-
|
37
|
+
def mirror(target = :all)
|
38
|
+
forge = Mvp::Forge.new(@options)
|
39
|
+
bigquery = Mvp::Bigquery.new(@options)
|
40
|
+
itemizer = Mvp::Itemizer.new(@options)
|
41
|
+
pfparser = Mvp::PuppetfileParser.new(@options)
|
28
42
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
43
|
+
begin
|
44
|
+
[:authors, :modules, :releases].each do |thing|
|
45
|
+
next unless [:all, thing].include? target
|
46
|
+
spinner = mkspinner("Mirroring #{thing}...")
|
47
|
+
bigquery.truncate(thing)
|
48
|
+
forge.retrieve(thing) do |data, offset|
|
49
|
+
spinner.update(title: "Mirroring #{thing} [#{offset}]...")
|
50
|
+
bigquery.insert(thing, data)
|
51
|
+
end
|
52
|
+
spinner.success('(OK)')
|
53
|
+
end
|
34
54
|
|
35
|
-
|
36
|
-
|
37
|
-
|
55
|
+
if [:all, :validations].include? target
|
56
|
+
spinner = mkspinner("Mirroring validations...")
|
57
|
+
modules = bigquery.get(:modules, [:slug])
|
58
|
+
bigquery.truncate(:validations)
|
59
|
+
forge.retrieve_validations(modules) do |data, offset|
|
60
|
+
spinner.update(title: "Mirroring validations [#{offset}]...")
|
61
|
+
bigquery.insert(:validations, data)
|
62
|
+
end
|
63
|
+
spinner.success('(OK)')
|
64
|
+
end
|
38
65
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
66
|
+
if [:all, :itemizations].include? target
|
67
|
+
spinner = mkspinner("Itemizing modules...")
|
68
|
+
bigquery.unitemized.each do |mod|
|
69
|
+
spinner.update(title: "Itemizing [#{mod[:slug]}]...")
|
70
|
+
rows = itemizer.itemized(mod)
|
71
|
+
bigquery.delete(:itemized, :module, mod[:slug])
|
72
|
+
bigquery.insert(:itemized, rows)
|
73
|
+
end
|
74
|
+
spinner.success('(OK)')
|
75
|
+
end
|
76
|
+
|
77
|
+
if [:all, :mirrors, :tables].include? target
|
78
|
+
@options[:gcloud][:mirrors].each do |entity|
|
79
|
+
spinner = mkspinner("Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
|
80
|
+
bigquery.mirror_table(entity)
|
81
|
+
spinner.success('(OK)')
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
if [:all, :puppetfiles].include? target
|
86
|
+
spinner = mkspinner("Analyzing Puppetfile module references...")
|
87
|
+
if pfparser.suitable?
|
88
|
+
pfparser.sources = bigquery.module_sources
|
89
|
+
bigquery.puppetfiles.each do |repo|
|
90
|
+
spinner.update(title: "Analyzing [#{repo[:repo_name]}/Puppetfile]...")
|
91
|
+
rows = pfparser.parse(repo)
|
92
|
+
bigquery.delete(:puppetfile_usage, :repo_name, repo[:repo_name], :github)
|
93
|
+
bigquery.insert(:puppetfile_usage, rows, :github)
|
94
|
+
end
|
95
|
+
spinner.success('(OK)')
|
96
|
+
else
|
97
|
+
spinner.error("(Not functional on Ruby #{RUBY_VERSION})")
|
98
|
+
end
|
99
|
+
end
|
45
100
|
|
46
|
-
|
47
|
-
|
101
|
+
rescue => e
|
102
|
+
spinner.error("API error: #{e.message}")
|
103
|
+
$logger.error "API error: #{e.message}"
|
104
|
+
$logger.debug e.backtrace.join("\n")
|
105
|
+
sleep 10
|
48
106
|
end
|
49
107
|
end
|
50
108
|
|
@@ -57,6 +115,17 @@ class Mvp
|
|
57
115
|
end
|
58
116
|
end
|
59
117
|
|
118
|
+
def mkspinner(title)
|
119
|
+
spinner = TTY::Spinner.new("[:spinner] :title")
|
120
|
+
spinner.update(title: title)
|
121
|
+
spinner.auto_spin
|
122
|
+
spinner
|
123
|
+
end
|
124
|
+
|
125
|
+
def save_json(thing, data)
|
126
|
+
File.write("#{@cachedir}/#{thing}.json", data.to_json)
|
127
|
+
end
|
128
|
+
|
60
129
|
def test()
|
61
130
|
require 'pry'
|
62
131
|
binding.pry
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puppet-community-mvp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Ford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -151,13 +151,14 @@ files:
|
|
151
151
|
- LICENSE
|
152
152
|
- README.md
|
153
153
|
- bin/mvp
|
154
|
+
- bin/pftest.rb
|
154
155
|
- lib/mvp.rb
|
155
|
-
- lib/mvp/
|
156
|
+
- lib/mvp/bigquery.rb
|
157
|
+
- lib/mvp/forge.rb
|
156
158
|
- lib/mvp/itemizer.rb
|
157
|
-
- lib/mvp/
|
159
|
+
- lib/mvp/puppetfile_parser.rb
|
158
160
|
- lib/mvp/runner.rb
|
159
161
|
- lib/mvp/stats.rb
|
160
|
-
- lib/mvp/uploader.rb
|
161
162
|
homepage:
|
162
163
|
licenses:
|
163
164
|
- Apache 2
|