puppet-community-mvp 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +78 -0
- data/bin/mvp +22 -1
- data/lib/mvp/forge.rb +2 -2
- data/lib/mvp/itemizer.rb +37 -2
- data/lib/mvp/runner.rb +27 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd83202b003a900b8744b0fc8da5bb14b6024ca37f1419c841d68afaa4b487dd
|
4
|
+
data.tar.gz: c69cfa9c035b30136593d10dbad96d588fd0fe8dd870ba0763068f0c756af5cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5370badaaa4208281fa6e864a398482cd6403aeacaa8ef1ec7ac661d39287cff944a483c3f16ed352feb27b3c230573d627ebf75c43d5be4d496313553706f11
|
7
|
+
data.tar.gz: 1418192f0b6adc010b7982b34c2b8f1654b6a3fa6fbd88533a2bf766aea2a4ad3ffb63ba6d3d3481c67c1cf74fa0ec9796dc520474558750bda62fd32d05d24a
|
data/README.md
CHANGED
@@ -0,0 +1,78 @@
|
|
1
|
+
# Puppet Community MVP tool
|
2
|
+
|
3
|
+
This is a simple tool to generate stats about the Puppet community. It was
|
4
|
+
originally intended to show the "most valuable players" but has since morphed to
|
5
|
+
show a lot of other things too. We primarily use it on a weekly cron job to
|
6
|
+
gather information using the Forge APIs and normalize it so that it can be
|
7
|
+
easily combined with simple SQL queries to generate usage information.
|
8
|
+
|
9
|
+
## Interactive usage
|
10
|
+
|
11
|
+
If you're not working on our community stats pipeline, then there are only three
|
12
|
+
subcommands you'll be interested in.
|
13
|
+
|
14
|
+
### `stats`
|
15
|
+
|
16
|
+
This subcommand will use cached data to generate a report of Forge community
|
17
|
+
statistics. For example, it will generate distributions of module quality
|
18
|
+
scores, or releases per module, or modules per author, etc. And it will generate
|
19
|
+
sparklines showing the contributions over time of the most prolific Forge
|
20
|
+
authors and it will show authors who aren't as active as they used to be.
|
21
|
+
|
22
|
+
Unfortunately, this report is not customizable or templatable at this point.
|
23
|
+
|
24
|
+
You will need cached data before you can generate this report. See the `get` subcommand.
|
25
|
+
|
26
|
+
|
27
|
+
### `get`
|
28
|
+
|
29
|
+
This subcommand will download and cache a local mirror of the data stored in our
|
30
|
+
BigQuery database. This data is used for the `stats` command.
|
31
|
+
|
32
|
+
|
33
|
+
### `analyze`
|
34
|
+
|
35
|
+
This subcommand is maybe the most interesting. Many interesting bits of
|
36
|
+
information can be gathered by inspecting the source code of modules, not by
|
37
|
+
running SQL queries about their statistics. For example, `find manifests/ -name
|
38
|
+
'*.pp' | wc -l` will tell you how many manifests any given module includes, and
|
39
|
+
`grep -rn -e '--no-external-facts' facts.d/` will tell you how many external facts
|
40
|
+
are invoking `facter` to gather and use _other_ facts while running.
|
41
|
+
|
42
|
+
This command lets you write that little bit of analysis code as a script, and
|
43
|
+
then systematically run that script against the current release of every single
|
44
|
+
module on the Forge and collate the generated output.
|
45
|
+
|
46
|
+
A script can be written in any language and will be executed from the root of
|
47
|
+
the unpacked module. It will be invoked with an environment containing the following
|
48
|
+
variables:
|
49
|
+
|
50
|
+
* `mvp_owner` -- the Forge namespace of the module, aka the author's username
|
51
|
+
* `mvp_name` -- the name of the module itself
|
52
|
+
* `mvp_version` -- the current version of the module
|
53
|
+
* `mvp_downloads` -- the number of downloads this module has. A *rough* estimation of popularity
|
54
|
+
|
55
|
+
The script should print an array of arrays in JSON format to STDOUT. These will be
|
56
|
+
combined to make a CSV file, the columns of which are defined by the data you
|
57
|
+
return. In other words, the items in the inner array(s) are totally up to you.
|
58
|
+
They will become the columns of the generated CSV file.
|
59
|
+
|
60
|
+
The parameters relevant to this subcommand are:
|
61
|
+
|
62
|
+
```
|
63
|
+
-o, --output_file OUTPUT_FILE The path to save a csv report.
|
64
|
+
--script SCRIPT The script file to analyze a module. See docs for interface.
|
65
|
+
--count N For debugging. Select a random list of this many modules to analyze.
|
66
|
+
-d, --debug Display extra debugging information.
|
67
|
+
```
|
68
|
+
|
69
|
+
See files in the `scripts/` directory for examples of analysis scripts. To use,
|
70
|
+
just pass the path of a script, like
|
71
|
+
|
72
|
+
```
|
73
|
+
$ mvp analyze --script scripts/manifest_count.rb --count 5
|
74
|
+
[✔] stdlib (OK)
|
75
|
+
$ cat analyzed.csv
|
76
|
+
...
|
77
|
+
```
|
78
|
+
|
data/bin/mvp
CHANGED
@@ -24,6 +24,10 @@ or download and itemize each Forge module.
|
|
24
24
|
* Optional targets: all, authors, modules, releases
|
25
25
|
* stats
|
26
26
|
* Print out a summary of interesting stats.
|
27
|
+
* analyze <script file>
|
28
|
+
* Run a specified script to analyze each module to generate arbitrary stats
|
29
|
+
* Writes output to a csv file, analyzed.csv by default
|
30
|
+
|
27
31
|
"
|
28
32
|
|
29
33
|
opts.on("-f FORGEAPI", "--forgeapi FORGEAPI", "Forge API server. Rarely needed.") do |arg|
|
@@ -58,6 +62,14 @@ or download and itemize each Forge module.
|
|
58
62
|
options[:output_file] = arg
|
59
63
|
end
|
60
64
|
|
65
|
+
opts.on("--script SCRIPT", "The script file to analyze a module. See docs for interface.") do |arg|
|
66
|
+
options[:script] = arg
|
67
|
+
end
|
68
|
+
|
69
|
+
opts.on("--count N", "For debugging. Select a random list of this many modules to analyze.") do |arg|
|
70
|
+
options[:count] = arg.to_i
|
71
|
+
end
|
72
|
+
|
61
73
|
opts.on("-d", "--debug", "Display extra debugging information.") do
|
62
74
|
options[:debug] = true
|
63
75
|
end
|
@@ -85,18 +97,24 @@ options[:gcloud][:dataset] ||= 'community'
|
|
85
97
|
options[:gcloud][:project] ||= 'puppet'
|
86
98
|
options[:gcloud][:keyfile] ||= '~/.mvp/credentials.json'
|
87
99
|
|
100
|
+
options[:script] = File.expand_path(options[:script]) if options[:script]
|
88
101
|
options[:cachedir] = File.expand_path(options[:cachedir])
|
89
102
|
options[:github_data] = File.expand_path(options[:github_data])
|
90
103
|
options[:gcloud][:keyfile] = File.expand_path(options[:gcloud][:keyfile])
|
91
104
|
FileUtils.mkdir_p(options[:cachedir])
|
92
105
|
|
106
|
+
command, target = ARGV
|
107
|
+
case command
|
108
|
+
when 'analyze'
|
109
|
+
options[:output_file] ||= 'analyzed.csv'
|
110
|
+
end
|
111
|
+
|
93
112
|
$logger = Logger::new(STDOUT)
|
94
113
|
$logger.level = options[:debug] ? Logger::DEBUG : Logger::INFO
|
95
114
|
$logger.formatter = proc { |severity,datetime,progname,msg| "#{severity}: #{msg}\n" }
|
96
115
|
|
97
116
|
runner = Mvp::Runner.new(options)
|
98
117
|
|
99
|
-
command, target = ARGV
|
100
118
|
case command
|
101
119
|
when 'get', 'retrieve', 'download'
|
102
120
|
target ||= :all
|
@@ -110,6 +128,9 @@ when 'stats'
|
|
110
128
|
target ||= :all
|
111
129
|
runner.stats(target.to_sym)
|
112
130
|
|
131
|
+
when 'analyze'
|
132
|
+
runner.analyze
|
133
|
+
|
113
134
|
when 'test'
|
114
135
|
runner.test
|
115
136
|
|
data/lib/mvp/forge.rb
CHANGED
@@ -128,8 +128,8 @@ class Mvp
|
|
128
128
|
|
129
129
|
simplify_metadata(row, row['metadata'])
|
130
130
|
|
131
|
-
# These items are just too big to store in the table
|
132
|
-
['module', 'changelog', 'readme', 'reference'].each do |column|
|
131
|
+
# These items are just too big to store in the table, and the malware scan isn't done yet
|
132
|
+
['module', 'changelog', 'readme', 'reference', 'malware_scan'].each do |column|
|
133
133
|
row.delete(column)
|
134
134
|
end
|
135
135
|
end
|
data/lib/mvp/itemizer.rb
CHANGED
@@ -12,7 +12,7 @@ class Mvp
|
|
12
12
|
|
13
13
|
def run!(data, uploader)
|
14
14
|
data.each do |mod|
|
15
|
-
modname = mod['
|
15
|
+
modname = mod['name']
|
16
16
|
version = mod['version']
|
17
17
|
return if uploader.version_itemized?(modname, version)
|
18
18
|
|
@@ -41,7 +41,9 @@ class Mvp
|
|
41
41
|
File.open(filename, "w") do |file|
|
42
42
|
file << HTTParty.get( "#{@forge}/v3/files/#{filename}" )
|
43
43
|
end
|
44
|
-
|
44
|
+
# Why is tar terrible?
|
45
|
+
FileUtils.mkdir("#{modname}-#{version}")
|
46
|
+
system("tar -xf #{filename} -C #{modname}-#{version} --strip-components=1")
|
45
47
|
FileUtils.rm(filename)
|
46
48
|
end
|
47
49
|
end
|
@@ -63,6 +65,39 @@ class Mvp
|
|
63
65
|
end
|
64
66
|
end
|
65
67
|
|
68
|
+
def analyze(mod, script, debug)
|
69
|
+
require 'open3'
|
70
|
+
require 'json'
|
71
|
+
|
72
|
+
# sanitize an environment
|
73
|
+
env = {'mvp_script' => script}
|
74
|
+
mod.each do |key, value|
|
75
|
+
env["mvp_#{key}"] = value.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
downloads = mod[:downloads]
|
79
|
+
Dir.mktmpdir('mvp') do |path|
|
80
|
+
download(path, "#{mod[:owner]}-#{mod[:name]}", mod[:version])
|
81
|
+
|
82
|
+
rows = []
|
83
|
+
Dir.chdir("#{path}/#{mod[:owner]}-#{mod[:name]}-#{mod[:version]}") do
|
84
|
+
if debug
|
85
|
+
exit(1) unless system(env, ENV['SHELL'])
|
86
|
+
end
|
87
|
+
|
88
|
+
stdout, stderr, status = Open3.capture3(env, script)
|
89
|
+
|
90
|
+
if status.success?
|
91
|
+
rows = JSON.parse(stdout)
|
92
|
+
else
|
93
|
+
$logger.error stderr
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
return rows unless rows.empty?
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
66
101
|
# Build a table with this schema
|
67
102
|
# module | version | source | kind | element | count
|
68
103
|
def table(itemized, data)
|
data/lib/mvp/runner.rb
CHANGED
@@ -106,6 +106,33 @@ class Mvp
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
|
+
def analyze
|
110
|
+
bigquery = Mvp::Bigquery.new(@options)
|
111
|
+
itemizer = Mvp::Itemizer.new(@options)
|
112
|
+
|
113
|
+
begin
|
114
|
+
spinner = mkspinner("Analyzing modules...")
|
115
|
+
modules = bigquery.get(:modules, [:owner, :name, :version, :downloads])
|
116
|
+
modules = modules.sample(@options[:count]) if @options[:count]
|
117
|
+
|
118
|
+
require 'csv'
|
119
|
+
csv_string = CSV.generate do |csv|
|
120
|
+
modules.each do |mod|
|
121
|
+
spinner.stop if @options[:debug]
|
122
|
+
rows = itemizer.analyze(mod, @options[:script], @options[:debug])
|
123
|
+
spinner.start if @options[:debug]
|
124
|
+
|
125
|
+
next unless rows
|
126
|
+
spinner.update(title: mod[:name])
|
127
|
+
rows.each {|row| csv << row}
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
File.write(@options[:output_file], csv_string)
|
132
|
+
spinner.success('(OK)')
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
109
136
|
def stats(target)
|
110
137
|
stats = Mvp::Stats.new(@options)
|
111
138
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puppet-community-mvp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Ford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
- !ruby/object:Gem::Version
|
179
179
|
version: '0'
|
180
180
|
requirements: []
|
181
|
-
rubygems_version: 3.0.
|
181
|
+
rubygems_version: 3.0.3
|
182
182
|
signing_key:
|
183
183
|
specification_version: 4
|
184
184
|
summary: Generate some stats about the Puppet Community.
|