puppet-community-mvp 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +202 -0
- data/README.md +0 -0
- data/bin/mvp +121 -0
- data/lib/mvp.rb +4 -0
- data/lib/mvp/downloader.rb +199 -0
- data/lib/mvp/monkeypatches.rb +8 -0
- data/lib/mvp/runner.rb +54 -0
- data/lib/mvp/stats.rb +339 -0
- data/lib/mvp/uploader.rb +100 -0
- metadata +170 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8ed5308091443f5847159a6a481611fba281f4d5
|
4
|
+
data.tar.gz: f7bb0dd50ea248c04b5809144d67355ad7c6c202
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ececdc2a2121c4054fc49b16385892e78364b9ed197b2ac3e38a5542de2f5be94cf52ac9e6d6e1590c7e91b912fa54f4e9a70e71e60ae831fe545b98731021ee
|
7
|
+
data.tar.gz: 5f87defac101d2105403c0b5b54d34c37cac1b552fc77a2008d7d62624df41c8d1d4813be9c2cf15ded62942d6e0cbc70f36862d39866caa2bd77d6d9528aba0
|
data/LICENSE
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
|
2
|
+
Apache License
|
3
|
+
Version 2.0, January 2004
|
4
|
+
http://www.apache.org/licenses/
|
5
|
+
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7
|
+
|
8
|
+
1. Definitions.
|
9
|
+
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
12
|
+
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14
|
+
the copyright owner that is granting the License.
|
15
|
+
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
17
|
+
other entities that control, are controlled by, or are under common
|
18
|
+
control with that entity. For the purposes of this definition,
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
20
|
+
direction or management of such entity, whether by contract or
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23
|
+
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25
|
+
exercising permissions granted by this License.
|
26
|
+
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
28
|
+
including but not limited to software source code, documentation
|
29
|
+
source, and configuration files.
|
30
|
+
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
32
|
+
transformation or translation of a Source form, including but
|
33
|
+
not limited to compiled object code, generated documentation,
|
34
|
+
and conversions to other media types.
|
35
|
+
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
37
|
+
Object form, made available under the License, as indicated by a
|
38
|
+
copyright notice that is included in or attached to the work
|
39
|
+
(an example is provided in the Appendix below).
|
40
|
+
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47
|
+
the Work and Derivative Works thereof.
|
48
|
+
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
50
|
+
the original version of the Work and any modifications or additions
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
62
|
+
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
65
|
+
subsequently incorporated within the Work.
|
66
|
+
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
73
|
+
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79
|
+
where such license applies only to those patent claims licensable
|
80
|
+
by such Contributor that are necessarily infringed by their
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
83
|
+
institute patent litigation against any entity (including a
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
86
|
+
or contributory patent infringement, then any patent licenses
|
87
|
+
granted to You under this License for that Work shall terminate
|
88
|
+
as of the date such litigation is filed.
|
89
|
+
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
92
|
+
modifications, and in Source or Object form, provided that You
|
93
|
+
meet the following conditions:
|
94
|
+
|
95
|
+
(a) You must give any other recipients of the Work or
|
96
|
+
Derivative Works a copy of this License; and
|
97
|
+
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
99
|
+
stating that You changed the files; and
|
100
|
+
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
103
|
+
attribution notices from the Source form of the Work,
|
104
|
+
excluding those notices that do not pertain to any part of
|
105
|
+
the Derivative Works; and
|
106
|
+
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
109
|
+
include a readable copy of the attribution notices contained
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
112
|
+
of the following places: within a NOTICE text file distributed
|
113
|
+
as part of the Derivative Works; within the Source form or
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
115
|
+
within a display generated by the Derivative Works, if and
|
116
|
+
wherever such third-party notices normally appear. The contents
|
117
|
+
of the NOTICE file are for informational purposes only and
|
118
|
+
do not modify the License. You may add Your own attribution
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
121
|
+
that such additional attribution notices cannot be construed
|
122
|
+
as modifying the License.
|
123
|
+
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
125
|
+
may provide additional or different license terms and conditions
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
129
|
+
the conditions stated in this License.
|
130
|
+
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
134
|
+
this License, without any additional terms or conditions.
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136
|
+
the terms of any separate license agreement you may have executed
|
137
|
+
with Licensor regarding such Contributions.
|
138
|
+
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
141
|
+
except as required for reasonable and customary use in describing the
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
143
|
+
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
153
|
+
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
159
|
+
incidental, or consequential damages of any character arising as a
|
160
|
+
result of this License or out of the use or inability to use the
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
163
|
+
other commercial damages or losses), even if such Contributor
|
164
|
+
has been advised of the possibility of such damages.
|
165
|
+
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169
|
+
or other liability obligations and/or rights consistent with this
|
170
|
+
License. However, in accepting such obligations, You may act only
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
175
|
+
of your accepting any such warranty or additional liability.
|
176
|
+
|
177
|
+
END OF TERMS AND CONDITIONS
|
178
|
+
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
180
|
+
|
181
|
+
To apply the Apache License to your work, attach the following
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183
|
+
replaced with your own identifying information. (Don't include
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
185
|
+
comment syntax for the file format. We also recommend that a
|
186
|
+
file or class name and description of purpose be included on the
|
187
|
+
same "printed page" as the copyright notice for easier
|
188
|
+
identification within third-party archives.
|
189
|
+
|
190
|
+
Copyright [yyyy] [name of copyright owner]
|
191
|
+
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193
|
+
you may not use this file except in compliance with the License.
|
194
|
+
You may obtain a copy of the License at
|
195
|
+
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
197
|
+
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201
|
+
See the License for the specific language governing permissions and
|
202
|
+
limitations under the License.
|
data/README.md
ADDED
File without changes
|
data/bin/mvp
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
#! /usr/bin/env ruby
# CLI entry point: scrapes the Puppet Forge API for module & author stats,
# caches the data locally, and optionally uploads it to BigQuery.

require 'rubygems'
require 'optparse'
require 'yaml'
require 'fileutils'
require 'logger'
require 'mvp'

NAME    = File.basename($PROGRAM_NAME)
options = {:config => File.expand_path('~/.mvp.config.yaml')}
optparse = OptionParser.new { |opts|
  opts.banner = "Usage : #{NAME} [command] [target] [options]

This tool will scrape the Puppet Forge API for interesting module & author stats.
The following CLI commands are available.

  * get | retrieve | download [target]
    * Downloads and caches all Forge metadata.
    * Optional targets: all, authors, modules, releases
  * upload | insert [target]
    * Uploads data to BigQuery
    * Optional targets: all, authors, modules, releases, mirrors
  * stats
    * Print out a summary of interesting stats.
"

  opts.on("-f FORGEAPI", "--forgeapi FORGEAPI", "Forge API server. Rarely needed.") do |arg|
    options[:forgeapi] = arg
  end

  opts.on("-c config", "--config CONFIG", "Location of config.yaml.") do |arg|
    options[:config] = File.expand_path(arg)
  end

  opts.on("-C CACHEDIR", "--cachedir CACHEDIR", "Where data should be cached.") do |arg|
    options[:cachedir] = arg
  end

  opts.on("-g GITHUB_DATA", "--github_data GITHUB_DATA", "The path to a csv file containing GitHub repos & stars.") do |arg|
    options[:github_data] = arg
  end

  # BUG FIX: options[:gcloud] does not exist yet while flags are being
  # parsed, so the original `options[:gcloud][:project] = arg` raised
  # NoMethodError on nil. Create the sub-hash on demand instead.
  opts.on("--project PROJECT", "The gcloud project to use.") do |arg|
    (options[:gcloud] ||= {})[:project] = arg
  end

  opts.on("--dataset DATASET", "The gcloud dataset to use.") do |arg|
    (options[:gcloud] ||= {})[:dataset] = arg
  end

  opts.on("--keyfile KEYFILE", "The gcloud keyfile to use.") do |arg|
    (options[:gcloud] ||= {})[:keyfile] = arg
  end

  opts.on("-o OUTPUT_FILE", "--output_file OUTPUT_FILE", "The path to save a csv report.") do |arg|
    options[:output_file] = arg
  end

  opts.on("-d", "--debug", "Display extra debugging information.") do
    options[:debug] = true
  end

  opts.separator('')

  opts.on("-h", "--help", "Displays this help") do
    puts opts
    exit
  end
}
optparse.parse!

# Config-file values act as defaults; CLI options win. NOTE(review): this is
# a shallow merge, so any gcloud flag on the CLI replaces the config file's
# entire :gcloud hash rather than merging into it.
options = (YAML.load_file(options[:config]) rescue {}).merge(options)

# Fill in hard defaults for anything neither the CLI nor the config set.
options[:cachedir] ||= '~/.mvp/cache'
options[:forgeapi] ||= 'https://forgeapi.puppet.com'
options[:gcloud]   ||= {}
options[:gcloud][:dataset] ||= 'community'
options[:gcloud][:project] ||= 'puppet'
options[:gcloud][:keyfile] ||= '~/.mvp/credentials.json'

options[:cachedir]         = File.expand_path(options[:cachedir])
options[:gcloud][:keyfile] = File.expand_path(options[:gcloud][:keyfile])
FileUtils.mkdir_p(options[:cachedir])

$logger           = Logger::new(STDOUT)
$logger.level     = options[:debug] ? Logger::DEBUG : Logger::INFO
$logger.formatter = proc { |severity,datetime,progname,msg| "#{severity}: #{msg}\n" }

runner = Mvp::Runner.new(options)

command, target = ARGV
case command
when 'get', 'retrieve', 'download'
  target ||= :all
  runner.retrieve(target.to_sym)

when 'transform'
  # Re-flatten already-cached data without hitting the Forge API.
  target ||= :all
  runner.retrieve(target.to_sym, false)

when 'insert', 'upload'
  target ||= :all
  runner.upload(target.to_sym)

when 'mirror'
  target ||= :all
  runner.mirror(target.to_sym)

when 'stats'
  target ||= :all
  runner.stats(target.to_sym)

when 'test'
  runner.test

else
  puts "Unknown command: #{command}"
  puts "Run #{NAME} -h for usage."
  exit 1
end
|
data/lib/mvp/downloader.rb
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'httparty'
|
3
|
+
require 'tty-spinner'
|
4
|
+
require 'semantic_puppet'
|
5
|
+
require 'mvp/monkeypatches'
|
6
|
+
|
7
|
+
class Mvp
  # Downloads Puppet Forge API data (users, modules, releases, validations),
  # caches it as JSON, and flattens it into newline-delimited JSON suitable
  # for bulk-loading into BigQuery.
  class Downloader
    def initialize(options = {})
      @cachedir = options[:cachedir]
      @forgeapi = options[:forgeapi] || 'https://forgeapi.puppet.com'
    end

    # Retrieve one entity dataset, either from the Forge API (download=true)
    # or from the local JSON cache, then write its flattened NLD-JSON form.
    # Returns the (possibly flattened) data.
    def retrieve(entity, download = true)
      if download
        # I am focusing on authorship rather than just users, so for now I'm
        # using the word authors.
        item = (entity == :authors) ? 'users' : entity.to_s
        data = download(item)
        save_json(entity, data)
      else
        # BUG FIX: the cache holds serialized JSON text; parse it back into
        # Ruby structures so flatten_modules/flatten_releases (which iterate
        # hashes) can work on it. The original passed the raw String through.
        data = JSON.parse(File.read("#{@cachedir}/#{entity}.json"))
      end

      case entity
      when :modules
        data = flatten_modules(data)
      when :releases
        data = flatten_releases(data)
      end
      save_nld_json(entity.to_s, data)
      data
    end

    # Fetch per-module validation scores from the private validations
    # endpoint, one request per module, and cache both raw and flat forms.
    def validations()
      results = {}
      cache   = "#{@cachedir}/modules.json"

      # BUG FIX: the original fell back to `module_data = retrieve(:modules)`,
      # but retrieve's return value was File.write's byte count, not data.
      # Download into the cache first, then read the (unflattened) cache.
      retrieve(:modules) unless File.exist? cache
      module_data = JSON.parse(File.read(cache))

      begin
        offset   = 0
        endpoint = "/private/validations/"
        spinner  = TTY::Spinner.new("[:spinner] :title")
        spinner.update(title: "Downloading module validations ...")
        spinner.auto_spin

        module_data.each do |mod|
          name     = "#{mod['owner']['username']}-#{mod['name']}"
          response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
          # BUG FIX: was `@response.body` — an unset instance variable (nil);
          # report the actual failing response body.
          raise "Forge Error: #{response.body}" unless response.code == 200

          data    = JSON.parse(response.body)
          offset += 1
          results[name] = data

          spinner.update(title: "Downloading module validations [#{offset}]...") if (offset % 25 == 0)
        end

        spinner.success('(OK)')
      rescue => e
        # Best-effort: log and fall through with whatever we collected.
        spinner.error('API error')
        $logger.error e.message
        $logger.debug e.backtrace.join("\n")
      end

      save_json('validations', results)
      save_nld_json('validations', flatten_validations(results))
      results
    end

    # Page through a v3 Forge collection endpoint (50 records at a time,
    # following pagination links) and return all results with dates munged.
    def download(entity)
      results = []

      begin
        offset   = 0
        endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
        spinner  = TTY::Spinner.new("[:spinner] :title")
        spinner.update(title: "Downloading #{entity} ...")
        spinner.auto_spin

        while endpoint do
          response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => "Puppet Community Stats Monitor"})
          # BUG FIX: was `@response.body` — an unset instance variable (nil);
          # report the actual failing response body.
          raise "Forge Error: #{response.body}" unless response.code == 200

          data     = JSON.parse(response.body)
          offset  += 50
          results += data['results']
          endpoint = data['pagination']['next']  # nil on the last page

          spinner.update(title: "Downloading #{entity} [#{offset}]...") if (endpoint and (offset % 250 == 0))
        end

        spinner.success('(OK)')
      rescue => e
        # Best-effort: log and fall through with whatever we collected.
        spinner.error('API error')
        $logger.error e.message
        $logger.debug e.backtrace.join("\n")
      end

      munge_dates(results)
    end

    # Transform dates into a format that BigQuery knows.
    def munge_dates(object)
      # Guard: `.first` is nil for an empty result set (e.g. failed download).
      return object if object.empty?

      ["created_at", "updated_at", "deprecated_at", "deleted_at"].each do |field|
        next unless object.first.keys.include? field

        object.each do |record|
          next unless record[field]
          record[field] = DateTime.parse(record[field]).strftime("%Y-%m-%d %H:%M:%S")
        end
      end
      object
    end

    # Cache raw data as a single JSON document.
    def save_json(thing, data)
      File.write("#{@cachedir}/#{thing}.json", data.to_json)
    end

    # Store data in a way that bigquery can grok:
    # uploading files is far easier than streaming data, when replacing a dataset.
    def save_nld_json(thing, data)
      File.write("#{@cachedir}/nld_#{thing}.json", data.to_newline_delimited_json)
    end

    # Collapse nested module records into flat rows for BigQuery.
    # Mutates and returns `data`.
    def flatten_modules(data)
      data.each do |row|
        row['owner']            = row['owner']['username']
        row['superseded_by']    = row['superseded_by']['slug'] rescue nil
        row['pdk']              = row['current_release']['pdk']
        row['supported']        = row['current_release']['supported']
        row['version']          = row['current_release']['version']
        row['validation_score'] = row['current_release']['validation_score']
        row['license']          = row['current_release']['metadata']['license']
        row['source']           = row['current_release']['metadata']['source']
        row['project_page']     = row['current_release']['metadata']['project_page']
        row['issues_url']       = row['current_release']['metadata']['issues_url']
        row['tasks']            = row['current_release']['tasks'].map{|task| task['name']}

        row['release_count']    = row['releases'].count rescue 0
        row['releases']         = row['releases'].map{|r| r['version']} rescue []

        simplify_metadata(row, row['current_release']['metadata'])
        row.delete('current_release')
      end
      data
    end

    # Collapse nested release records into flat rows for BigQuery.
    # Mutates and returns `data`.
    def flatten_releases(data)
      data.each do |row|
        row['name']         = row['module']['name']
        # NOTE(review): elsewhere in this gem the owner is read as
        # module.owner.username — verify this shouldn't be
        # row['module']['owner']['username'].
        row['owner']        = row['module']['username']
        row['license']      = row['metadata']['license']
        row['source']       = row['metadata']['source']
        row['project_page'] = row['metadata']['project_page']
        row['issues_url']   = row['metadata']['issues_url']
        row['tasks']        = row['tasks'].map{|task| task['name']}

        simplify_metadata(row, row['metadata'])
        row.delete('module')
      end
      data
    end

    # Turn the {name => [{name, score}, ...]} validations map into flat rows.
    def flatten_validations(data)
      data.map do |name, scores|
        row = { 'name' => name }
        scores.each do |entry|
          row[entry['name']] = entry['score']
        end
        row
      end
    end

    # Extract commonly-queried fields out of a module/release metadata blob,
    # including per-major-version Puppet compatibility booleans.
    def simplify_metadata(data, metadata)
      data['operatingsystem'] = metadata['operatingsystem_support'].map{|i| i['operatingsystem']} rescue nil
      data['dependencies']    = metadata['dependencies'].map{|i| i['name']} rescue nil
      data['puppet_range']    = metadata['requirements'].select{|r| r['name'] == 'puppet'}.first['version_requirement'] rescue nil
      data['metadata']        = metadata.to_json

      if data['puppet_range']
        range = SemanticPuppet::VersionRange.parse(data['puppet_range'])
        data['puppet_2x'] = range.include? SemanticPuppet::Version.parse('2.99.99')
        data['puppet_3x'] = range.include? SemanticPuppet::Version.parse('3.99.99')
        data['puppet_4x'] = range.include? SemanticPuppet::Version.parse('4.99.99')
        data['puppet_5x'] = range.include? SemanticPuppet::Version.parse('5.99.99')
        data['puppet_6x'] = range.include? SemanticPuppet::Version.parse('6.99.99')
      end
    end

    # Drop into an interactive console for debugging.
    def test()
      require 'pry'
      binding.pry
    end
  end
end
|
data/lib/mvp/runner.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'mvp/downloader'
|
2
|
+
require 'mvp/uploader'
|
3
|
+
require 'mvp/stats'
|
4
|
+
|
5
|
+
class Mvp
  # Orchestrates the download / upload / stats workflows selected on the CLI.
  class Runner
    def initialize(options = {})
      @cachedir = options[:cachedir]
      @debug    = options[:debug]
      @options  = options
    end

    # Download Forge metadata for the chosen target(s); with download=false,
    # re-transform cached data instead of hitting the API.
    def retrieve(target = :all, download = true)
      downloader = Mvp::Downloader.new(@options)

      [:authors, :modules, :releases].each do |thing|
        next unless [:all, thing].include? target
        downloader.retrieve(thing, download)
      end

      if [:all, :validations].include? target
        downloader.validations()
      end
    end

    # Push cached datasets to BigQuery.
    def upload(target = :all)
      uploader = Mvp::Uploader.new(@options)

      [:authors, :modules, :releases, :validations, :mirrors].each do |thing|
        next unless [:all, thing].include? target
        uploader.send(thing)
      end
    end

    # Convenience: retrieve then upload in one pass.
    def mirror(target = :all)
      retrieve(target)
      upload(target)
    end

    # Print summary statistics for the chosen target(s).
    # CONSISTENCY FIX: default target to :all like every other command here
    # (the original required callers to pass it explicitly).
    def stats(target = :all)
      stats = Mvp::Stats.new(@options)

      [:authors, :modules, :releases, :relationships, :github, :validations].each do |thing|
        next unless [:all, thing].include? target
        stats.send(thing)
      end
    end

    # Drop into an interactive console for debugging.
    def test()
      require 'pry'
      binding.pry
    end
  end
end
|
data/lib/mvp/stats.rb
ADDED
@@ -0,0 +1,339 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'histogram'
|
3
|
+
require 'ascii_charts'
|
4
|
+
require 'histogram/array'
|
5
|
+
require 'sparkr'
|
6
|
+
|
7
|
+
class Mvp
|
8
|
+
class Stats
|
9
|
+
# Capture run-wide settings: cache location, optional GitHub star CSV,
# and the CSV report destination.
def initialize(options = {})
  @cachedir, @github_data, @output_file = options.values_at(:cachedir, :github_data, :output_file)
  # Snapshot today's date once so all age calculations in a run agree.
  @today = Date.today
end
|
15
|
+
|
16
|
+
# Read a cached Forge dump back into Ruby objects.
# (Intentionally shadows Kernel#load within this class.)
def load(entity)
  cached = File.read("#{@cachedir}/#{entity}.json")
  JSON.parse(cached)
end
|
19
|
+
|
20
|
+
# Print an ASCII histogram of `series` bucketed into bins of `width`.
# Nils are stripped from `series` in place before bucketing.
def draw_graph(series, width, title = nil)
  series.compact!
  bins, freqs = series.histogram(:bin_width => width)
  # Pair each bin with its frequency to build the chart's point list.
  points = bins.zip(freqs)
  puts AsciiCharts::Cartesian.new(points, :bar => true, :hide_zero => true, :title => title).draw
end
|
30
|
+
|
31
|
+
# TODO: improve this to discard outliers and slightly weight larger series
|
32
|
+
# TODO: improve this to discard outliers and slightly weight larger series
# Mean of a numeric series, ignoring nils; returns 0 for an empty or
# all-nil series.
# BUG FIX: uses non-destructive compact — the original's compact! mutated
# the caller's array as a side effect.
def average(series)
  values = series.compact
  return 0 if values.empty?

  values.inject(0.0) { |sum, el| sum + el } / values.size
end
|
38
|
+
|
39
|
+
# Days elapsed between the given date string and the run date (@today).
def days_ago(datestr)
  parsed = Date.parse(datestr)
  @today - parsed
end
|
42
|
+
|
43
|
+
# Years elapsed between the given date string and the run date.
# Approximate: a flat 365-day year, ignoring leap days.
def years_ago(datestr)
  days_ago(datestr) / 365
end
|
46
|
+
|
47
|
+
# Fold per-release data into the author records in `target`: collects each
# author's release dates and validation scores, then derives their average
# score, 'impact' (average * author[scope]), and newest/oldest release.
# Mutates the hashes in `target`.
def tally_author_info(releases, target, scope='module_count')
  # update the author records with the fields we need
  target.each do |author|
    author['release_dates'] = []
    author['scores'] = []
  end

  releases.each do |mod|
    username = mod['module']['owner']['username']
    score    = mod['validation_score']
    author   = target.find {|m| m['username'] == username}
    # BUG FIX: a release can belong to an author filtered out of `target`
    # (e.g. puppetlabs, or zero-module authors); skip it instead of
    # crashing on a nil author.
    next unless author

    author['release_dates'] << mod['created_at']
    author['scores'] << score if score
  end

  target.each do |author|
    author['average'] = average(author['scores']).to_i
    author['impact']  = author['average'] * author[scope]
    # max_by/min_by return nil for authors with no releases; callers filter.
    author['newest_release'] = author['release_dates'].max_by {|r| Date.parse(r) }
    author['oldest_release'] = author['release_dates'].min_by {|r| Date.parse(r) }
  end
end
|
70
|
+
|
71
|
+
# Print author-centric statistics: cohort histograms, publication counts,
# and top-20 tables by module count and by release count.
def authors()
  # The puppetlabs account dwarfs everyone else; exclude it from all stats.
  data = load('authors').reject {|u| u['username'] == 'puppetlabs' }
  # Cohorts: casual = 2..9 modules, prolific = 10+.
  casual = data.select {|u| (2...10).include? u['module_count'] }
  prolific = data.select {|u| u['module_count'] > 9}
  # Top-20 lists by module count and by release count.
  topmost = data.sort_by {|u| u['module_count']}.reverse[0...20]
  releases = data.sort_by {|u| u['release_count']}.reverse[0...20]

  puts "* Prolific in this case is more than 9 released modules."

  draw_graph(casual.map {|u| u['module_count']}, 1, 'Number of modules from casual authors')
  draw_graph(prolific.map {|u| u['module_count']}, 5, 'Number of modules from prolific authors')

  puts
  puts
  puts "Author Statistics:"
  puts " └── Number of users: #{data.count}"
  puts " └── Number who have never published a module: #{data.select {|u| u['module_count'] == 0}.count}"
  puts " └── Number who have published a single module: #{data.select {|u| u['module_count'] == 1}.count}"
  puts " └── Number who have published multiple modules: #{data.select {|u| u['module_count'] > 1}.count}"
  puts " └── Number who have published two modules: #{data.select {|u| u['module_count'] == 2}.count}"
  puts " └── Number who have published more than 5 modules: #{data.select {|u| u['module_count'] > 5}.count}"
  puts " └── Number who have published more than 10 modules: #{data.select {|u| u['module_count'] > 10}.count}"
  puts " └── Number who have published more than 20 modules: #{data.select {|u| u['module_count'] > 20}.count}"
  puts " └── Number who have published more than 30 modules: #{data.select {|u| u['module_count'] > 30}.count}"
  puts " └── Number who have published more than 50 modules: #{data.select {|u| u['module_count'] > 50}.count}"

  puts
  puts "Top 20 prolific module authors by number of modules | number of releases:"
  topmost.each do |author|
    puts " └── %-55s: %d | %d" % [ "#{author['display_name']} (#{author['username']})",
                                   author['module_count'],
                                   author['release_count'] ]
  end
  puts
  puts "Top 20 active module authors by number of releases | number of modules:"
  releases.each do |author|
    puts " └── %-55s: %d | %d" % [ "#{author['display_name']} (#{author['username']})",
                                   author['release_count'],
                                   author['module_count'] ]
  end
end
|
112
|
+
|
113
|
+
def modules()
|
114
|
+
data_m = load('modules').reject {|m| m['owner']['username'] == 'puppetlabs' }
|
115
|
+
data_a = load('authors').reject {|u| u['username'] == 'puppetlabs' or u['module_count'] == 0}
|
116
|
+
current = data_m.map {|m| m['current_release'] }
|
117
|
+
|
118
|
+
tally_author_info(current, data_a, 'module_count')
|
119
|
+
|
120
|
+
prolific = data_a.select{|a| a['impact']>1000}.sort_by {|a| a['impact']}
|
121
|
+
topmost = data_a.sort_by {|a| a['impact']}.reverse[0...20]
|
122
|
+
published = data_a.reject {|u| u['newest_release'].nil?}
|
123
|
+
|
124
|
+
puts '* Validation score is a Forge ranking based on the scores of an individual module release.'
|
125
|
+
puts "* I am defining impact as an author's average validation * the number of modules releases they've made / 100."
|
126
|
+
puts "* Prolific in this case is impact > 100."
|
127
|
+
|
128
|
+
draw_graph(current.map {|m| years_ago(m['created_at']).round(1)}, 0.5, 'Age (in years) distribution by module')
|
129
|
+
draw_graph(published.map {|m| years_ago(m['newest_release']).round(1)}, 0.5, "Distribution of author's newest module by years old")
|
130
|
+
draw_graph(current.map {|m| m['validation_score']}, 10, 'Validation score distribution by module')
|
131
|
+
draw_graph(data_a.map {|a| average(a['scores']).to_i }, 10, 'Validation score distribution by author')
|
132
|
+
draw_graph(prolific.map {|a| a['impact']/100 }, 5, 'Impact distribution by prolific authors')
|
133
|
+
|
134
|
+
puts
|
135
|
+
puts
|
136
|
+
puts "Module Statistics:"
|
137
|
+
puts " └── Number of modules: #{data_m.count}"
|
138
|
+
puts " └── Modules less than a year old: #{current.select {|m| days_ago(m['created_at']) < 365}.count}"
|
139
|
+
puts " └── Modules more than a year old: #{current.select {|m| days_ago(m['created_at']) > 365}.count}"
|
140
|
+
puts " └── Modules more than two years old: #{current.select {|m| years_ago(m['created_at']) > 2}.count}"
|
141
|
+
puts " └── Modules more than three years old: #{current.select {|m| years_ago(m['created_at']) > 3}.count}"
|
142
|
+
puts " └── Modules more than four years old: #{current.select {|m| years_ago(m['created_at']) > 4}.count}"
|
143
|
+
puts " └── Modules more than five years old: #{current.select {|m| years_ago(m['created_at']) > 5}.count}"
|
144
|
+
puts " └── Authors with 'perfect' validation scores: #{data_a.select {|u| average(u['scores']).to_i == 100}.count}"
|
145
|
+
puts " └── Authors who've released in the last year: #{published.select {|u| days_ago(u['newest_release']) < 365}.count}"
|
146
|
+
puts " └── Authors with no outdated (1yr) modules: #{published.select {|u| days_ago(u['oldest_release']) < 365}.count}"
|
147
|
+
|
148
|
+
puts
|
149
|
+
puts "Top 20 high impact module authors by impact | number of modules:"
|
150
|
+
topmost.each do |author|
|
151
|
+
puts " └── %-55s: %d | %d" % [ "#{author['display_name']} (#{author['username']})",
|
152
|
+
author['impact']/100,
|
153
|
+
author['module_count'] ]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
def releases()
|
158
|
+
data_r = load('releases').reject {|m| m['module']['owner']['username'] == 'puppetlabs' }
|
159
|
+
data_a = load('authors').reject {|u| u['username'] == 'puppetlabs' or u['module_count'] == 0}
|
160
|
+
|
161
|
+
tally_author_info(data_r, data_a, 'release_count')
|
162
|
+
|
163
|
+
impactful = data_a.select{|a| a['impact']>5000}.sort_by {|a| a['impact']}
|
164
|
+
topmost = data_a.sort_by {|a| a['impact']}.reverse[0...20]
|
165
|
+
published = data_a.reject {|u| u['newest_release'].nil?}
|
166
|
+
multiple = published.select {|u| u['module_count'] > 1}
|
167
|
+
prolific = published.select {|u| u['module_count'] > 9}
|
168
|
+
current = multiple.sort_by {|a| days_ago(a['oldest_release'])}[0...20]
|
169
|
+
|
170
|
+
# Authors that used to be active, but don't seem to be any more
|
171
|
+
faded = published.select do |author|
|
172
|
+
count_old = author['release_dates'].select {|r| years_ago(r) > 2 }.count
|
173
|
+
count_new = author['release_dates'].select {|r| years_ago(r) < 1.5 }.count
|
174
|
+
|
175
|
+
(count_old > 25 and count_old > (50*count_new))
|
176
|
+
end
|
177
|
+
|
178
|
+
oldest = years_ago(faded.map { |u| u['release_dates']}.flatten.max_by {|r| days_ago(r) }).to_i
|
179
|
+
faded.each do |author|
|
180
|
+
author['annual_releases'] = []
|
181
|
+
|
182
|
+
(1..oldest).each do |age|
|
183
|
+
author['annual_releases'] << author['release_dates'].select {|r| years_ago(r).to_i == age }.count
|
184
|
+
end
|
185
|
+
author['annual_releases'].reverse!
|
186
|
+
end
|
187
|
+
|
188
|
+
puts '* Validation score is a Forge ranking based on the scores of an individual module release.'
|
189
|
+
puts "* I am defining impact as an author's average validation * the number of modules releases they've made / 100."
|
190
|
+
puts "* Prolific in this case is more than 9 released modules."
|
191
|
+
|
192
|
+
draw_graph(data_a.map {|a| average(a['scores']).to_i }, 10, 'Validation score distribution by author')
|
193
|
+
draw_graph(impactful.map {|a| a['impact']/100 }, 50, 'Impact distribution by impactful authors')
|
194
|
+
|
195
|
+
puts
|
196
|
+
puts
|
197
|
+
puts "Release Statistics:"
|
198
|
+
puts " └── Number of releases: #{data_r.count}"
|
199
|
+
puts " └── Authors with no releases: #{data_a.count - published.count}"
|
200
|
+
puts " └── Authors with only a single releases: #{published.count - multiple.count}"
|
201
|
+
puts " └── Authors with no releases in one year: #{published.select {|m| years_ago(m['newest_release']) >1}.count}"
|
202
|
+
puts " └── Authors with no releases in two years: #{published.select {|m| years_ago(m['newest_release']) >2}.count}"
|
203
|
+
puts " └── Authors with no releases in three years: #{published.select {|m| years_ago(m['newest_release']) >3}.count}"
|
204
|
+
puts " └── Authors with no releases in four years: #{published.select {|m| years_ago(m['newest_release']) >4}.count}"
|
205
|
+
puts " └── Authors with no releases in five years: #{published.select {|m| years_ago(m['newest_release']) >5}.count}"
|
206
|
+
puts " └── Authors with multiple releases, all newer than a month: #{multiple.select {|u| days_ago(u['oldest_release']) < 30}.count}"
|
207
|
+
puts " └── Authors with multiple releases, all newer than 3 months: #{multiple.select {|u| days_ago(u['oldest_release']) < 90}.count}"
|
208
|
+
puts " └── Authors with multiple releases, all newer than 6 months: #{multiple.select {|u| days_ago(u['oldest_release']) < 180}.count}"
|
209
|
+
puts " └── Authors with multiple releases, all newer than a year: #{multiple.select {|u| days_ago(u['oldest_release']) < 365}.count}"
|
210
|
+
puts " └── Prolific authors, with releases all newer than 3 months: #{prolific.select {|u| days_ago(u['oldest_release']) < 90}.count}"
|
211
|
+
puts " └── Prolific authors, with releases all newer than 6 months: #{prolific.select {|u| days_ago(u['oldest_release']) < 180}.count}"
|
212
|
+
puts " └── Prolific authors, with releases all newer than a year: #{prolific.select {|u| days_ago(u['oldest_release']) < 365}.count}"
|
213
|
+
puts " └── Prolific authors, with releases all newer than 2 years: #{prolific.select {|u| years_ago(u['oldest_release']) < 2}.count}"
|
214
|
+
|
215
|
+
puts
|
216
|
+
puts "Top 20 high impact module authors by impact | number of releases:"
|
217
|
+
topmost.each do |author|
|
218
|
+
puts " └── %-55s: %d | %d" % [ "#{author['display_name']} (#{author['username']})",
|
219
|
+
author['impact']/100,
|
220
|
+
author['release_count'] ]
|
221
|
+
end
|
222
|
+
puts
|
223
|
+
puts "Top 20 current module authors by oldest release | number of releases:"
|
224
|
+
current.each do |author|
|
225
|
+
puts " └── %-55s: %s | %d" % [ "#{author['display_name']} (#{author['username']})",
|
226
|
+
Date.parse(author['oldest_release']).strftime('%v'),
|
227
|
+
author['release_count'] ]
|
228
|
+
end
|
229
|
+
puts
|
230
|
+
puts "Authors who are no longer as active as they used to be:"
|
231
|
+
faded.each do |author|
|
232
|
+
puts " └── %-55s: %s %s" % [ "#{author['display_name']} (#{author['username']})",
|
233
|
+
Sparkr.sparkline(author['annual_releases']),
|
234
|
+
author['annual_releases'].to_s ]
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
def relationships()
|
239
|
+
data_m = load('modules').reject {|m| m['owner']['username'] == 'puppetlabs' }
|
240
|
+
data_a = load('authors').reject {|u| u['username'] == 'puppetlabs' or u['module_count'] == 0}
|
241
|
+
current = data_m.map {|m| m['current_release'] }
|
242
|
+
|
243
|
+
current.each do |mod|
|
244
|
+
mod['metadata']['dependants'] = []
|
245
|
+
end
|
246
|
+
current.each do |mod|
|
247
|
+
mod['metadata']['dependencies'].each do |dependency|
|
248
|
+
target = current.select {|m| m['metadata']['name'] == dependency['name'].sub('/','-')}.first
|
249
|
+
next unless target
|
250
|
+
|
251
|
+
target['metadata']['dependants'] << mod['metadata']['name']
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
data_a.each { |a| a['dependants'] = [] }
|
256
|
+
current.each do |mod|
|
257
|
+
count = mod['metadata']['dependants'].count
|
258
|
+
next unless count > 0
|
259
|
+
|
260
|
+
author = data_a.select{|m| m['username'] == mod['module']['owner']['username']}.first
|
261
|
+
author['dependants'] << count
|
262
|
+
end
|
263
|
+
data_a.each { |a| a['average_dependants'] = average(a['dependants']) }
|
264
|
+
|
265
|
+
top_mods = current.sort_by {|m| m['metadata']['dependants'].count}.reverse[0...20]
|
266
|
+
connected = data_a.sort_by {|a| a['average_dependants'] }.reverse[0...20]
|
267
|
+
|
268
|
+
low_conn = current.select {|m| (2..10).include? m['metadata']['dependants'].count}
|
269
|
+
high_conn = current.select {|m| m['metadata']['dependants'].count > 10}
|
270
|
+
|
271
|
+
draw_graph(low_conn.map {|m| m['metadata']['dependants'].count }, 1, 'Number of dependent modules for low connection modules')
|
272
|
+
draw_graph(high_conn.map {|m| m['metadata']['dependants'].count }, 10, 'Number of dependent modules for high connection modules')
|
273
|
+
draw_graph(connected.map {|a| a['average_dependants'].to_i }, 5, 'Average number of dependent modules by author')
|
274
|
+
|
275
|
+
puts
|
276
|
+
puts "Top 20 connected module authors by number of dependants | number of modules | number of releases:"
|
277
|
+
connected.each do |author|
|
278
|
+
puts " └── %-55s: %s | %d | %d" % [ "#{author['display_name']} (#{author['username']})",
|
279
|
+
author['average_dependants'].to_i,
|
280
|
+
author['module_count'],
|
281
|
+
author['release_count'] ]
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
def github()
|
286
|
+
require 'csv'
|
287
|
+
require 'net/http'
|
288
|
+
raise "Need to provide a data file to gather GitHub stats!" unless @github_data
|
289
|
+
|
290
|
+
unfound = []
|
291
|
+
modules = load('modules').map {|m| m['slug']}
|
292
|
+
CSV.foreach(@github_data) do |row|
|
293
|
+
repo, stars = row
|
294
|
+
next unless repo =~ /^\w+\/\w+$/
|
295
|
+
|
296
|
+
begin
|
297
|
+
uri_path = "https://raw.githubusercontent.com/#{repo}/master/metadata.json"
|
298
|
+
metadata = JSON.parse(Net::HTTP.get(URI.parse(uri_path)))
|
299
|
+
|
300
|
+
unless modules.include? metadata['name'].sub('/', '-')
|
301
|
+
repo_path = "https://github.com/#{repo}"
|
302
|
+
unfound << { :repo => repo_path, :stars => stars}
|
303
|
+
end
|
304
|
+
rescue => e
|
305
|
+
puts "#{e.class} for #{uri_path}"
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
309
|
+
# sort the list by number of stars, descending then alphabatize by repo
|
310
|
+
unfound.sort! do |a, b|
|
311
|
+
[b[:stars], a[:repo]] <=> [a[:stars], b[:repo]]
|
312
|
+
end
|
313
|
+
|
314
|
+
if @output_file
|
315
|
+
CSV.open("outreach.csv", "w+") do |csv|
|
316
|
+
unfound.each do |mod|
|
317
|
+
csv << [ mod[:repo], mod[:stars] ]
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
puts "The following #{unfound.count} module repositories were not represented on the Forge:" unless unfound.empty?
|
323
|
+
unfound.each do |mod|
|
324
|
+
puts " └── %-65s: %d" % [ mod[:repo], mod[:stars] ]
|
325
|
+
end
|
326
|
+
|
327
|
+
|
328
|
+
end
|
329
|
+
|
330
|
+
def validations()
|
331
|
+
puts 'got nothing for you yet'
|
332
|
+
end
|
333
|
+
|
334
|
+
def test()
|
335
|
+
require 'pry'
|
336
|
+
binding.pry
|
337
|
+
end
|
338
|
+
end
|
339
|
+
end
|
data/lib/mvp/uploader.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'tty-spinner'
|
3
|
+
require "google/cloud/bigquery"
|
4
|
+
|
5
|
+
class Mvp
|
6
|
+
class Uploader
|
7
|
+
def initialize(options = {})
|
8
|
+
@cachedir = options[:cachedir]
|
9
|
+
@mirrors = options[:gcloud][:mirrors]
|
10
|
+
@bigquery = Google::Cloud::Bigquery.new(
|
11
|
+
:project_id => options[:gcloud][:project],
|
12
|
+
:credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
|
13
|
+
)
|
14
|
+
@dataset = @bigquery.dataset(options[:gcloud][:dataset])
|
15
|
+
end
|
16
|
+
|
17
|
+
def authors()
|
18
|
+
upload('authors')
|
19
|
+
end
|
20
|
+
|
21
|
+
def modules()
|
22
|
+
upload('modules')
|
23
|
+
end
|
24
|
+
|
25
|
+
def releases()
|
26
|
+
upload('releases')
|
27
|
+
end
|
28
|
+
|
29
|
+
def validations()
|
30
|
+
upload('validations')
|
31
|
+
end
|
32
|
+
|
33
|
+
def mirrors()
|
34
|
+
@mirrors.each do |entity|
|
35
|
+
begin
|
36
|
+
spinner = TTY::Spinner.new("[:spinner] :title")
|
37
|
+
spinner.update(title: "Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
|
38
|
+
spinner.auto_spin
|
39
|
+
|
40
|
+
case entity[:type]
|
41
|
+
when :view
|
42
|
+
@dataset.table(entity[:name]).delete rescue nil # delete if exists
|
43
|
+
@dataset.create_view(entity[:name], entity[:query],
|
44
|
+
:legacy_sql => true)
|
45
|
+
|
46
|
+
when :table
|
47
|
+
job = @dataset.query_job(entity[:query],
|
48
|
+
:legacy_sql => true,
|
49
|
+
:write => 'truncate',
|
50
|
+
:table => @dataset.table(entity[:name], :skip_lookup => true))
|
51
|
+
job.wait_until_done!
|
52
|
+
|
53
|
+
else
|
54
|
+
$logger.error "Unknown mirror type: #{entity[:type]}"
|
55
|
+
end
|
56
|
+
|
57
|
+
spinner.success('(OK)')
|
58
|
+
rescue => e
|
59
|
+
spinner.error("(Google Cloud error: #{e.message})")
|
60
|
+
$logger.error e.backtrace.join("\n")
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def upload(entity)
|
66
|
+
begin
|
67
|
+
spinner = TTY::Spinner.new("[:spinner] :title")
|
68
|
+
spinner.update(title: "Uploading #{entity} to BigQuery ...")
|
69
|
+
spinner.auto_spin
|
70
|
+
|
71
|
+
@dataset.load("forge_#{entity}", "#{@cachedir}/nld_#{entity}.json",
|
72
|
+
:write => 'truncate',
|
73
|
+
:autodetect => true)
|
74
|
+
|
75
|
+
# table = @dataset.table("forge_#{entity}")
|
76
|
+
# File.readlines("#{@cachedir}/nld_#{entity}.json").each do |line|
|
77
|
+
# data = JSON.parse(line)
|
78
|
+
#
|
79
|
+
# begin
|
80
|
+
# table.insert data
|
81
|
+
# rescue
|
82
|
+
# require 'pry'
|
83
|
+
# binding.pry
|
84
|
+
# end
|
85
|
+
# end
|
86
|
+
|
87
|
+
|
88
|
+
spinner.success('(OK)')
|
89
|
+
rescue => e
|
90
|
+
spinner.error("(Google Cloud error: #{e.message})")
|
91
|
+
$logger.error e.backtrace.join("\n")
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def test()
|
96
|
+
require 'pry'
|
97
|
+
binding.pry
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
metadata
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: puppet-community-mvp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Ford
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-06-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: histogram
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ascii_charts
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sparkr
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: semantic_puppet
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: httparty
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: tty-spinner
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: google-cloud
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
description: |2
|
126
|
+
Nothing exciting. Just gathers stats about the Puppet Community. Currently
|
127
|
+
draws data from the Puppet Forge, GitHub, and Slack. Optionally pushes data
|
128
|
+
into BigQuery for later consumption.
|
129
|
+
|
130
|
+
Run `mvp --help` to get started.
|
131
|
+
email: ben.ford@puppet.com
|
132
|
+
executables:
|
133
|
+
- mvp
|
134
|
+
extensions: []
|
135
|
+
extra_rdoc_files: []
|
136
|
+
files:
|
137
|
+
- LICENSE
|
138
|
+
- README.md
|
139
|
+
- bin/mvp
|
140
|
+
- lib/mvp.rb
|
141
|
+
- lib/mvp/downloader.rb
|
142
|
+
- lib/mvp/monkeypatches.rb
|
143
|
+
- lib/mvp/runner.rb
|
144
|
+
- lib/mvp/stats.rb
|
145
|
+
- lib/mvp/uploader.rb
|
146
|
+
homepage:
|
147
|
+
licenses:
|
148
|
+
- Apache 2
|
149
|
+
metadata: {}
|
150
|
+
post_install_message:
|
151
|
+
rdoc_options: []
|
152
|
+
require_paths:
|
153
|
+
- lib
|
154
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
161
|
+
- - ">="
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0'
|
164
|
+
requirements: []
|
165
|
+
rubyforge_project:
|
166
|
+
rubygems_version: 2.5.2.3
|
167
|
+
signing_key:
|
168
|
+
specification_version: 4
|
169
|
+
summary: Generate some stats about the Puppet Community.
|
170
|
+
test_files: []
|