puppet-community-mvp 0.0.4 → 0.0.5

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: e45ff6e06d776fb8c3129789b4f469f25f86ea59
- data.tar.gz: 640dd10a4620d77281ebb58ec4d097dc40b9bfbd
+ metadata.gz: 3f5ed8c2978cad58a0ce52346eb854b3b4e82a9a
+ data.tar.gz: a5637f505ed5fe8c74a22f9633723b9211ad3c4a
  SHA512:
- metadata.gz: 00fffd365248a810b1542eb7e4e370398330d469834a2e5cecab70f0fe370149d8bc87ac7027c9260c6a2d0a8f24a06915e5946bf5d84c5dd6c07a8e4ac95ac3
- data.tar.gz: ac1e153462e94226eb56621e687667f369aa0ca35f3d34dd6e3c9096e0d7dc56dcb1f4584f0b2072cda3f47a5c5f1b2b645f106d5f314ef45fdcc3d9f6af6a25
+ metadata.gz: 3ad31ba42a0e2f96ae8254bf383e523c74890fa219cb1b27946cae7718aba76b2d3532149dc98dfc1d59b5e47c4d31c70156c96203659425b55607c6a0d948d1
+ data.tar.gz: f8970bb1a709f807e3e5d2f45e2c74f7adbea034fc5d9293024e76f4dfb8b2e9eb59ecf636e38b12d69ce224867bbc96570f5e9ed1ddb5092783888e145ceec9
data/bin/mvp CHANGED
@@ -13,16 +13,14 @@ optparse = OptionParser.new { |opts|
  opts.banner = "Usage : #{NAME} [command] [target] [options]

  This tool will scrape the Puppet Forge API for interesting module & author stats.
- The following CLI commands are available.
+ It can also mirror public BigQuery tables or views into our dataset for efficiency,
+ or download and itemize each Forge module.

- * get | retrieve | download [target]
- * Downloads and caches all Forge metadata.
- * Optional targets: all, authors, modules, releases
- * upload | insert [target]
- * Uploads data to BigQuery
- * Optional targets: all, authors, modules, releases, mirrors
  * mirror [target]
  * Runs the download & then upload tasks.
+ * Optional targets: all, authors, modules, releases, validations, itemizations, puppetfiles, tables
+ * get | retrieve | download [target]
+ * Downloads and caches data locally so you can run the stats task.
  * Optional targets: all, authors, modules, releases
  * stats
  * Print out a summary of interesting stats.
@@ -64,6 +62,10 @@ The following CLI commands are available.
  options[:debug] = true
  end

+ opts.on("-n", "--noop", "Don't actually upload data.") do
+ options[:noop] = true
+ end
+
  opts.separator('')

  opts.on("-h", "--help", "Displays this help") do
@@ -100,14 +102,6 @@ when 'get', 'retrieve', 'download'
  target ||= :all
  runner.retrieve(target.to_sym)

- when 'transform'
- target ||= :all
- runner.retrieve(target.to_sym, false)
-
- when 'insert', 'upload'
- target ||= :all
- runner.upload(target.to_sym)
-
  when 'mirror'
  target ||= :all
  runner.mirror(target.to_sym)
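
Taken together with the banner above, a dry run such as 'mvp mirror all --noop' now threads a :noop flag through to every BigQuery write. A minimal sketch of that flow (the Runner constructor signature is assumed, not shown in this diff):

    # Hypothetical illustration of what 'mvp mirror all --noop' boils down to:
    options = { :noop => true }          # set by the new -n/--noop switch
    runner  = Mvp::Runner.new(options)   # constructor assumed to take the options hash
    runner.mirror(:all)                  # writes are skipped by the :noop guards in Mvp::Bigquery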
data/bin/pftest.rb ADDED
@@ -0,0 +1,22 @@
+ #! /usr/bin/env ruby
+
+ require 'mvp/puppetfile_parser'
+ require 'open-uri'
+ require 'json'
+ require 'logger'
+
+ $logger = Logger::new(STDOUT)
+ $logger.level = Logger::INFO
+ $logger.formatter = proc { |severity,datetime,progname,msg| "#{severity}: #{msg}\n" }
+
+ pf = open(ARGV.first)
+ parser = Mvp::PuppetfileParser.new()
+
+
+ repo = {
+ :repo_name => 'testing',
+ :md5 => 'wakka wakka',
+ :content => pf.read,
+ }
+
+ puts JSON.pretty_generate(parser.parse(repo))
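
Because the harness reads its argument with open-uri, it accepts either a local path or a URL; a hypothetical invocation would be 'ruby bin/pftest.rb ./Puppetfile', which prints the parsed module rows as pretty-printed JSON.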
data/lib/mvp.rb CHANGED
@@ -1,4 +1,2 @@
  require 'mvp/runner'
- require 'mvp/downloader'
- require 'mvp/uploader'
- require 'mvp/stats'
+ require 'mvp/stats'
data/lib/mvp/uploader.rb → data/lib/mvp/bigquery.rb RENAMED
@@ -3,10 +3,10 @@ require 'tty-spinner'
  require "google/cloud/bigquery"

  class Mvp
- class Uploader
+ class Bigquery
  def initialize(options = {})
+ @options = options
  @cachedir = options[:cachedir]
- @mirrors = options[:gcloud][:mirrors]
  @bigquery = Google::Cloud::Bigquery.new(
  :project_id => options[:gcloud][:project],
  :credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
@@ -27,9 +27,24 @@ class Mvp
  s.integer "count", mode: :required
  end
  end
+
+ @puppetfile_usage = @dataset.table('github_puppetfile_usage') || @dataset.create_table('github_puppetfile_usage') do |table|
+ table.name = 'Puppetfile Module Usage'
+ table.description = 'A list of all modules referenced in public Puppetfiles'
+ table.schema do |s|
+ s.string "repo_name", mode: :required
+ s.string "module", mode: :required
+ s.string "type", mode: :required
+ s.string "source"
+ s.string "version"
+ s.string "md5", mode: :required
+ end
+ end
  end

  def truncate(entity)
+ return if @options[:noop]
+
  begin
  case entity
  when :authors
@@ -163,95 +178,85 @@ class Mvp
  end
  end

- def authors()
- upload('authors')
- end
-
- def modules()
- upload('modules')
+ def retrieve(entity)
+ get(entity, ['*'])
  end

- def releases()
- upload('releases')
- end
+ def mirror_table(entity)
+ return if @options[:noop]

- def validations()
- upload('validations')
- end
-
- def github_mirrors()
- @mirrors.each do |entity|
- begin
- spinner = TTY::Spinner.new("[:spinner] :title")
- spinner.update(title: "Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
- spinner.auto_spin
-
- case entity[:type]
- when :view
- @dataset.table(entity[:name]).delete rescue nil # delete if exists
- @dataset.create_view(entity[:name], entity[:query],
- :legacy_sql => true)
-
- when :table
- job = @dataset.query_job(entity[:query],
- :legacy_sql => true,
- :write => 'truncate',
- :table => @dataset.table(entity[:name], :skip_lookup => true))
- job.wait_until_done!
+ begin
+ case entity[:type]
+ when :view
+ @dataset.table(entity[:name]).delete rescue nil # delete if exists
+ @dataset.create_view(entity[:name], entity[:query])

- else
- $logger.error "Unknown mirror type: #{entity[:type]}"
- end
+ when :table
+ job = @dataset.query_job(entity[:query],
+ :write => 'truncate',
+ :table => @dataset.table(entity[:name], :skip_lookup => true))
+ job.wait_until_done!

- spinner.success('(OK)')
- rescue => e
- spinner.error("(Google Cloud error: #{e.message})")
- $logger.error e.backtrace.join("\n")
+ else
+ $logger.error "Unknown mirror type: #{entity[:type]}"
  end
+ rescue => e
+ $logger.error("(Google Cloud error: #{e.message})")
+ $logger.debug e.backtrace.join("\n")
  end
  end

- def insert(entity, data)
- table = @dataset.table("forge_#{entity}")
+ def insert(entity, data, suite = 'forge')
+ return if @options[:noop]
+ return if data.empty?
+
+ table = @dataset.table("#{suite}_#{entity}")
  response = table.insert(data)

  unless response.success?
- errors = {}
  response.insert_errors.each do |err|
- errors[err.row['slug']] = err.errors
+ $logger.error JSON.pretty_generate(err.row)
+ $logger.error JSON.pretty_generate(err.errors)
  end
- $logger.error JSON.pretty_generate(errors)
  end
  end

- def upload(entity)
- begin
- spinner = TTY::Spinner.new("[:spinner] :title")
- spinner.update(title: "Uploading #{entity} to BigQuery ...")
- spinner.auto_spin
+ def delete(entity, field, match, suite = 'forge')
+ @dataset.query("DELETE FROM #{suite}_#{entity} WHERE #{field} = '#{match}'")
+ end

- @dataset.load("forge_#{entity}", "#{@cachedir}/nld_#{entity}.json",
- :write => 'truncate',
- :autodetect => true)
+ def get(entity, fields, suite = 'forge')
+ raise 'pass fields as an array' unless fields.is_a? Array
+ @dataset.query("SELECT #{fields.join(', ')} FROM #{suite}_#{entity}")
+ end

- # table = @dataset.table("forge_#{entity}")
- # File.readlines("#{@cachedir}/nld_#{entity}.json").each do |line|
- # data = JSON.parse(line)
- #
- # begin
- # table.insert data
- # rescue
- # require 'pry'
- # binding.pry
- # end
- # end
+ def module_sources()
+ get('modules', ['slug', 'source'])
+ end

+ def puppetfiles()
+ sql = 'SELECT f.repo_name, f.path, c.content, c.md5
+ FROM github_puppetfile_files AS f
+ JOIN github_puppetfile_contents AS c
+ ON c.id = f.id

- spinner.success('(OK)')
- rescue => e
- spinner.error("(Google Cloud error: #{e.message})")
- $logger.error e.backtrace.join("\n")
- end
+ WHERE c.md5 NOT IN (
+ SELECT u.md5
+ FROM github_puppetfile_usage AS u
+ WHERE u.repo_name = f.repo_name
+ ) AND LOWER(repo_name) NOT LIKE "%boxen%"'
+ @dataset.query(sql)
+ end
+
+ def unitemized()
+ sql = 'SELECT m.name, m.slug, m.version, m.dependencies
+ FROM forge_modules AS m
+ WHERE m.version NOT IN (
+ SELECT i.version
+ FROM forge_itemized AS i
+ WHERE module = m.slug
+ )'
+ @dataset.query(sql)
  end

  def version_itemized?(mod, version)
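
The net effect of the refactor above: the old per-entity upload methods collapse into a few generic helpers keyed by a table-name suite ("#{suite}_#{entity}"). A minimal sketch of how they compose (calls as defined above; the options hash and row data are assumed):

    bigquery = Mvp::Bigquery.new(options)
    bigquery.get(:modules, ['slug', 'source'])   # SELECT slug, source FROM forge_modules
    bigquery.insert(:validations, rows)          # streams rows into forge_validations
    bigquery.delete(:puppetfile_usage, :repo_name, 'example/repo', :github)
                                                 # DELETE FROM github_puppetfile_usage WHERE repo_name = 'example/repo'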
data/lib/mvp/downloader.rb → data/lib/mvp/forge.rb RENAMED
@@ -2,151 +2,82 @@ require 'json'
  require 'httparty'
  require 'tty-spinner'
  require 'semantic_puppet'
- require 'mvp/monkeypatches'
- require 'mvp/itemizer'

  class Mvp
- class Downloader
+ class Forge
  def initialize(options = {})
  @useragent = 'Puppet Community Stats Monitor'
- @cachedir = options[:cachedir]
  @forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
- @itemizer = Mvp::Itemizer.new(options)
  end

- def mirror(entity, uploader)
- # using authors for git repo terminology consistency
- item = (entity == :authors) ? 'users' : entity.to_s
- download(item) do |data|
- case entity
- when :modules
- uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
- data = flatten_modules(data)
-
- @itemizer.run!(data, uploader)
- when :releases
- data = flatten_releases(data)
- end
-
- uploader.insert(entity, data)
- end
- end
-
- def retrieve(entity, download = true)
- if download
- # I am focusing on authorship rather than just users, so for now I'm using the word authors
- item = (entity == :authors) ? 'users' : entity.to_s
- data = []
- download(item) do |resp|
- data.concat resp
- end
- save_json(entity, data)
- else
- data = File.read("#{@cachedir}/#{entity}.json")
- end
-
- case entity
- when :modules
- data = flatten_modules(data)
- when :releases
- data = flatten_releases(data)
- end
- save_nld_json(entity.to_s, data)
- end
-
- def retrieve_validations(modules, period = 25)
- results = {}
+ def retrieve(entity)
+ raise 'Please process downloaded data by passing a block' unless block_given?

+ # using authors for git repo terminology consistency
+ entity = :users if entity == :authors
  begin
  offset = 0
- endpoint = "/private/validations/"
- modules.each do |mod|
- name = "#{mod['owner']['username']}-#{mod['name']}"
- response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
+ endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
+
+ while endpoint do
+ response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
  raise "Forge Error: #{@response.body}" unless response.code == 200
+ data = JSON.parse(response.body)
+ results = munge_dates(data['results'])
+
+ case entity
+ when :modules
+ results = flatten_modules(results)
+ when :releases
+ results = flatten_releases(results)
+ end

- results[name] = JSON.parse(response.body)
- offset += 1
+ yield results, offset

- if block_given? and (offset % period == 0)
- yield offset
+ offset += 50
+ endpoint = data['pagination']['next']
+ if (endpoint and (offset % 250 == 0))
  GC.start
  end
  end
+
  rescue => e
  $logger.error e.message
  $logger.debug e.backtrace.join("\n")
  end

- results
+ nil
  end

- def validations()
- cache = "#{@cachedir}/modules.json"
-
- if File.exist? cache
- module_data = JSON.parse(File.read(cache))
- else
- module_data = retrieve(:modules)
- end
+ def retrieve_validations(modules, period = 25)
+ raise 'Please process validations by passing a block' unless block_given?

+ offset = 0
  begin
- spinner = TTY::Spinner.new("[:spinner] :title")
- spinner.update(title: "Downloading module validations ...")
- spinner.auto_spin
+ modules.each_slice(period) do |group|
+ offset += period
+ results = group.map { |mod| validations(mod[:slug]) }

- results = retrieve_validations(module_data) do |offset|
- spinner.update(title: "Downloading module validations [#{offset}]...")
+ yield results, offset
+ GC.start
  end
-
- spinner.success('(OK)')
  rescue => e
- spinner.error('API error')
  $logger.error e.message
  $logger.debug e.backtrace.join("\n")
  end

- save_json('validations', results)
- save_nld_json('validations', flatten_validations(results))
- results
+ nil
  end

- def download(entity)
- raise 'Please process downloaded data by passing a block' unless block_given?
+ def validations(name)
+ endpoint = "/private/validations/"
+ response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
+ raise "Forge Error: #{@response.body}" unless response.code == 200

- begin
- offset = 0
- endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
- spinner = TTY::Spinner.new("[:spinner] :title")
- spinner.update(title: "Downloading #{entity} ...")
- spinner.auto_spin
-
- while endpoint do
- response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
- raise "Forge Error: #{@response.body}" unless response.code == 200
- data = JSON.parse(response.body)
-
- offset += 50
- endpoint = data['pagination']['next']
-
- yield munge_dates(data['results'])
-
- if (endpoint and (offset % 250 == 0))
- spinner.update(title: "Downloading #{entity} [#{offset}]...")
- GC.start
- end
- end
-
- spinner.success('(OK)')
- rescue => e
- spinner.error('API error')
- $logger.error e.message
- $logger.debug e.backtrace.join("\n")
- end
-
- nil
+ flatten_validations(name, JSON.parse(response.body))
  end

+
  # transform dates into a format that bigquery knows
  def munge_dates(object)
  ["created_at", "updated_at", "deprecated_at", "deleted_at"].each do |field|
@@ -160,16 +91,6 @@ class Mvp
  object
  end

- def save_json(thing, data)
- File.write("#{@cachedir}/#{thing}.json", data.to_json)
- end
-
- # store data in a way that bigquery can grok
- # uploading files is far easier than streaming data, when replacing a dataset
- def save_nld_json(thing, data)
- File.write("#{@cachedir}/nld_#{thing}.json", data.to_newline_delimited_json)
- end
-
  def flatten_modules(data)
  data.each do |row|
  row['owner'] = row['owner']['username']
@@ -209,14 +130,12 @@ class Mvp
  data
  end

- def flatten_validations(data)
- data.map do |name, scores|
- row = { 'name' => name }
- scores.each do |entry|
- row[entry['name']] = entry['score']
- end
- row
+ def flatten_validations(name, scores)
+ row = { 'name' => name }
+ scores.each do |entry|
+ row[entry['name']] = entry['score']
  end
+ row
  end

  def simplify_metadata(data, metadata)
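
Where the old Downloader#download drove a spinner itself, Forge#retrieve now just pages through the API fifty records at a time, flattens each page, and yields it to the caller's block along with the running offset. A minimal usage sketch (the options hash is assumed; this mirrors how the runner below consumes it):

    forge = Mvp::Forge.new(options)
    forge.retrieve(:modules) do |results, offset|
      puts "offset #{offset}: #{results.size} modules"   # one page of up to 50 rows
    end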
data/lib/mvp/itemizer.rb CHANGED
@@ -27,6 +27,14 @@ class Mvp
  end
  end

+ def itemized(mod)
+ modname = mod[:slug]
+ version = mod[:version]
+ baserow = { :module => modname, :version => version, :kind => 'admin', :element => 'version', :count => 0}
+
+ table(itemize(modname, version), mod) << baserow
+ end
+
  def download(path, modname, version)
  filename = "#{modname}-#{version}.tar.gz"
  Dir.chdir(path) do
@@ -58,10 +66,10 @@ class Mvp
  # Build a table with this schema
  # module | version | source | kind | element | count
  def table(itemized, data)
- modname = data['name']
- slug = data['slug']
- version = data['version']
- dependencies = data['dependencies']
+ modname = data[:name]
+ slug = data[:slug]
+ version = data[:version]
+ dependencies = data[:dependencies]

  itemized.map do |kind, elements|
  # the kind of element comes pluralized from puppet-itemize
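
For reference, the baserow appended by the new itemized() helper records the module's own version as an 'admin' element, so every known version lands in forge_itemized even when a module declares nothing else itemizable. For a hypothetical module it evaluates to:

    { :module => 'puppetlabs-stdlib', :version => '5.2.0',   # example slug and version
      :kind => 'admin', :element => 'version', :count => 0 }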
data/lib/mvp/puppetfile_parser.rb ADDED
@@ -0,0 +1,171 @@
+ class Mvp
+ class PuppetfileParser
+ def initialize(options = {})
+ @sources = {}
+ @modules = []
+ @repo = nil
+ end
+
+ def suitable?
+ defined?(RubyVM::AbstractSyntaxTree)
+ end
+
+ def sources=(modules)
+ modules.each do |row|
+ next unless row[:source]
+ next if row[:source] == 'UNKNOWN'
+
+ @sources[canonical_git_repo(row[:source])] = row[:slug]
+ end
+ end
+
+ def parse(repo)
+ # This only works on Ruby 2.6+
+ return unless suitable?
+
+ begin
+ root = RubyVM::AbstractSyntaxTree.parse(repo[:content])
+ rescue SyntaxError => e
+ $logger.warn "Syntax error in #{repo[:repo_name]}/Puppetfile"
+ $logger.warn e.message
+ end
+
+ @repo = repo
+ @modules = []
+ traverse(root)
+ @modules.compact.map do |row|
+ row[:repo_name] = repo[:repo_name]
+ row[:md5] = repo[:md5]
+ row[:module] = canonical_name(row[:module], row[:source])
+ stringify(row)
+ end
+ end
+
+ def stringify(row)
+ row.each do |key, value|
+ if value.is_a? RubyVM::AbstractSyntaxTree::Node
+ row[key] = :'#<programmatically generated via ruby code>'
+ end
+ end
+ end
+
+ def canonical_name(name, repo)
+ return name if name.include?('-')
+ repo = canonical_git_repo(repo)
+
+ return @sources[repo] if @sources.include?(repo)
+ name
+ end
+
+ def canonical_git_repo(repo)
+ return unless repo
+ return unless repo.is_a? String
+ repo.sub(/^git@github.com\:/, 'github.com/')
+ .sub(/^(git|https?)\:\/\//, '')
+ .sub(/\.git$/, '')
+ end
+
+ def add_module(name, args)
+ unless name.is_a? String
+ $logger.warn "Non string module name in #{@repo[:repo_name]}/Puppetfile"
+ return nil
+ end
+ name.gsub!('/', '-')
+ case args
+ when String, Symbol, NilClass
+ @modules << {
+ :module => name,
+ :type => :forge,
+ :source => :forge,
+ :version => args,
+ }
+ when Hash
+ @modules << parse_args(name, args)
+ else
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown format: mod('#{name}', #{args.inspect})"
+ end
+ end
+
+ def parse_args(name, args)
+ data = {:module => name}
+
+ if args.include? :git
+ data[:type] = :git
+ data[:source] = args[:git]
+ data[:version] = args[:ref] || args[:tag] || args[:commit] || args[:branch] || :latest
+ elsif args.include? :svn
+ data[:type] = :svn
+ data[:source] = args[:svn]
+ data[:version] = args[:rev] || args[:revision] || :latest
+ elsif args.include? :boxen
+ data[:type] = :boxen
+ data[:source] = args[:repo]
+ data[:version] = args[:version] || :latest
+ else
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown args format: mod('#{name}', #{args.inspect})"
+ return nil
+ end
+
+ data
+ end
+
+ def traverse(node)
+ begin
+ if node.type == :FCALL
+ name = node.children.first
+ args = node.children.last.children.map do |item|
+ next if item.nil?
+
+ case item.type
+ when :HASH
+ Hash[*item.children.first.children.compact.map {|n| n.children.first }]
+ else
+ item.children.first
+ end
+ end.compact
+
+ case name
+ when :mod
+ add_module(args.shift, args.shift)
+ when :forge
+ # noop
+ when :moduledir
+ # noop
+ when :github
+ # oh boxen, you so silly.
+ # The order of the unpacking below *is* important.
+ modname = args.shift
+ version = args.shift
+ data = args.shift || {}
+
+ # this is gross but I'm not sure I actually care right now.
+ if (modname.is_a? String and [String, NilClass].include? version.class and data.is_a? Hash)
+ data[:boxen] = :boxen
+ data[:version] = version
+ add_module(modname, data)
+ else
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: malformed boxen"
+ end
+ else
+ # Should we record unexpected Ruby code or just log it to stdout?
+ args = args.map {|a| a.is_a?(String) ? "'#{a}'" : a}.join(', ')
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: Unexpected invocation of #{name}(#{args})"
+ end
+ end
+
+ node.children.each do |n|
+ next unless n.is_a? RubyVM::AbstractSyntaxTree::Node
+
+ traverse(n)
+ end
+ rescue => e
+ puts e.message
+ end
+ end
+
+ def test()
+ require 'pry'
+ binding.pry
+ end
+ end
+ end
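
Since the parser walks the Ruby AST (RubyVM::AbstractSyntaxTree, hence the Ruby 2.6+ requirement) rather than eval'ing untrusted Puppetfiles, a quick hypothetical run shows the row shape it emits (the repo name, md5, and module pin below are made up):

    parser = Mvp::PuppetfileParser.new
    repo   = {
      :repo_name => 'example/control-repo',
      :md5       => 'd41d8cd98f00b204e9800998ecf8427e',
      :content   => "mod 'puppetlabs/stdlib', '5.2.0'",
    }
    parser.parse(repo)
    # => [{ :module => 'puppetlabs-stdlib', :type => :forge, :source => :forge,
    #       :version => '5.2.0', :repo_name => 'example/control-repo',
    #       :md5 => 'd41d8cd98f00b204e9800998ecf8427e' }]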
data/lib/mvp/runner.rb CHANGED
@@ -1,6 +1,10 @@
- require 'mvp/downloader'
- require 'mvp/uploader'
+ require 'mvp/forge'
+ require 'mvp/bigquery'
  require 'mvp/stats'
+ require 'mvp/itemizer'
+ require 'mvp/puppetfile_parser'
+
+ require 'tty-spinner'

  class Mvp
  class Runner
@@ -11,40 +15,94 @@ class Mvp
  end

  def retrieve(target = :all, download = true)
- downloader = Mvp::Downloader.new(@options)
+ bigquery = Mvp::Bigquery.new(@options)

- [:authors, :modules, :releases].each do |thing|
- next unless [:all, thing].include? target
- downloader.retrieve(thing, download)
- end
+ begin
+ [:authors, :modules, :releases, :validations].each do |thing|
+ next unless [:all, thing].include? target
+ spinner = mkspinner("Retrieving #{thing} ...")
+ data = bigquery.retrieve(thing)
+ save_json(thing, data)
+ spinner.success('(OK)')
+ end

- if [:all, :validations].include? target
- downloader.validations()
+ rescue => e
+ spinner.error("API error: #{e.message}")
+ $logger.error "API error: #{e.message}"
+ $logger.debug e.backtrace.join("\n")
+ sleep 10
  end
  end

- def upload(target = :all)
- uploader = Mvp::Uploader.new(@options)
+ def mirror(target = :all)
+ forge = Mvp::Forge.new(@options)
+ bigquery = Mvp::Bigquery.new(@options)
+ itemizer = Mvp::Itemizer.new(@options)
+ pfparser = Mvp::PuppetfileParser.new(@options)

- [:authors, :modules, :releases, :validations, :github_mirrors].each do |thing|
- next unless [:all, thing].include? target
- uploader.send(thing)
- end
- end
+ begin
+ [:authors, :modules, :releases].each do |thing|
+ next unless [:all, thing].include? target
+ spinner = mkspinner("Mirroring #{thing}...")
+ bigquery.truncate(thing)
+ forge.retrieve(thing) do |data, offset|
+ spinner.update(title: "Mirroring #{thing} [#{offset}]...")
+ bigquery.insert(thing, data)
+ end
+ spinner.success('(OK)')
+ end

- def mirror(target = :all)
- downloader = Mvp::Downloader.new(@options)
- uploader = Mvp::Uploader.new(@options)
+ if [:all, :validations].include? target
+ spinner = mkspinner("Mirroring validations...")
+ modules = bigquery.get(:modules, [:slug])
+ bigquery.truncate(:validations)
+ forge.retrieve_validations(modules) do |data, offset|
+ spinner.update(title: "Mirroring validations [#{offset}]...")
+ bigquery.insert(:validations, data)
+ end
+ spinner.success('(OK)')
+ end

- # validations are downloaded with modules
- [:authors, :modules, :releases].each do |thing|
- next unless [:all, thing].include? target
- uploader.truncate(thing)
- downloader.mirror(thing, uploader)
- end
+ if [:all, :itemizations].include? target
+ spinner = mkspinner("Itemizing modules...")
+ bigquery.unitemized.each do |mod|
+ spinner.update(title: "Itemizing [#{mod[:slug]}]...")
+ rows = itemizer.itemized(mod)
+ bigquery.delete(:itemized, :module, mod[:slug])
+ bigquery.insert(:itemized, rows)
+ end
+ spinner.success('(OK)')
+ end
+
+ if [:all, :mirrors, :tables].include? target
+ @options[:gcloud][:mirrors].each do |entity|
+ spinner = mkspinner("Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
+ bigquery.mirror_table(entity)
+ spinner.success('(OK)')
+ end
+ end
+
+ if [:all, :puppetfiles].include? target
+ spinner = mkspinner("Analyzing Puppetfile module references...")
+ if pfparser.suitable?
+ pfparser.sources = bigquery.module_sources
+ bigquery.puppetfiles.each do |repo|
+ spinner.update(title: "Analyzing [#{repo[:repo_name]}/Puppetfile]...")
+ rows = pfparser.parse(repo)
+ bigquery.delete(:puppetfile_usage, :repo_name, repo[:repo_name], :github)
+ bigquery.insert(:puppetfile_usage, rows, :github)
+ end
+ spinner.success('(OK)')
+ else
+ spinner.error("(Not functional on Ruby #{RUBY_VERSION})")
+ end
+ end

- if [:all, :mirrors].include? target
- uploader.github_mirrors()
+ rescue => e
+ spinner.error("API error: #{e.message}")
+ $logger.error "API error: #{e.message}"
+ $logger.debug e.backtrace.join("\n")
+ sleep 10
  end
  end

@@ -57,6 +115,17 @@ class Mvp
  end
  end

+ def mkspinner(title)
+ spinner = TTY::Spinner.new("[:spinner] :title")
+ spinner.update(title: title)
+ spinner.auto_spin
+ spinner
+ end
+
+ def save_json(thing, data)
+ File.write("#{@cachedir}/#{thing}.json", data.to_json)
+ end
+
  def test()
  require 'pry'
  binding.pry
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: puppet-community-mvp
  version: !ruby/object:Gem::Version
- version: 0.0.4
+ version: 0.0.5
  platform: ruby
  authors:
  - Ben Ford
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-10-31 00:00:00.000000000 Z
+ date: 2019-03-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: json
@@ -151,13 +151,14 @@ files:
  - LICENSE
  - README.md
  - bin/mvp
+ - bin/pftest.rb
  - lib/mvp.rb
- - lib/mvp/downloader.rb
+ - lib/mvp/bigquery.rb
+ - lib/mvp/forge.rb
  - lib/mvp/itemizer.rb
- - lib/mvp/monkeypatches.rb
+ - lib/mvp/puppetfile_parser.rb
  - lib/mvp/runner.rb
  - lib/mvp/stats.rb
- - lib/mvp/uploader.rb
  homepage:
  licenses:
  - Apache 2
data/lib/mvp/monkeypatches.rb DELETED
@@ -1,8 +0,0 @@
- # BigQuery uses newline delimited json
- # https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON
-
- class Array
- def to_newline_delimited_json
- self.map(&:to_json).join("\n")
- end
- end
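
With uploads now streamed through table.insert (see the bigquery.rb changes above) rather than bulk-loaded from newline-delimited JSON cache files, this Array#to_newline_delimited_json monkeypatch had no remaining callers and is removed along with the old downloader's save_nld_json helper.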