puppet-community-mvp 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e45ff6e06d776fb8c3129789b4f469f25f86ea59
4
- data.tar.gz: 640dd10a4620d77281ebb58ec4d097dc40b9bfbd
3
+ metadata.gz: 3f5ed8c2978cad58a0ce52346eb854b3b4e82a9a
4
+ data.tar.gz: a5637f505ed5fe8c74a22f9633723b9211ad3c4a
5
5
  SHA512:
6
- metadata.gz: 00fffd365248a810b1542eb7e4e370398330d469834a2e5cecab70f0fe370149d8bc87ac7027c9260c6a2d0a8f24a06915e5946bf5d84c5dd6c07a8e4ac95ac3
7
- data.tar.gz: ac1e153462e94226eb56621e687667f369aa0ca35f3d34dd6e3c9096e0d7dc56dcb1f4584f0b2072cda3f47a5c5f1b2b645f106d5f314ef45fdcc3d9f6af6a25
6
+ metadata.gz: 3ad31ba42a0e2f96ae8254bf383e523c74890fa219cb1b27946cae7718aba76b2d3532149dc98dfc1d59b5e47c4d31c70156c96203659425b55607c6a0d948d1
7
+ data.tar.gz: f8970bb1a709f807e3e5d2f45e2c74f7adbea034fc5d9293024e76f4dfb8b2e9eb59ecf636e38b12d69ce224867bbc96570f5e9ed1ddb5092783888e145ceec9
data/bin/mvp CHANGED
@@ -13,16 +13,14 @@ optparse = OptionParser.new { |opts|
13
13
  opts.banner = "Usage : #{NAME} [command] [target] [options]
14
14
 
15
15
  This tool will scrape the Puppet Forge API for interesting module & author stats.
16
- The following CLI commands are available.
16
+ It can also mirror public BigQuery tables or views into our dataset for efficiency,
17
+ or download and itemize each Forge module.
17
18
 
18
- * get | retrieve | download [target]
19
- * Downloads and caches all Forge metadata.
20
- * Optional targets: all, authors, modules, releases
21
- * upload | insert [target]
22
- * Uploads data to BigQuery
23
- * Optional targets: all, authors, modules, releases, mirrors
24
19
  * mirror [target]
25
20
  * Runs the download & then upload tasks.
21
+ * Optional targets: all, authors, modules, releases, validations, itemizations, puppetfiles, tables
22
+ * get | retrieve | download [target]
23
+ * Downloads and caches data locally so you can run the stats task.
26
24
  * Optional targets: all, authors, modules, releases
27
25
  * stats
28
26
  * Print out a summary of interesting stats.
@@ -64,6 +62,10 @@ The following CLI commands are available.
64
62
  options[:debug] = true
65
63
  end
66
64
 
65
+ opts.on("-n", "--noop", "Don't actually upload data.") do
66
+ options[:noop] = true
67
+ end
68
+
67
69
  opts.separator('')
68
70
 
69
71
  opts.on("-h", "--help", "Displays this help") do
@@ -100,14 +102,6 @@ when 'get', 'retrieve', 'download'
100
102
  target ||= :all
101
103
  runner.retrieve(target.to_sym)
102
104
 
103
- when 'transform'
104
- target ||= :all
105
- runner.retrieve(target.to_sym, false)
106
-
107
- when 'insert', 'upload'
108
- target ||= :all
109
- runner.upload(target.to_sym)
110
-
111
105
  when 'mirror'
112
106
  target ||= :all
113
107
  runner.mirror(target.to_sym)
data/bin/pftest.rb ADDED
@@ -0,0 +1,22 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'mvp/puppetfile_parser'
4
+ require 'open-uri'
5
+ require 'json'
6
+ require 'logger'
7
+
8
+ $logger = Logger::new(STDOUT)
9
+ $logger.level = Logger::INFO
10
+ $logger.formatter = proc { |severity,datetime,progname,msg| "#{severity}: #{msg}\n" }
11
+
12
+ pf = open(ARGV.first)
13
+ parser = Mvp::PuppetfileParser.new()
14
+
15
+
16
+ repo = {
17
+ :repo_name => 'testing',
18
+ :md5 => 'wakka wakka',
19
+ :content => pf.read,
20
+ }
21
+
22
+ puts JSON.pretty_generate(parser.parse(repo))
data/lib/mvp.rb CHANGED
@@ -1,4 +1,2 @@
1
1
  require 'mvp/runner'
2
- require 'mvp/downloader'
3
- require 'mvp/uploader'
4
- require 'mvp/stats'
2
+ require 'mvp/stats'
@@ -3,10 +3,10 @@ require 'tty-spinner'
3
3
  require "google/cloud/bigquery"
4
4
 
5
5
  class Mvp
6
- class Uploader
6
+ class Bigquery
7
7
  def initialize(options = {})
8
+ @options = options
8
9
  @cachedir = options[:cachedir]
9
- @mirrors = options[:gcloud][:mirrors]
10
10
  @bigquery = Google::Cloud::Bigquery.new(
11
11
  :project_id => options[:gcloud][:project],
12
12
  :credentials => Google::Cloud::Bigquery::Credentials.new(options[:gcloud][:keyfile]),
@@ -27,9 +27,24 @@ class Mvp
27
27
  s.integer "count", mode: :required
28
28
  end
29
29
  end
30
+
31
+ @puppetfile_usage = @dataset.table('github_puppetfile_usage') || @dataset.create_table('github_puppetfile_usage') do |table|
32
+ table.name = 'Puppetfile Module Usage'
33
+ table.description = 'A list of all modules referenced in public Puppetfiles'
34
+ table.schema do |s|
35
+ s.string "repo_name", mode: :required
36
+ s.string "module", mode: :required
37
+ s.string "type", mode: :required
38
+ s.string "source"
39
+ s.string "version"
40
+ s.string "md5", mode: :required
41
+ end
42
+ end
30
43
  end
31
44
 
32
45
  def truncate(entity)
46
+ return if @options[:noop]
47
+
33
48
  begin
34
49
  case entity
35
50
  when :authors
@@ -163,95 +178,85 @@ class Mvp
163
178
  end
164
179
  end
165
180
 
166
- def authors()
167
- upload('authors')
168
- end
169
-
170
- def modules()
171
- upload('modules')
181
+ def retrieve(entity)
182
+ get(entity, ['*'])
172
183
  end
173
184
 
174
- def releases()
175
- upload('releases')
176
- end
185
+ def mirror_table(entity)
186
+ return if @options[:noop]
177
187
 
178
- def validations()
179
- upload('validations')
180
- end
181
-
182
- def github_mirrors()
183
- @mirrors.each do |entity|
184
- begin
185
- spinner = TTY::Spinner.new("[:spinner] :title")
186
- spinner.update(title: "Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
187
- spinner.auto_spin
188
-
189
- case entity[:type]
190
- when :view
191
- @dataset.table(entity[:name]).delete rescue nil # delete if exists
192
- @dataset.create_view(entity[:name], entity[:query],
193
- :legacy_sql => true)
194
-
195
- when :table
196
- job = @dataset.query_job(entity[:query],
197
- :legacy_sql => true,
198
- :write => 'truncate',
199
- :table => @dataset.table(entity[:name], :skip_lookup => true))
200
- job.wait_until_done!
188
+ begin
189
+ case entity[:type]
190
+ when :view
191
+ @dataset.table(entity[:name]).delete rescue nil # delete if exists
192
+ @dataset.create_view(entity[:name], entity[:query])
201
193
 
202
- else
203
- $logger.error "Unknown mirror type: #{entity[:type]}"
204
- end
194
+ when :table
195
+ job = @dataset.query_job(entity[:query],
196
+ :write => 'truncate',
197
+ :table => @dataset.table(entity[:name], :skip_lookup => true))
198
+ job.wait_until_done!
205
199
 
206
- spinner.success('(OK)')
207
- rescue => e
208
- spinner.error("(Google Cloud error: #{e.message})")
209
- $logger.error e.backtrace.join("\n")
200
+ else
201
+ $logger.error "Unknown mirror type: #{entity[:type]}"
210
202
  end
203
+ rescue => e
204
+ $logger.error("(Google Cloud error: #{e.message})")
205
+ $logger.debug e.backtrace.join("\n")
211
206
  end
212
207
  end
213
208
 
214
- def insert(entity, data)
215
- table = @dataset.table("forge_#{entity}")
209
+ def insert(entity, data, suite = 'forge')
210
+ return if @options[:noop]
211
+ return if data.empty?
212
+
213
+ table = @dataset.table("#{suite}_#{entity}")
216
214
  response = table.insert(data)
217
215
 
218
216
  unless response.success?
219
- errors = {}
220
217
  response.insert_errors.each do |err|
221
- errors[err.row['slug']] = err.errors
218
+ $logger.error JSON.pretty_generate(err.row)
219
+ $logger.error JSON.pretty_generate(err.errors)
222
220
  end
223
- $logger.error JSON.pretty_generate(errors)
224
221
  end
225
222
  end
226
223
 
227
- def upload(entity)
228
- begin
229
- spinner = TTY::Spinner.new("[:spinner] :title")
230
- spinner.update(title: "Uploading #{entity} to BigQuery ...")
231
- spinner.auto_spin
224
+ def delete(entity, field, match, suite = 'forge')
225
+ @dataset.query("DELETE FROM #{suite}_#{entity} WHERE #{field} = '#{match}'")
226
+ end
232
227
 
233
- @dataset.load("forge_#{entity}", "#{@cachedir}/nld_#{entity}.json",
234
- :write => 'truncate',
235
- :autodetect => true)
228
+ def get(entity, fields, suite = 'forge')
229
+ raise 'pass fields as an array' unless fields.is_a? Array
230
+ @dataset.query("SELECT #{fields.join(', ')} FROM #{suite}_#{entity}")
231
+ end
236
232
 
237
- # table = @dataset.table("forge_#{entity}")
238
- # File.readlines("#{@cachedir}/nld_#{entity}.json").each do |line|
239
- # data = JSON.parse(line)
240
- #
241
- # begin
242
- # table.insert data
243
- # rescue
244
- # require 'pry'
245
- # binding.pry
246
- # end
247
- # end
233
+ def module_sources()
234
+ get('modules', ['slug', 'source'])
235
+ end
248
236
 
237
+ def puppetfiles()
238
+ sql = 'SELECT f.repo_name, f.path, c.content, c.md5
239
+ FROM github_puppetfile_files AS f
240
+ JOIN github_puppetfile_contents AS c
241
+ ON c.id = f.id
249
242
 
250
- spinner.success('(OK)')
251
- rescue => e
252
- spinner.error("(Google Cloud error: #{e.message})")
253
- $logger.error e.backtrace.join("\n")
254
- end
243
+ WHERE c.md5 NOT IN (
244
+ SELECT u.md5
245
+ FROM github_puppetfile_usage AS u
246
+ WHERE u.repo_name = f.repo_name
247
+ ) AND LOWER(repo_name) NOT LIKE "%boxen%"'
248
+ @dataset.query(sql)
249
+ end
250
+
251
+ def unitemized()
252
+ sql = 'SELECT m.name, m.slug, m.version, m.dependencies
253
+ FROM forge_modules AS m
254
+ WHERE m.version NOT IN (
255
+ SELECT i.version
256
+ FROM forge_itemized AS i
257
+ WHERE module = m.slug
258
+ )'
259
+ @dataset.query(sql)
255
260
  end
256
261
 
257
262
  def version_itemized?(mod, version)
@@ -2,151 +2,82 @@ require 'json'
2
2
  require 'httparty'
3
3
  require 'tty-spinner'
4
4
  require 'semantic_puppet'
5
- require 'mvp/monkeypatches'
6
- require 'mvp/itemizer'
7
5
 
8
6
  class Mvp
9
- class Downloader
7
+ class Forge
10
8
  def initialize(options = {})
11
9
  @useragent = 'Puppet Community Stats Monitor'
12
- @cachedir = options[:cachedir]
13
10
  @forgeapi = options[:forgeapi] ||'https://forgeapi.puppet.com'
14
- @itemizer = Mvp::Itemizer.new(options)
15
11
  end
16
12
 
17
- def mirror(entity, uploader)
18
- # using authors for git repo terminology consistency
19
- item = (entity == :authors) ? 'users' : entity.to_s
20
- download(item) do |data|
21
- case entity
22
- when :modules
23
- uploader.insert(:validations, flatten_validations(retrieve_validations(data)))
24
- data = flatten_modules(data)
25
-
26
- @itemizer.run!(data, uploader)
27
- when :releases
28
- data = flatten_releases(data)
29
- end
30
-
31
- uploader.insert(entity, data)
32
- end
33
- end
34
-
35
- def retrieve(entity, download = true)
36
- if download
37
- # I am focusing on authorship rather than just users, so for now I'm using the word authors
38
- item = (entity == :authors) ? 'users' : entity.to_s
39
- data = []
40
- download(item) do |resp|
41
- data.concat resp
42
- end
43
- save_json(entity, data)
44
- else
45
- data = File.read("#{@cachedir}/#{entity}.json")
46
- end
47
-
48
- case entity
49
- when :modules
50
- data = flatten_modules(data)
51
- when :releases
52
- data = flatten_releases(data)
53
- end
54
- save_nld_json(entity.to_s, data)
55
- end
56
-
57
- def retrieve_validations(modules, period = 25)
58
- results = {}
13
+ def retrieve(entity)
14
+ raise 'Please process downloaded data by passing a block' unless block_given?
59
15
 
16
+ # using authors for git repo terminology consistency
17
+ entity = :users if entity == :authors
60
18
  begin
61
19
  offset = 0
62
- endpoint = "/private/validations/"
63
- modules.each do |mod|
64
- name = "#{mod['owner']['username']}-#{mod['name']}"
65
- response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
20
+ endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
21
+
22
+ while endpoint do
23
+ response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
66
24
  raise "Forge Error: #{@response.body}" unless response.code == 200
25
+ data = JSON.parse(response.body)
26
+ results = munge_dates(data['results'])
27
+
28
+ case entity
29
+ when :modules
30
+ results = flatten_modules(results)
31
+ when :releases
32
+ results = flatten_releases(results)
33
+ end
67
34
 
68
- results[name] = JSON.parse(response.body)
69
- offset += 1
35
+ yield results, offset
70
36
 
71
- if block_given? and (offset % period == 0)
72
- yield offset
37
+ offset += 50
38
+ endpoint = data['pagination']['next']
39
+ if (endpoint and (offset % 250 == 0))
73
40
  GC.start
74
41
  end
75
42
  end
43
+
76
44
  rescue => e
77
45
  $logger.error e.message
78
46
  $logger.debug e.backtrace.join("\n")
79
47
  end
80
48
 
81
- results
49
+ nil
82
50
  end
83
51
 
84
- def validations()
85
- cache = "#{@cachedir}/modules.json"
86
-
87
- if File.exist? cache
88
- module_data = JSON.parse(File.read(cache))
89
- else
90
- module_data = retrieve(:modules)
91
- end
52
+ def retrieve_validations(modules, period = 25)
53
+ raise 'Please process validations by passing a block' unless block_given?
92
54
 
55
+ offset = 0
93
56
  begin
94
- spinner = TTY::Spinner.new("[:spinner] :title")
95
- spinner.update(title: "Downloading module validations ...")
96
- spinner.auto_spin
57
+ modules.each_slice(period) do |group|
58
+ offset += period
59
+ results = group.map { |mod| validations(mod[:slug]) }
97
60
 
98
- results = retrieve_validations(module_data) do |offset|
99
- spinner.update(title: "Downloading module validations [#{offset}]...")
61
+ yield results, offset
62
+ GC.start
100
63
  end
101
-
102
- spinner.success('(OK)')
103
64
  rescue => e
104
- spinner.error('API error')
105
65
  $logger.error e.message
106
66
  $logger.debug e.backtrace.join("\n")
107
67
  end
108
68
 
109
- save_json('validations', results)
110
- save_nld_json('validations', flatten_validations(results))
111
- results
69
+ nil
112
70
  end
113
71
 
114
- def download(entity)
115
- raise 'Please process downloaded data by passing a block' unless block_given?
72
+ def validations(name)
73
+ endpoint = "/private/validations/"
74
+ response = HTTParty.get("#{@forgeapi}#{endpoint}#{name}", headers: {'User-Agent' => @useragent})
75
+ raise "Forge Error: #{@response.body}" unless response.code == 200
116
76
 
117
- begin
118
- offset = 0
119
- endpoint = "/v3/#{entity}?sort_by=downloads&limit=50"
120
- spinner = TTY::Spinner.new("[:spinner] :title")
121
- spinner.update(title: "Downloading #{entity} ...")
122
- spinner.auto_spin
123
-
124
- while endpoint do
125
- response = HTTParty.get("#{@forgeapi}#{endpoint}", headers: {"User-Agent" => @useragent})
126
- raise "Forge Error: #{@response.body}" unless response.code == 200
127
- data = JSON.parse(response.body)
128
-
129
- offset += 50
130
- endpoint = data['pagination']['next']
131
-
132
- yield munge_dates(data['results'])
133
-
134
- if (endpoint and (offset % 250 == 0))
135
- spinner.update(title: "Downloading #{entity} [#{offset}]...")
136
- GC.start
137
- end
138
- end
139
-
140
- spinner.success('(OK)')
141
- rescue => e
142
- spinner.error('API error')
143
- $logger.error e.message
144
- $logger.debug e.backtrace.join("\n")
145
- end
146
-
147
- nil
77
+ flatten_validations(name, JSON.parse(response.body))
148
78
  end
149
79
 
80
+
150
81
  # transform dates into a format that bigquery knows
151
82
  def munge_dates(object)
152
83
  ["created_at", "updated_at", "deprecated_at", "deleted_at"].each do |field|
@@ -160,16 +91,6 @@ class Mvp
160
91
  object
161
92
  end
162
93
 
163
- def save_json(thing, data)
164
- File.write("#{@cachedir}/#{thing}.json", data.to_json)
165
- end
166
-
167
- # store data in a way that bigquery can grok
168
- # uploading files is far easier than streaming data, when replacing a dataset
169
- def save_nld_json(thing, data)
170
- File.write("#{@cachedir}/nld_#{thing}.json", data.to_newline_delimited_json)
171
- end
172
-
173
94
  def flatten_modules(data)
174
95
  data.each do |row|
175
96
  row['owner'] = row['owner']['username']
@@ -209,14 +130,12 @@ class Mvp
209
130
  data
210
131
  end
211
132
 
212
- def flatten_validations(data)
213
- data.map do |name, scores|
214
- row = { 'name' => name }
215
- scores.each do |entry|
216
- row[entry['name']] = entry['score']
217
- end
218
- row
133
+ def flatten_validations(name, scores)
134
+ row = { 'name' => name }
135
+ scores.each do |entry|
136
+ row[entry['name']] = entry['score']
219
137
  end
138
+ row
220
139
  end
221
140
 
222
141
  def simplify_metadata(data, metadata)
data/lib/mvp/itemizer.rb CHANGED
@@ -27,6 +27,14 @@ class Mvp
27
27
  end
28
28
  end
29
29
 
30
+ def itemized(mod)
31
+ modname = mod[:slug]
32
+ version = mod[:version]
33
+ baserow = { :module => modname, :version => version, :kind => 'admin', :element => 'version', :count => 0}
34
+
35
+ table(itemize(modname, version), mod) << baserow
36
+ end
37
+
30
38
  def download(path, modname, version)
31
39
  filename = "#{modname}-#{version}.tar.gz"
32
40
  Dir.chdir(path) do
@@ -58,10 +66,10 @@ class Mvp
58
66
  # Build a table with this schema
59
67
  # module | version | source | kind | element | count
60
68
  def table(itemized, data)
61
- modname = data['name']
62
- slug = data['slug']
63
- version = data['version']
64
- dependencies = data['dependencies']
69
+ modname = data[:name]
70
+ slug = data[:slug]
71
+ version = data[:version]
72
+ dependencies = data[:dependencies]
65
73
 
66
74
  itemized.map do |kind, elements|
67
75
  # the kind of element comes pluralized from puppet-itemize
@@ -0,0 +1,171 @@
1
+ class Mvp
2
+ class PuppetfileParser
3
+ def initialize(options = {})
4
+ @sources = {}
5
+ @modules = []
6
+ @repo = nil
7
+ end
8
+
9
+ def suitable?
10
+ defined?(RubyVM::AbstractSyntaxTree)
11
+ end
12
+
13
+ def sources=(modules)
14
+ modules.each do |row|
15
+ next unless row[:source]
16
+ next if row[:source] == 'UNKNOWN'
17
+
18
+ @sources[canonical_git_repo(row[:source])] = row[:slug]
19
+ end
20
+ end
21
+
22
+ def parse(repo)
23
+ # This only works on Ruby 2.6+
24
+ return unless suitable?
25
+
26
+ begin
27
+ root = RubyVM::AbstractSyntaxTree.parse(repo[:content])
28
+ rescue SyntaxError => e
29
+ $logger.warn "Syntax error in #{repo[:repo_name]}/Puppetfile"
30
+ $logger.warn e.message
31
+ end
32
+
33
+ @repo = repo
34
+ @modules = []
35
+ traverse(root)
36
+ @modules.compact.map do |row|
37
+ row[:repo_name] = repo[:repo_name]
38
+ row[:md5] = repo[:md5]
39
+ row[:module] = canonical_name(row[:module], row[:source])
40
+ stringify(row)
41
+ end
42
+ end
43
+
44
+ def stringify(row)
45
+ row.each do |key, value|
46
+ if value.is_a? RubyVM::AbstractSyntaxTree::Node
47
+ row[key] = :'#<programmatically generated via ruby code>'
48
+ end
49
+ end
50
+ end
51
+
52
+ def canonical_name(name, repo)
53
+ return name if name.include?('-')
54
+ repo = canonical_git_repo(repo)
55
+
56
+ return @sources[repo] if @sources.include?(repo)
57
+ name
58
+ end
59
+
60
+ def canonical_git_repo(repo)
61
+ return unless repo
62
+ return unless repo.is_a? String
63
+ repo.sub(/^git@github.com\:/, 'github.com/')
64
+ .sub(/^(git|https?)\:\/\//, '')
65
+ .sub(/\.git$/, '')
66
+ end
67
+
68
+ def add_module(name, args)
69
+ unless name.is_a? String
70
+ $logger.warn "Non string module name in #{@repo[:repo_name]}/Puppetfile"
71
+ return nil
72
+ end
73
+ name.gsub!('/', '-')
74
+ case args
75
+ when String, Symbol, NilClass
76
+ @modules << {
77
+ :module => name,
78
+ :type => :forge,
79
+ :source => :forge,
80
+ :version => args,
81
+ }
82
+ when Hash
83
+ @modules << parse_args(name, args)
84
+ else
85
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown format: mod('#{name}', #{args.inspect})"
86
+ end
87
+ end
88
+
89
+ def parse_args(name, args)
90
+ data = {:module => name}
91
+
92
+ if args.include? :git
93
+ data[:type] = :git
94
+ data[:source] = args[:git]
95
+ data[:version] = args[:ref] || args[:tag] || args[:commit] || args[:branch] || :latest
96
+ elsif args.include? :svn
97
+ data[:type] = :svn
98
+ data[:source] = args[:svn]
99
+ data[:version] = args[:rev] || args[:revision] || :latest
100
+ elsif args.include? :boxen
101
+ data[:type] = :boxen
102
+ data[:source] = args[:repo]
103
+ data[:version] = args[:version] || :latest
104
+ else
105
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: Unknown args format: mod('#{name}', #{args.inspect})"
106
+ return nil
107
+ end
108
+
109
+ data
110
+ end
111
+
112
+ def traverse(node)
113
+ begin
114
+ if node.type == :FCALL
115
+ name = node.children.first
116
+ args = node.children.last.children.map do |item|
117
+ next if item.nil?
118
+
119
+ case item.type
120
+ when :HASH
121
+ Hash[*item.children.first.children.compact.map {|n| n.children.first }]
122
+ else
123
+ item.children.first
124
+ end
125
+ end.compact
126
+
127
+ case name
128
+ when :mod
129
+ add_module(args.shift, args.shift)
130
+ when :forge
131
+ # noop
132
+ when :moduledir
133
+ # noop
134
+ when :github
135
+ # oh boxen, you so silly.
136
+ # The order of the unpacking below *is* important.
137
+ modname = args.shift
138
+ version = args.shift
139
+ data = args.shift || {}
140
+
141
+ # this is gross but I'm not sure I actually care right now.
142
+ if (modname.is_a? String and [String, NilClass].include? version.class and data.is_a? Hash)
143
+ data[:boxen] = :boxen
144
+ data[:version] = version
145
+ add_module(modname, data)
146
+ else
147
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: malformed boxen"
148
+ end
149
+ else
150
+ # Should we record unexpected Ruby code or just log it to stdout?
151
+ args = args.map {|a| a.is_a?(String) ? "'#{a}'" : a}.join(', ')
152
+ $logger.warn "#{@repo[:repo_name]}/Puppetfile: Unexpected invocation of #{name}(#{args})"
153
+ end
154
+ end
155
+
156
+ node.children.each do |n|
157
+ next unless n.is_a? RubyVM::AbstractSyntaxTree::Node
158
+
159
+ traverse(n)
160
+ end
161
+ rescue => e
162
+ puts e.message
163
+ end
164
+ end
165
+
166
+ def test()
167
+ require 'pry'
168
+ binding.pry
169
+ end
170
+ end
171
+ end
data/lib/mvp/runner.rb CHANGED
@@ -1,6 +1,10 @@
1
- require 'mvp/downloader'
2
- require 'mvp/uploader'
1
+ require 'mvp/forge'
2
+ require 'mvp/bigquery'
3
3
  require 'mvp/stats'
4
+ require 'mvp/itemizer'
5
+ require 'mvp/puppetfile_parser'
6
+
7
+ require 'tty-spinner'
4
8
 
5
9
  class Mvp
6
10
  class Runner
@@ -11,40 +15,94 @@ class Mvp
11
15
  end
12
16
 
13
17
  def retrieve(target = :all, download = true)
14
- downloader = Mvp::Downloader.new(@options)
18
+ bigquery = Mvp::Bigquery.new(@options)
15
19
 
16
- [:authors, :modules, :releases].each do |thing|
17
- next unless [:all, thing].include? target
18
- downloader.retrieve(thing, download)
19
- end
20
+ begin
21
+ [:authors, :modules, :releases, :validations].each do |thing|
22
+ next unless [:all, thing].include? target
23
+ spinner = mkspinner("Retrieving #{thing} ...")
24
+ data = bigquery.retrieve(thing)
25
+ save_json(thing, data)
26
+ spinner.success('(OK)')
27
+ end
20
28
 
21
- if [:all, :validations].include? target
22
- downloader.validations()
29
+ rescue => e
30
+ spinner.error("API error: #{e.message}")
31
+ $logger.error "API error: #{e.message}"
32
+ $logger.debug e.backtrace.join("\n")
33
+ sleep 10
23
34
  end
24
35
  end
25
36
 
26
- def upload(target = :all)
27
- uploader = Mvp::Uploader.new(@options)
37
+ def mirror(target = :all)
38
+ forge = Mvp::Forge.new(@options)
39
+ bigquery = Mvp::Bigquery.new(@options)
40
+ itemizer = Mvp::Itemizer.new(@options)
41
+ pfparser = Mvp::PuppetfileParser.new(@options)
28
42
 
29
- [:authors, :modules, :releases, :validations, :github_mirrors].each do |thing|
30
- next unless [:all, thing].include? target
31
- uploader.send(thing)
32
- end
33
- end
43
+ begin
44
+ [:authors, :modules, :releases].each do |thing|
45
+ next unless [:all, thing].include? target
46
+ spinner = mkspinner("Mirroring #{thing}...")
47
+ bigquery.truncate(thing)
48
+ forge.retrieve(thing) do |data, offset|
49
+ spinner.update(title: "Mirroring #{thing} [#{offset}]...")
50
+ bigquery.insert(thing, data)
51
+ end
52
+ spinner.success('(OK)')
53
+ end
34
54
 
35
- def mirror(target = :all)
36
- downloader = Mvp::Downloader.new(@options)
37
- uploader = Mvp::Uploader.new(@options)
55
+ if [:all, :validations].include? target
56
+ spinner = mkspinner("Mirroring validations...")
57
+ modules = bigquery.get(:modules, [:slug])
58
+ bigquery.truncate(:validations)
59
+ forge.retrieve_validations(modules) do |data, offset|
60
+ spinner.update(title: "Mirroring validations [#{offset}]...")
61
+ bigquery.insert(:validations, data)
62
+ end
63
+ spinner.success('(OK)')
64
+ end
38
65
 
39
- # validations are downloaded with modules
40
- [:authors, :modules, :releases].each do |thing|
41
- next unless [:all, thing].include? target
42
- uploader.truncate(thing)
43
- downloader.mirror(thing, uploader)
44
- end
66
+ if [:all, :itemizations].include? target
67
+ spinner = mkspinner("Itemizing modules...")
68
+ bigquery.unitemized.each do |mod|
69
+ spinner.update(title: "Itemizing [#{mod[:slug]}]...")
70
+ rows = itemizer.itemized(mod)
71
+ bigquery.delete(:itemized, :module, mod[:slug])
72
+ bigquery.insert(:itemized, rows)
73
+ end
74
+ spinner.success('(OK)')
75
+ end
76
+
77
+ if [:all, :mirrors, :tables].include? target
78
+ @options[:gcloud][:mirrors].each do |entity|
79
+ spinner = mkspinner("Mirroring #{entity[:type]} #{entity[:name]} to BigQuery...")
80
+ bigquery.mirror_table(entity)
81
+ spinner.success('(OK)')
82
+ end
83
+ end
84
+
85
+ if [:all, :puppetfiles].include? target
86
+ spinner = mkspinner("Analyzing Puppetfile module references...")
87
+ if pfparser.suitable?
88
+ pfparser.sources = bigquery.module_sources
89
+ bigquery.puppetfiles.each do |repo|
90
+ spinner.update(title: "Analyzing [#{repo[:repo_name]}/Puppetfile]...")
91
+ rows = pfparser.parse(repo)
92
+ bigquery.delete(:puppetfile_usage, :repo_name, repo[:repo_name], :github)
93
+ bigquery.insert(:puppetfile_usage, rows, :github)
94
+ end
95
+ spinner.success('(OK)')
96
+ else
97
+ spinner.error("(Not functional on Ruby #{RUBY_VERSION})")
98
+ end
99
+ end
45
100
 
46
- if [:all, :mirrors].include? target
47
- uploader.github_mirrors()
101
+ rescue => e
102
+ spinner.error("API error: #{e.message}")
103
+ $logger.error "API error: #{e.message}"
104
+ $logger.debug e.backtrace.join("\n")
105
+ sleep 10
48
106
  end
49
107
  end
50
108
 
@@ -57,6 +115,17 @@ class Mvp
57
115
  end
58
116
  end
59
117
 
118
+ def mkspinner(title)
119
+ spinner = TTY::Spinner.new("[:spinner] :title")
120
+ spinner.update(title: title)
121
+ spinner.auto_spin
122
+ spinner
123
+ end
124
+
125
+ def save_json(thing, data)
126
+ File.write("#{@cachedir}/#{thing}.json", data.to_json)
127
+ end
128
+
60
129
  def test()
61
130
  require 'pry'
62
131
  binding.pry
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: puppet-community-mvp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Ford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-31 00:00:00.000000000 Z
11
+ date: 2019-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -151,13 +151,14 @@ files:
151
151
  - LICENSE
152
152
  - README.md
153
153
  - bin/mvp
154
+ - bin/pftest.rb
154
155
  - lib/mvp.rb
155
- - lib/mvp/downloader.rb
156
+ - lib/mvp/bigquery.rb
157
+ - lib/mvp/forge.rb
156
158
  - lib/mvp/itemizer.rb
157
- - lib/mvp/monkeypatches.rb
159
+ - lib/mvp/puppetfile_parser.rb
158
160
  - lib/mvp/runner.rb
159
161
  - lib/mvp/stats.rb
160
- - lib/mvp/uploader.rb
161
162
  homepage:
162
163
  licenses:
163
164
  - Apache 2
@@ -1,8 +0,0 @@
1
- # BigQuery uses newline delimited json
2
- # https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON
3
-
4
- class Array
5
- def to_newline_delimited_json
6
- self.map(&:to_json).join("\n")
7
- end
8
- end