dropsonde 0.0.2 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -7
- data/bin/dropsonde +60 -17
- data/lib/dropsonde/cache.rb +42 -31
- data/lib/dropsonde/metrics/dependencies.rb +46 -25
- data/lib/dropsonde/metrics/environments.rb +53 -0
- data/lib/dropsonde/metrics/modules.rb +86 -53
- data/lib/dropsonde/metrics/platforms.rb +126 -0
- data/lib/dropsonde/metrics/puppetfiles.rb +44 -25
- data/lib/dropsonde/metrics.rb +116 -76
- data/lib/dropsonde/monkeypatches.rb +12 -11
- data/lib/dropsonde/version.rb +3 -1
- data/lib/dropsonde.rb +75 -24
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4a87b2c8abe75d07a73a52a48041c2734bb7914a54df3471d696458f3f0cb05
|
4
|
+
data.tar.gz: cd2b7d3a36aa9aa386997ab21f7ffee57387819b08a79407b98c5bfdbbcd619d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a033ee5a796d3dab8c46d7562848255ca66552671f31f58ffde8a5a00f98dbcf1e9485a8279529b7c78094f96add8e932fa401e634ccfc067f5c9b72eed93c3
|
7
|
+
data.tar.gz: b86bc1f854fe414a388534ced35132d0744ac21001298d3eefce326afa97a63bd56ce9b250458889690831e7b0343963666377eaf10c25c38861b50e1160225d
|
data/README.md
CHANGED
@@ -43,10 +43,10 @@ schema so the system cannot gather any data that's not described in the schema.
|
|
43
43
|
See the full schema of all enabled plugins by running the command:
|
44
44
|
|
45
45
|
```
|
46
|
-
$ dropsonde schema
|
46
|
+
$ dropsonde dev schema
|
47
47
|
```
|
48
48
|
|
49
|
-
All information in the report is keyed off a non-reversible SHA512
|
49
|
+
All information in the report is keyed off a non-reversible SHA512 hashed site-id
|
50
50
|
to make it unidentifiable; this report cannot be linked back to you or to your
|
51
51
|
infrastructure. Now that said, we know that the more bits of data shared about a
|
52
52
|
specific site, the easier it is to fingerprint that site. See
|
@@ -63,7 +63,9 @@ For example, this aggregated data might include records that show a count of how
|
|
63
63
|
many sites are using various combinations of modules together, but it will never
|
64
64
|
include a record showing the full list of modules that any single site is using.
|
65
65
|
|
66
|
-
|
66
|
+
![Data aggregation workflow](./aggregation.png)
|
67
|
+
|
68
|
+
With your own Google Cloud account, you can use that [dataset](https://console.cloud.google.com/bigquery?p=dataops-puppet-public-data&d=community&t=forge_modules&page=table)
|
67
69
|
in your own tooling and you can see/contribute to the aggregation queries in its
|
68
70
|
own [repository](https://github.com/puppetlabs/dropsonde-aggregation).
|
69
71
|
|
@@ -85,7 +87,9 @@ possible: [privacy@puppet.com](mailto:privacy@puppet.com)
|
|
85
87
|
|
86
88
|
## Installation
|
87
89
|
|
88
|
-
This is distributed as a Ruby gem. Simply `gem install dropsonde
|
90
|
+
This is distributed as a Ruby gem. Simply `gem install dropsonde`. There's a
|
91
|
+
[Puppet module](https://github.com/puppetlabs/puppetlabs-dropsonde) to manage it
|
92
|
+
if that's more your thing.
|
89
93
|
|
90
94
|
|
91
95
|
## Configuration
|
@@ -98,7 +102,7 @@ will not report the `:puppetfiles` metrics.
|
|
98
102
|
``` yaml
|
99
103
|
---
|
100
104
|
:update: false
|
101
|
-
:
|
105
|
+
:disable:
|
102
106
|
- puppetfiles
|
103
107
|
```
|
104
108
|
|
@@ -112,8 +116,6 @@ Run `dropsonde --help` to see usage information.
|
|
112
116
|
* `preview`
|
113
117
|
* Generate and print out an example telemetry report in human readable form
|
114
118
|
* Annotated with descriptions of each plugin and each metric gathered.
|
115
|
-
* `schema`
|
116
|
-
* Generate and print out the complete combined schema.
|
117
119
|
* `list`
|
118
120
|
* See a quick list of the available metrics and what they do.
|
119
121
|
* `submit`
|
@@ -123,6 +125,17 @@ Run `dropsonde --help` to see usage information.
|
|
123
125
|
* Once a week, the list of public modules on the Forge will be updated. This
|
124
126
|
command will manually force that cache update to happen.
|
125
127
|
|
128
|
+
Developer commands
|
129
|
+
|
130
|
+
* `dev example`
|
131
|
+
* To make writing aggregation queries possible without access to the private
|
132
|
+
database, this will generate a randomized example of the dataset. This is
|
133
|
+
in JSONL format, so it can be imported directly into BigQuery.
|
134
|
+
* `dev schema`
|
135
|
+
* Generate and print out the complete combined schema of all metrics.
|
136
|
+
* `dev shell`
|
137
|
+
* Open up a Pry shell with all the relevant connections open and initialized.
|
138
|
+
|
126
139
|
|
127
140
|
## Architecture
|
128
141
|
|
data/bin/dropsonde
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'gli'
|
3
3
|
require 'dropsonde'
|
4
|
-
require 'puppet'
|
5
4
|
|
6
5
|
class Dropsonde
|
7
6
|
extend GLI::App
|
8
7
|
|
9
|
-
|
8
|
+
@cache = nil
|
9
|
+
@puppetdb_session = Dropsonde.new
|
10
10
|
|
11
11
|
program_desc 'A simple telemetry tool for Puppet infrastructures'
|
12
|
-
config_file
|
12
|
+
config_file "#{File.dirname(Puppet.settings[:confdir])}/telemetry.yaml"
|
13
13
|
version Dropsonde::VERSION
|
14
14
|
|
15
15
|
desc 'Verbose logging'
|
@@ -25,27 +25,26 @@ class Dropsonde
|
|
25
25
|
flag [:ttl], :default_value => 7, :type => Integer
|
26
26
|
|
27
27
|
desc 'List of metrics to omit'
|
28
|
-
flag [:
|
28
|
+
flag [:disable, :d], :type => Array
|
29
|
+
|
30
|
+
desc 'Only load these metrics'
|
31
|
+
flag [:enable, :e], :type => Array
|
29
32
|
|
30
33
|
desc 'Any number or string used to generate the randomized site ID.'
|
31
34
|
flag [:seed]
|
32
35
|
|
36
|
+
desc 'Static site ID'
|
37
|
+
flag [:siteid]
|
38
|
+
|
33
39
|
pre do |global, command, options, args|
|
34
40
|
Dropsonde.settings = global
|
35
|
-
Dropsonde::Cache.
|
41
|
+
@cache = Dropsonde::Cache.new(global[:cachepath], global[:ttl], global[:update])
|
36
42
|
end
|
37
43
|
|
38
44
|
desc 'Manually update the Forge module name cache'
|
39
45
|
command :update do |c|
|
40
46
|
c.action do |global, options, args|
|
41
|
-
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
desc 'Generate a complete schema set'
|
46
|
-
command :schema do |c|
|
47
|
-
c.action do |global, options, args|
|
48
|
-
Dropsonde.generate_schema
|
47
|
+
@cache.update
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
@@ -56,14 +55,14 @@ class Dropsonde
|
|
56
55
|
end
|
57
56
|
end
|
58
57
|
|
59
|
-
desc '
|
58
|
+
desc 'Preview the telemetry report that will be submitted'
|
60
59
|
command :preview do |c|
|
61
60
|
c.desc 'The output format to use'
|
62
61
|
c.flag [:format], :default_value => 'human'
|
63
62
|
|
64
63
|
c.action do |global, options, args|
|
65
|
-
|
66
|
-
Dropsonde.generate_report(options[:format])
|
64
|
+
@cache.autoupdate
|
65
|
+
Dropsonde.generate_report(options[:format], @puppetdb_session)
|
67
66
|
end
|
68
67
|
end
|
69
68
|
|
@@ -76,10 +75,54 @@ class Dropsonde
|
|
76
75
|
c.flag [:port], :default_value => 443, :type => Integer
|
77
76
|
|
78
77
|
c.action do |global, options, args|
|
79
|
-
|
78
|
+
@cache.autoupdate
|
80
79
|
Dropsonde.submit_report(options[:endpoint], options[:port])
|
81
80
|
end
|
82
81
|
end
|
82
|
+
|
83
|
+
desc "Commands useful for developers"
|
84
|
+
command :dev do |t|
|
85
|
+
t.desc 'Open a Pry shell for debugging'
|
86
|
+
t.command :shell do |c|
|
87
|
+
c.action do |global, options, args|
|
88
|
+
require 'pry'
|
89
|
+
binding.pry
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
t.desc 'Generate a complete schema for all metrics'
|
94
|
+
t.long_desc "This generates the schema that is used to create or update the BigQuery
|
95
|
+
database. Every report is also validated against this schema before
|
96
|
+
submission, so you can be assured that this is a complete representation
|
97
|
+
of what data is collected and run through aggregation filters."
|
98
|
+
t.command :schema do |c|
|
99
|
+
c.action do |global, options, args|
|
100
|
+
Dropsonde.generate_schema
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
t.desc 'Generate an example of random data to simulate actual reports'
|
105
|
+
t.long_desc "The submitted telemetry reports are treated as sensitive material. Very
|
106
|
+
few people have access to that raw data. Instead, it's run through some
|
107
|
+
data aggregation filters to generate the published statistics we share.
|
108
|
+
Writing those aggregation queries is difficult without data to work with,
|
109
|
+
so this command generates a representative example of random data.
|
110
|
+
|
111
|
+
This is in jsonl format for direct upload to BigQuery."
|
112
|
+
t.command :example do |c|
|
113
|
+
c.desc 'How many rows to generate'
|
114
|
+
c.flag [:size], :default_value => 100, :type => Integer
|
115
|
+
|
116
|
+
c.desc 'Filename for the output (in jsonl format).'
|
117
|
+
c.flag [:filename], :default_value => 'example.jsonl'
|
118
|
+
|
119
|
+
c.action do |global, options, args|
|
120
|
+
@cache.autoupdate
|
121
|
+
Dropsonde.generate_example(options[:size], options[:filename])
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
83
126
|
end
|
84
127
|
|
85
128
|
exit Dropsonde.run(ARGV)
|
data/lib/dropsonde/cache.rb
CHANGED
@@ -1,34 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'date'
|
2
4
|
require 'json'
|
3
5
|
require 'fileutils'
|
4
6
|
require 'puppet_forge'
|
5
7
|
|
8
|
+
# cache class
|
6
9
|
class Dropsonde::Cache
|
7
|
-
|
10
|
+
@autoupdate = false
|
8
11
|
|
9
|
-
def
|
12
|
+
def initialize(path, ttl, autoupdate)
|
10
13
|
FileUtils.mkdir_p(path)
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
if File.file?
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
PuppetForge.user_agent =
|
14
|
+
@path = "#{File.expand_path(path)}/forge.json"
|
15
|
+
@ttl = ttl
|
16
|
+
@autoupdate = autoupdate
|
17
|
+
|
18
|
+
@@cache = if File.file? @path # rubocop:disable Style/ClassVars
|
19
|
+
JSON.parse(File.read(@path))
|
20
|
+
else
|
21
|
+
{
|
22
|
+
'timestamp' => '2000-1-1', # long before any puppet modules were released!
|
23
|
+
'modules' => [],
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
PuppetForge.user_agent = 'Dropsonde Telemetry Client/0.0.1'
|
25
28
|
end
|
26
29
|
|
27
|
-
def
|
30
|
+
def modules
|
28
31
|
@@cache['modules']
|
29
32
|
end
|
30
33
|
|
31
|
-
def
|
34
|
+
def cache
|
35
|
+
@@cache
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.forge_module?(mod)
|
32
39
|
case mod
|
33
40
|
when Puppet::Module
|
34
41
|
modname = mod.forge_slug
|
@@ -39,38 +46,42 @@ class Dropsonde::Cache
|
|
39
46
|
end
|
40
47
|
return unless modname
|
41
48
|
|
42
|
-
modules.include? modname.tr('/','-')
|
49
|
+
@@cache['modules'].include? modname.tr('/', '-')
|
43
50
|
end
|
44
51
|
|
45
|
-
def
|
46
|
-
|
52
|
+
def update
|
53
|
+
puts 'Updating module cache...'
|
54
|
+
iter = PuppetForge::Module.all(sort_by: 'latest_release')
|
47
55
|
newest = DateTime.parse(@@cache['timestamp'])
|
48
56
|
|
49
|
-
@@cache['timestamp'] = iter.first.
|
57
|
+
@@cache['timestamp'] = iter.first.updated_at
|
50
58
|
|
51
59
|
until iter.next.nil?
|
52
60
|
# stop once we reach modules we've already cached
|
53
|
-
break if DateTime.parse(iter.first.
|
61
|
+
break if DateTime.parse(iter.first.updated_at) <= newest
|
54
62
|
|
55
|
-
@@cache['modules'].concat
|
63
|
+
@@cache['modules'].concat(iter.map { |mod| mod.slug })
|
56
64
|
|
57
65
|
iter = iter.next
|
58
66
|
print '.'
|
59
67
|
end
|
68
|
+
puts
|
60
69
|
@@cache['modules'].sort!
|
61
70
|
@@cache['modules'].uniq!
|
62
71
|
|
63
|
-
File.write(
|
72
|
+
File.write(@path, JSON.pretty_generate(@@cache))
|
64
73
|
end
|
65
74
|
|
66
|
-
def
|
67
|
-
return unless
|
68
|
-
|
69
|
-
update unless File.file? @@path
|
75
|
+
def autoupdate
|
76
|
+
return unless @autoupdate
|
70
77
|
|
71
|
-
|
78
|
+
unless File.file? @path
|
79
|
+
puts 'Dropsonde caches a list of all Forge modules to ensure that it only reports'
|
80
|
+
puts 'usage data on public modules. Generating this cache may take some time on'
|
81
|
+
puts "the first run and you'll see your screen fill up with dots."
|
72
82
|
update
|
73
83
|
end
|
74
|
-
end
|
75
84
|
|
85
|
+
return update if (Date.today - File.mtime(@path).to_date).to_i > @ttl
|
86
|
+
end
|
76
87
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# dependencies plugin
|
1
4
|
class Dropsonde::Metrics::Dependencies
|
2
5
|
def self.initialize_dependencies
|
3
6
|
# require any libraries needed here -- no need to load puppet; it's already initialized
|
@@ -5,10 +8,10 @@ class Dropsonde::Metrics::Dependencies
|
|
5
8
|
end
|
6
9
|
|
7
10
|
def self.description
|
8
|
-
<<~
|
11
|
+
<<~DESCRIPTION
|
9
12
|
This group of metrics discovers dependencies between modules in all
|
10
13
|
environments. It will omit dependencies on private modules.
|
11
|
-
|
14
|
+
DESCRIPTION
|
12
15
|
end
|
13
16
|
|
14
17
|
def self.schema
|
@@ -18,23 +21,23 @@ class Dropsonde::Metrics::Dependencies
|
|
18
21
|
{
|
19
22
|
"fields": [
|
20
23
|
{
|
21
|
-
"description":
|
22
|
-
"mode":
|
23
|
-
"name":
|
24
|
-
"type":
|
24
|
+
"description": 'The depended on module name',
|
25
|
+
"mode": 'NULLABLE',
|
26
|
+
"name": 'name',
|
27
|
+
"type": 'STRING',
|
25
28
|
},
|
26
29
|
{
|
27
|
-
"description":
|
28
|
-
"mode":
|
29
|
-
"name":
|
30
|
-
"type":
|
31
|
-
}
|
30
|
+
"description": 'The depended on module version requirement',
|
31
|
+
"mode": 'NULLABLE',
|
32
|
+
"name": 'version_requirement',
|
33
|
+
"type": 'STRING',
|
34
|
+
},
|
32
35
|
],
|
33
|
-
"description":
|
34
|
-
"mode":
|
35
|
-
"name":
|
36
|
-
"type":
|
37
|
-
}
|
36
|
+
"description": 'List of modules that private modules in all environments depend on.',
|
37
|
+
"mode": 'REPEATED',
|
38
|
+
"name": 'dependencies',
|
39
|
+
"type": 'RECORD',
|
40
|
+
},
|
38
41
|
]
|
39
42
|
end
|
40
43
|
|
@@ -42,26 +45,44 @@ class Dropsonde::Metrics::Dependencies
|
|
42
45
|
# run just before generating this metric
|
43
46
|
end
|
44
47
|
|
45
|
-
def self.run
|
48
|
+
def self.run(_puppetdb_session = nil)
|
46
49
|
# return an array of hashes representing the data to be merged into the combined checkin
|
47
|
-
environments = Puppet.lookup(:environments).list.map{|e|e.name}
|
48
|
-
modules = environments.map
|
50
|
+
environments = Puppet.lookup(:environments).list.map { |e| e.name }
|
51
|
+
modules = environments.map { |env|
|
49
52
|
Puppet.lookup(:environments).get(env).modules
|
50
|
-
|
53
|
+
}.flatten
|
51
54
|
|
52
55
|
# we want only PUBLIC modules that PRIVATE modules depend on
|
53
|
-
dependencies = modules.map
|
56
|
+
dependencies = modules.map { |mod|
|
54
57
|
next unless mod.dependencies
|
55
|
-
next if Dropsonde::Cache.
|
58
|
+
next if Dropsonde::Cache.forge_module? mod # skip unless this is a private module
|
56
59
|
|
57
60
|
# and return a list of all public modules it depends on
|
58
|
-
mod.dependencies.select {|
|
59
|
-
|
61
|
+
mod.dependencies.select { |dep| Dropsonde::Cache.forge_module? dep }
|
62
|
+
}.flatten.compact
|
60
63
|
|
61
64
|
[
|
62
|
-
{ :
|
65
|
+
{ dependencies: dependencies },
|
63
66
|
]
|
67
|
+
end
|
64
68
|
|
69
|
+
def self.example
|
70
|
+
# this method is used to generate a table filled with randomized data to
|
71
|
+
# make it easier to write data aggregation queries without access to the
|
72
|
+
# actual private data that users have submitted.
|
73
|
+
|
74
|
+
dropsonde_cache = Dropsonde::Cache.new('foo', 7, true)
|
75
|
+
versions = ['>= 1.5.2', '>= 4.3.2', '>= 3.0.0 < 4.0.0', '>= 2.2.1 < 5.0.0', '>= 5.0.0 < 7.0.0', '>= 4.11.0']
|
76
|
+
[
|
77
|
+
dependencies: dropsonde_cache.modules
|
78
|
+
.sample(rand(250))
|
79
|
+
.map do |item|
|
80
|
+
{
|
81
|
+
name: item,
|
82
|
+
version_requirement: versions.sample,
|
83
|
+
}
|
84
|
+
end,
|
85
|
+
]
|
65
86
|
end
|
66
87
|
|
67
88
|
def self.cleanup
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# environments plugin
|
4
|
+
class Dropsonde::Metrics::Environments
|
5
|
+
def self.initialize_environments
|
6
|
+
# Require any libraries needed here -- no need to load puppet or puppetdb;
|
7
|
+
# they're already loaded. This hook is named after the class name.
|
8
|
+
# All plugins are initialized at startup before any metrics are generated.
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.description
|
12
|
+
# This is a Ruby squiggle heredoc; just a multi-line string with indentation removed
|
13
|
+
<<~DESCRIPTION
|
14
|
+
This group of metrics gathers information about environments.
|
15
|
+
DESCRIPTION
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.schema
|
19
|
+
# return an array of hashes of a partial schema to be merged into the complete schema
|
20
|
+
[
|
21
|
+
{
|
22
|
+
"description": 'The number of environments',
|
23
|
+
"mode": 'NULLABLE',
|
24
|
+
"name": 'environment_count',
|
25
|
+
"type": 'INTEGER',
|
26
|
+
},
|
27
|
+
]
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.setup
|
31
|
+
# run just before generating this metric
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.run(_puppetdb_session = nil)
|
35
|
+
# return an array of hashes representing the data to be merged into the combined checkin
|
36
|
+
[
|
37
|
+
environment_count: Puppet.lookup(:environments).list.count,
|
38
|
+
]
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.example
|
42
|
+
# this method is used to generate a table filled with randomized data to
|
43
|
+
# make it easier to write data aggregation queries without access to the
|
44
|
+
# actual private data that users have submitted.
|
45
|
+
[
|
46
|
+
environment_count: rand(1..100),
|
47
|
+
]
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.cleanup
|
51
|
+
# run just after generating this metric
|
52
|
+
end
|
53
|
+
end
|