dropsonde 0.0.2 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -7
- data/bin/dropsonde +60 -17
- data/lib/dropsonde/cache.rb +42 -31
- data/lib/dropsonde/metrics/dependencies.rb +46 -25
- data/lib/dropsonde/metrics/environments.rb +53 -0
- data/lib/dropsonde/metrics/modules.rb +86 -53
- data/lib/dropsonde/metrics/platforms.rb +126 -0
- data/lib/dropsonde/metrics/puppetfiles.rb +44 -25
- data/lib/dropsonde/metrics.rb +116 -76
- data/lib/dropsonde/monkeypatches.rb +12 -11
- data/lib/dropsonde/version.rb +3 -1
- data/lib/dropsonde.rb +75 -24
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4a87b2c8abe75d07a73a52a48041c2734bb7914a54df3471d696458f3f0cb05
|
4
|
+
data.tar.gz: cd2b7d3a36aa9aa386997ab21f7ffee57387819b08a79407b98c5bfdbbcd619d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a033ee5a796d3dab8c46d7562848255ca66552671f31f58ffde8a5a00f98dbcf1e9485a8279529b7c78094f96add8e932fa401e634ccfc067f5c9b72eed93c3
|
7
|
+
data.tar.gz: b86bc1f854fe414a388534ced35132d0744ac21001298d3eefce326afa97a63bd56ce9b250458889690831e7b0343963666377eaf10c25c38861b50e1160225d
|
data/README.md
CHANGED
@@ -43,10 +43,10 @@ schema so the system cannot gather any data that's not described in the schema.
|
|
43
43
|
See the full schema of all enabled plugins by running the command:
|
44
44
|
|
45
45
|
```
|
46
|
-
$ dropsonde schema
|
46
|
+
$ dropsonde dev schema
|
47
47
|
```
|
48
48
|
|
49
|
-
All information in the report is keyed off a non-reversible SHA512
|
49
|
+
All information in the report is keyed off a non-reversible SHA512 hashed site-id
|
50
50
|
to make it unidentifiable; this report cannot be linked back to you or to your
|
51
51
|
infrastructure. Now that said, we know that the more bits of data shared about a
|
52
52
|
specific site, the easier it is to fingerprint that site. See
|
@@ -63,7 +63,9 @@ For example, this aggregated data might include records that show a count of how
|
|
63
63
|
many sites are using various combinations of modules together, but it will never
|
64
64
|
include a record showing the full list of modules that any single site is using.
|
65
65
|
|
66
|
-
|
66
|
+

|
67
|
+
|
68
|
+
With your own Google Cloud account, you can use that [dataset](https://console.cloud.google.com/bigquery?p=dataops-puppet-public-data&d=community&t=forge_modules&page=table)
|
67
69
|
in your own tooling and you can see/contribute to the aggregation queries in its
|
68
70
|
own [repository](https://github.com/puppetlabs/dropsonde-aggregation).
|
69
71
|
|
@@ -85,7 +87,9 @@ possible: [privacy@puppet.com](mailto:privacy@puppet.com)
|
|
85
87
|
|
86
88
|
## Installation
|
87
89
|
|
88
|
-
This is distributed as a Ruby gem. Simply `gem install dropsonde
|
90
|
+
This is distributed as a Ruby gem. Simply `gem install dropsonde`. There's a
|
91
|
+
[Puppet module](https://github.com/puppetlabs/puppetlabs-dropsonde) to manage it
|
92
|
+
if that's more your thing.
|
89
93
|
|
90
94
|
|
91
95
|
## Configuration
|
@@ -98,7 +102,7 @@ will not report the `:puppetfiles` metrics.
|
|
98
102
|
``` yaml
|
99
103
|
---
|
100
104
|
:update: false
|
101
|
-
:
|
105
|
+
:disable:
|
102
106
|
- puppetfiles
|
103
107
|
```
|
104
108
|
|
@@ -112,8 +116,6 @@ Run `dropsonde --help` to see usage information.
|
|
112
116
|
* `preview`
|
113
117
|
* Generate and print out an example telemetry report in human readable form
|
114
118
|
* Annotated with descriptions of each plugin and each metric gathered.
|
115
|
-
* `schema`
|
116
|
-
* Generate and print out the complete combined schema.
|
117
119
|
* `list`
|
118
120
|
* See a quick list of the available metrics and what they do.
|
119
121
|
* `submit`
|
@@ -123,6 +125,17 @@ Run `dropsonde --help` to see usage information.
|
|
123
125
|
* Once a week, the list of public modules on the Forge will be updated. This
|
124
126
|
command will manually force that cache update to happen.
|
125
127
|
|
128
|
+
Developer commands
|
129
|
+
|
130
|
+
* `dev example`
|
131
|
+
* To make writing aggregation queries possible without access to the private
|
132
|
+
database, this will generate a randomized example of the dataset. This is
|
133
|
+
in JSONL format, so it can be imported directly into BigQuery.
|
134
|
+
* `dev schema`
|
135
|
+
* Generate and print out the complete combined schema of all metrics.
|
136
|
+
* `dev shell`
|
137
|
+
* Open up a Pry shell with all the relevant connections open and initialized.
|
138
|
+
|
126
139
|
|
127
140
|
## Architecture
|
128
141
|
|
data/bin/dropsonde
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'gli'
|
3
3
|
require 'dropsonde'
|
4
|
-
require 'puppet'
|
5
4
|
|
6
5
|
class Dropsonde
|
7
6
|
extend GLI::App
|
8
7
|
|
9
|
-
|
8
|
+
@cache = nil
|
9
|
+
@puppetdb_session = Dropsonde.new
|
10
10
|
|
11
11
|
program_desc 'A simple telemetry tool for Puppet infrastructures'
|
12
|
-
config_file
|
12
|
+
config_file "#{File.dirname(Puppet.settings[:confdir])}/telemetry.yaml"
|
13
13
|
version Dropsonde::VERSION
|
14
14
|
|
15
15
|
desc 'Verbose logging'
|
@@ -25,27 +25,26 @@ class Dropsonde
|
|
25
25
|
flag [:ttl], :default_value => 7, :type => Integer
|
26
26
|
|
27
27
|
desc 'List of metrics to omit'
|
28
|
-
flag [:
|
28
|
+
flag [:disable, :d], :type => Array
|
29
|
+
|
30
|
+
desc 'Only load these metrics'
|
31
|
+
flag [:enable, :e], :type => Array
|
29
32
|
|
30
33
|
desc 'Any number or string used to generate the randomized site ID.'
|
31
34
|
flag [:seed]
|
32
35
|
|
36
|
+
desc 'Static site ID'
|
37
|
+
flag [:siteid]
|
38
|
+
|
33
39
|
pre do |global, command, options, args|
|
34
40
|
Dropsonde.settings = global
|
35
|
-
Dropsonde::Cache.
|
41
|
+
@cache = Dropsonde::Cache.new(global[:cachepath], global[:ttl], global[:update])
|
36
42
|
end
|
37
43
|
|
38
44
|
desc 'Manually update the Forge module name cache'
|
39
45
|
command :update do |c|
|
40
46
|
c.action do |global, options, args|
|
41
|
-
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
desc 'Generate a complete schema set'
|
46
|
-
command :schema do |c|
|
47
|
-
c.action do |global, options, args|
|
48
|
-
Dropsonde.generate_schema
|
47
|
+
@cache.update
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
@@ -56,14 +55,14 @@ class Dropsonde
|
|
56
55
|
end
|
57
56
|
end
|
58
57
|
|
59
|
-
desc '
|
58
|
+
desc 'Preview the telemetry report that will be submitted'
|
60
59
|
command :preview do |c|
|
61
60
|
c.desc 'The output format to use'
|
62
61
|
c.flag [:format], :default_value => 'human'
|
63
62
|
|
64
63
|
c.action do |global, options, args|
|
65
|
-
|
66
|
-
Dropsonde.generate_report(options[:format])
|
64
|
+
@cache.autoupdate
|
65
|
+
Dropsonde.generate_report(options[:format], @puppetdb_session)
|
67
66
|
end
|
68
67
|
end
|
69
68
|
|
@@ -76,10 +75,54 @@ class Dropsonde
|
|
76
75
|
c.flag [:port], :default_value => 443, :type => Integer
|
77
76
|
|
78
77
|
c.action do |global, options, args|
|
79
|
-
|
78
|
+
@cache.autoupdate
|
80
79
|
Dropsonde.submit_report(options[:endpoint], options[:port])
|
81
80
|
end
|
82
81
|
end
|
82
|
+
|
83
|
+
desc "Commands useful for developers"
|
84
|
+
command :dev do |t|
|
85
|
+
t.desc 'Open a Pry shell for debugging'
|
86
|
+
t.command :shell do |c|
|
87
|
+
c.action do |global, options, args|
|
88
|
+
require 'pry'
|
89
|
+
binding.pry
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
t.desc 'Generate a complete schema for all metrics'
|
94
|
+
t.long_desc "This generates the schema that is used to create or update the BigQuery
|
95
|
+
database. Every report is also validated against this schema before
|
96
|
+
submission, so you can be assured that this is a complete representation
|
97
|
+
of what data is collected and run through aggregation filters."
|
98
|
+
t.command :schema do |c|
|
99
|
+
c.action do |global, options, args|
|
100
|
+
Dropsonde.generate_schema
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
t.desc 'Generate an example of random data to simulate actual reports'
|
105
|
+
t.long_desc "The submitted telemetry reports are treated as sensitive material. Very
|
106
|
+
few people have access to that raw data. Instead, it's run through some
|
107
|
+
data aggregation filters to generate the published statistics we share.
|
108
|
+
Writing those aggregation queries is difficult without data to work with,
|
109
|
+
so this command generates a representative example of random data.
|
110
|
+
|
111
|
+
This is in jsonl format for direct upload to BigQuery."
|
112
|
+
t.command :example do |c|
|
113
|
+
c.desc 'How many rows to generate'
|
114
|
+
c.flag [:size], :default_value => 100, :type => Integer
|
115
|
+
|
116
|
+
c.desc 'Filename for the output (in jsonl format).'
|
117
|
+
c.flag [:filename], :default_value => 'example.jsonl'
|
118
|
+
|
119
|
+
c.action do |global, options, args|
|
120
|
+
@cache.autoupdate
|
121
|
+
Dropsonde.generate_example(options[:size], options[:filename])
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
83
126
|
end
|
84
127
|
|
85
128
|
exit Dropsonde.run(ARGV)
|
data/lib/dropsonde/cache.rb
CHANGED
@@ -1,34 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'date'
|
2
4
|
require 'json'
|
3
5
|
require 'fileutils'
|
4
6
|
require 'puppet_forge'
|
5
7
|
|
8
|
+
# cache class
|
6
9
|
class Dropsonde::Cache
|
7
|
-
|
10
|
+
@autoupdate = false
|
8
11
|
|
9
|
-
def
|
12
|
+
def initialize(path, ttl, autoupdate)
|
10
13
|
FileUtils.mkdir_p(path)
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
if File.file?
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
PuppetForge.user_agent =
|
14
|
+
@path = "#{File.expand_path(path)}/forge.json"
|
15
|
+
@ttl = ttl
|
16
|
+
@autoupdate = autoupdate
|
17
|
+
|
18
|
+
@@cache = if File.file? @path # rubocop:disable Style/ClassVars
|
19
|
+
JSON.parse(File.read(@path))
|
20
|
+
else
|
21
|
+
{
|
22
|
+
'timestamp' => '2000-1-1', # long before any puppet modules were released!
|
23
|
+
'modules' => [],
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
PuppetForge.user_agent = 'Dropsonde Telemetry Client/0.0.1'
|
25
28
|
end
|
26
29
|
|
27
|
-
def
|
30
|
+
def modules
|
28
31
|
@@cache['modules']
|
29
32
|
end
|
30
33
|
|
31
|
-
def
|
34
|
+
def cache
|
35
|
+
@@cache
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.forge_module?(mod)
|
32
39
|
case mod
|
33
40
|
when Puppet::Module
|
34
41
|
modname = mod.forge_slug
|
@@ -39,38 +46,42 @@ class Dropsonde::Cache
|
|
39
46
|
end
|
40
47
|
return unless modname
|
41
48
|
|
42
|
-
modules.include? modname.tr('/','-')
|
49
|
+
@@cache['modules'].include? modname.tr('/', '-')
|
43
50
|
end
|
44
51
|
|
45
|
-
def
|
46
|
-
|
52
|
+
def update
|
53
|
+
puts 'Updating module cache...'
|
54
|
+
iter = PuppetForge::Module.all(sort_by: 'latest_release')
|
47
55
|
newest = DateTime.parse(@@cache['timestamp'])
|
48
56
|
|
49
|
-
@@cache['timestamp'] = iter.first.
|
57
|
+
@@cache['timestamp'] = iter.first.updated_at
|
50
58
|
|
51
59
|
until iter.next.nil?
|
52
60
|
# stop once we reach modules we've already cached
|
53
|
-
break if DateTime.parse(iter.first.
|
61
|
+
break if DateTime.parse(iter.first.updated_at) <= newest
|
54
62
|
|
55
|
-
@@cache['modules'].concat
|
63
|
+
@@cache['modules'].concat(iter.map { |mod| mod.slug })
|
56
64
|
|
57
65
|
iter = iter.next
|
58
66
|
print '.'
|
59
67
|
end
|
68
|
+
puts
|
60
69
|
@@cache['modules'].sort!
|
61
70
|
@@cache['modules'].uniq!
|
62
71
|
|
63
|
-
File.write(
|
72
|
+
File.write(@path, JSON.pretty_generate(@@cache))
|
64
73
|
end
|
65
74
|
|
66
|
-
def
|
67
|
-
return unless
|
68
|
-
|
69
|
-
update unless File.file? @@path
|
75
|
+
def autoupdate
|
76
|
+
return unless @autoupdate
|
70
77
|
|
71
|
-
|
78
|
+
unless File.file? @path
|
79
|
+
puts 'Dropsonde caches a list of all Forge modules to ensure that it only reports'
|
80
|
+
puts 'usage data on public modules. Generating this cache may take some time on'
|
81
|
+
puts "the first run and you'll see your screen fill up with dots."
|
72
82
|
update
|
73
83
|
end
|
74
|
-
end
|
75
84
|
|
85
|
+
return update if (Date.today - File.mtime(@path).to_date).to_i > @ttl
|
86
|
+
end
|
76
87
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# dependencies plugin
|
1
4
|
class Dropsonde::Metrics::Dependencies
|
2
5
|
def self.initialize_dependencies
|
3
6
|
# require any libraries needed here -- no need to load puppet; it's already initialized
|
@@ -5,10 +8,10 @@ class Dropsonde::Metrics::Dependencies
|
|
5
8
|
end
|
6
9
|
|
7
10
|
def self.description
|
8
|
-
<<~
|
11
|
+
<<~DESCRIPTION
|
9
12
|
This group of metrics discovers dependencies between modules in all
|
10
13
|
environments. It will omit dependencies on private modules.
|
11
|
-
|
14
|
+
DESCRIPTION
|
12
15
|
end
|
13
16
|
|
14
17
|
def self.schema
|
@@ -18,23 +21,23 @@ class Dropsonde::Metrics::Dependencies
|
|
18
21
|
{
|
19
22
|
"fields": [
|
20
23
|
{
|
21
|
-
"description":
|
22
|
-
"mode":
|
23
|
-
"name":
|
24
|
-
"type":
|
24
|
+
"description": 'The depended on module name',
|
25
|
+
"mode": 'NULLABLE',
|
26
|
+
"name": 'name',
|
27
|
+
"type": 'STRING',
|
25
28
|
},
|
26
29
|
{
|
27
|
-
"description":
|
28
|
-
"mode":
|
29
|
-
"name":
|
30
|
-
"type":
|
31
|
-
}
|
30
|
+
"description": 'The depended on module version requirement',
|
31
|
+
"mode": 'NULLABLE',
|
32
|
+
"name": 'version_requirement',
|
33
|
+
"type": 'STRING',
|
34
|
+
},
|
32
35
|
],
|
33
|
-
"description":
|
34
|
-
"mode":
|
35
|
-
"name":
|
36
|
-
"type":
|
37
|
-
}
|
36
|
+
"description": 'List of modules that private modules in all environments depend on.',
|
37
|
+
"mode": 'REPEATED',
|
38
|
+
"name": 'dependencies',
|
39
|
+
"type": 'RECORD',
|
40
|
+
},
|
38
41
|
]
|
39
42
|
end
|
40
43
|
|
@@ -42,26 +45,44 @@ class Dropsonde::Metrics::Dependencies
|
|
42
45
|
# run just before generating this metric
|
43
46
|
end
|
44
47
|
|
45
|
-
def self.run
|
48
|
+
def self.run(_puppetdb_session = nil)
|
46
49
|
# return an array of hashes representing the data to be merged into the combined checkin
|
47
|
-
environments = Puppet.lookup(:environments).list.map{|e|e.name}
|
48
|
-
modules = environments.map
|
50
|
+
environments = Puppet.lookup(:environments).list.map { |e| e.name }
|
51
|
+
modules = environments.map { |env|
|
49
52
|
Puppet.lookup(:environments).get(env).modules
|
50
|
-
|
53
|
+
}.flatten
|
51
54
|
|
52
55
|
# we want only PUBLIC modules that PRIVATE modules depend on
|
53
|
-
dependencies = modules.map
|
56
|
+
dependencies = modules.map { |mod|
|
54
57
|
next unless mod.dependencies
|
55
|
-
next if Dropsonde::Cache.
|
58
|
+
next if Dropsonde::Cache.forge_module? mod # skip unless this is a private module
|
56
59
|
|
57
60
|
# and return a list of all public modules it depends on
|
58
|
-
mod.dependencies.select {|
|
59
|
-
|
61
|
+
mod.dependencies.select { |dep| Dropsonde::Cache.forge_module? dep }
|
62
|
+
}.flatten.compact
|
60
63
|
|
61
64
|
[
|
62
|
-
{ :
|
65
|
+
{ dependencies: dependencies },
|
63
66
|
]
|
67
|
+
end
|
64
68
|
|
69
|
+
def self.example
|
70
|
+
# this method is used to generate a table filled with randomized data to
|
71
|
+
# make it easier to write data aggregation queries without access to the
|
72
|
+
# actual private data that users have submitted.
|
73
|
+
|
74
|
+
dropsonde_cache = Dropsonde::Cache.new('foo', 7, true)
|
75
|
+
versions = ['>= 1.5.2', '>= 4.3.2', '>= 3.0.0 < 4.0.0', '>= 2.2.1 < 5.0.0', '>= 5.0.0 < 7.0.0', '>= 4.11.0']
|
76
|
+
[
|
77
|
+
dependencies: dropsonde_cache.modules
|
78
|
+
.sample(rand(250))
|
79
|
+
.map do |item|
|
80
|
+
{
|
81
|
+
name: item,
|
82
|
+
version_requirement: versions.sample,
|
83
|
+
}
|
84
|
+
end,
|
85
|
+
]
|
65
86
|
end
|
66
87
|
|
67
88
|
def self.cleanup
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# environments plugin
|
4
|
+
class Dropsonde::Metrics::Environments
|
5
|
+
def self.initialize_environments
|
6
|
+
# Require any libraries needed here -- no need to load puppet or puppetdb;
|
7
|
+
# they're already loaded. This hook is named after the class name.
|
8
|
+
# All plugins are initialized at startup before any metrics are generated.
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.description
|
12
|
+
# This is a Ruby squiggle heredoc; just a multi-line string with indentation removed
|
13
|
+
<<~DESCRIPTION
|
14
|
+
This group of metrics gathers information about environments.
|
15
|
+
DESCRIPTION
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.schema
|
19
|
+
# return an array of hashes of a partial schema to be merged into the complete schema
|
20
|
+
[
|
21
|
+
{
|
22
|
+
"description": 'The number of environments',
|
23
|
+
"mode": 'NULLABLE',
|
24
|
+
"name": 'environment_count',
|
25
|
+
"type": 'INTEGER',
|
26
|
+
},
|
27
|
+
]
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.setup
|
31
|
+
# run just before generating this metric
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.run(_puppetdb_session = nil)
|
35
|
+
# return an array of hashes representing the data to be merged into the combined checkin
|
36
|
+
[
|
37
|
+
environment_count: Puppet.lookup(:environments).list.count,
|
38
|
+
]
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.example
|
42
|
+
# this method is used to generate a table filled with randomized data to
|
43
|
+
# make it easier to write data aggregation queries without access to the
|
44
|
+
# actual private data that users have submitted.
|
45
|
+
[
|
46
|
+
environment_count: rand(1..100),
|
47
|
+
]
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.cleanup
|
51
|
+
# run just after generating this metric
|
52
|
+
end
|
53
|
+
end
|