dropsonde 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0fd6246263a8a375056fe6fdcb3bcc6b1db8cc6bddc7ea80f9dc870b48c85bc
4
- data.tar.gz: b19a11ca730df12f61e95ae1f7bb81b9bbb65d8251966f3bcdd1fb203dacb96d
3
+ metadata.gz: 643def01638761803e04cf758bbf627ac821bb091fc7dad50c5b161251534728
4
+ data.tar.gz: f0c8a36548214da7beeecee9dc583dcd8d81eceb640b967078fadf5b2978f987
5
5
  SHA512:
6
- metadata.gz: 53c7d74ceb0aa6358ddb12828faf70b840a33a4bbd30977317023050833a5db24d2b72eefc1eecfc1008e6e8eae80249b93a710edc098045c333787e4faa0ef2
7
- data.tar.gz: 6644219dfdcbf761cb2c85bf446266ddf38d343d7a84d969ae88ffb30068c5faa647393457e1e255cd5fcbc2b8acb58f52d9efcd539c5713541932df79f0a807
6
+ metadata.gz: 7c260e1617666fb83d7b0be1a3d73ad13aaee5c33844d5eb4117b186c6eec23a9aad6dc03ff3df393168b6a3715bf492ce677624fdbe536e4961d956d1e9cdcb
7
+ data.tar.gz: cffbe7901afe08afa593ee73fa749358257efcd03d612564b3aac507684897d2d0db3e504a599d1eab7251508c3f455e236985501ea9f6722f108bb02cf80e2e
data/README.md CHANGED
@@ -43,10 +43,10 @@ schema so the system cannot gather any data that's not described in the schema.
43
43
  See the full schema of all enabled plugins by running the command:
44
44
 
45
45
  ```
46
- $ dropsonde schema
46
+ $ dropsonde dev schema
47
47
  ```
48
48
 
49
- All information in the report is keyed off a non-reversible SHA512 hash site-id
49
+ All information in the report is keyed off a non-reversible SHA512 hashed site-id
50
50
  to make it unidentifiable; this report cannot be linked back to you or to your
51
51
  infrastructure. Now that said, we know that the more bits of data shared about a
52
52
  specific site, the easier it is to fingerprint that site. See
@@ -63,6 +63,8 @@ For example, this aggregated data might include records that show a count of how
63
63
  many sites are using various combinations of modules together, but it will never
64
64
  include a record showing the full list of modules that any single site is using.
65
65
 
66
+ ![Data aggregation workflow](./aggregation.png)
67
+
66
68
  With your own Google Cloud account, you can use that [dataset](https://console.cloud.google.com/bigquery?p=dataops-puppet-public-data&d=community&t=forge_modules&page=table)
67
69
  in your own tooling and you can see/contribute to the aggregation queries in its
68
70
  own [repository](https://github.com/puppetlabs/dropsonde-aggregation).
@@ -100,7 +102,7 @@ will not report the `:puppetfiles` metrics.
100
102
  ``` yaml
101
103
  ---
102
104
  :update: false
103
- :blacklist:
105
+ :disable:
104
106
  - puppetfiles
105
107
  ```
106
108
 
@@ -22,7 +22,10 @@ class Dropsonde
22
22
  flag [:ttl], :default_value => 7, :type => Integer
23
23
 
24
24
  desc 'List of metrics to omit'
25
- flag [:blacklist, :b], :type => Array
25
+ flag [:disable, :d], :type => Array
26
+
27
+ desc 'Only load these metrics'
28
+ flag [:enable, :e], :type => Array
26
29
 
27
30
  desc 'Any number or string used to generate the randomized site ID.'
28
31
  flag [:seed]
@@ -102,7 +105,7 @@ class Dropsonde
102
105
  This is in jsonl format for direct upload to BigQuery."
103
106
  t.command :example do |c|
104
107
  c.desc 'How many rows to generate'
105
- c.flag [:size], :default_value => 100
108
+ c.flag [:size], :default_value => 100, :type => Integer
106
109
 
107
110
  c.desc 'Filename for the output (in jsonl format).'
108
111
  c.flag [:filename], :default_value => 'example.jsonl'
@@ -4,7 +4,13 @@ class Dropsonde::Metrics
4
4
  extend LittlePlugger( :path => 'dropsonde/metrics', :module => Dropsonde::Metrics)
5
5
 
6
6
  def initialize
7
- Dropsonde::Metrics.disregard_plugins(*Dropsonde.settings[:blacklist])
7
+ if Dropsonde.settings[:enable]
8
+ Dropsonde.settings[:disable] ||= []
9
+ disable = Dropsonde::Metrics.plugins.keys - Dropsonde.settings[:enable].map(&:to_sym)
10
+ Dropsonde.settings[:disable].concat disable
11
+ end
12
+
13
+ Dropsonde::Metrics.disregard_plugins(*Dropsonde.settings[:disable])
8
14
  Dropsonde::Metrics.initialize_plugins
9
15
  end
10
16
 
@@ -28,9 +34,9 @@ class Dropsonde::Metrics
28
34
  str << plugin.description.strip
29
35
  str << "\n\n"
30
36
  end
31
- if Dropsonde.settings[:blacklist]
37
+ if Dropsonde.settings[:disable]
32
38
  str << "Disabled plugins:\n"
33
- str << " #{Dropsonde.settings[:blacklist].join(', ')}"
39
+ str << " #{Dropsonde.settings[:disable].join(', ')}"
34
40
  end
35
41
  str
36
42
  end
@@ -50,16 +56,16 @@ class Dropsonde::Metrics
50
56
  Dropsonde::Metrics.plugins.each do |name, plugin|
51
57
  schema = plugin.schema
52
58
 
53
- plugin.setup
59
+ plugin.setup if plugin.respond_to? :setup
54
60
  data = sanity_check_data(plugin, plugin.run)
55
- plugin.cleanup
61
+ plugin.cleanup if plugin.respond_to? :cleanup
56
62
 
57
63
  str << plugin.name+"\n"
58
64
  str << "-------------------------------\n"
59
65
  str << plugin.description
60
66
  data.each do |row|
61
67
  key = row.keys.first
62
- values = row.values.first
68
+ values = row.values.flatten
63
69
 
64
70
  desc = schema.find {|item| item[:name].to_sym == key.to_sym}[:description]
65
71
  str << "- #{key}: #{desc}\n"
@@ -112,6 +118,9 @@ class Dropsonde::Metrics
112
118
  # We accept both the plugin and data gathered from the plugin so that
113
119
  # we can sanitize both data and example data
114
120
  def sanity_check_data(plugin, data)
121
+ # This allows plugin authors to easily skip metrics with no results
122
+ return [] if data.nil?
123
+
115
124
  keys_data = data.map {|item| item.keys }.flatten.map(&:to_s)
116
125
  keys_schema = plugin.schema.map {|item| item[:name] }
117
126
 
@@ -0,0 +1,50 @@
1
+ class Dropsonde::Metrics::Environments
2
+ def self.initialize_environments
3
+ # Require any libraries needed here -- no need to load puppet or puppetdb;
4
+ # they're already loaded. This hook is named after the class name.
5
+ # All plugins are initialized at startup before any metrics are generated.
6
+ end
7
+
8
+ def self.description
9
+ # This is a Ruby squiggle heredoc; just a multi-line string with indentation removed
10
+ <<~EOF
11
+ This group of metrics gathers information about environments.
12
+ EOF
13
+ end
14
+
15
+ def self.schema
16
+ # return an array of hashes of a partial schema to be merged into the complete schema
17
+ [
18
+ {
19
+ "description": "The number of environments",
20
+ "mode": "NULLABLE",
21
+ "name": "environment_count",
22
+ "type": "INTEGER"
23
+ }
24
+ ]
25
+ end
26
+
27
+ def self.setup
28
+ # run just before generating this metric
29
+ end
30
+
31
+ def self.run
32
+ # return an array of hashes representing the data to be merged into the combined checkin
33
+ [
34
+ :environment_count => Puppet.lookup(:environments).list.count,
35
+ ]
36
+ end
37
+
38
+ def self.example
39
+ # this method is used to generate a table filled with randomized data to
40
+ # make it easier to write data aggregation queries without access to the
41
+ # actual private data that users have submitted.
42
+ [
43
+ :environment_count => rand(1..100),
44
+ ]
45
+ end
46
+
47
+ def self.cleanup
48
+ # run just after generating this metric
49
+ end
50
+ end
@@ -0,0 +1,123 @@
1
+ class Dropsonde::Metrics::Platforms
2
+ def self.initialize_platforms
3
+ # require any libraries needed here -- no need to load puppet; it's already initialized
4
+ # All plugins are initialized before any metrics are generated.
5
+ end
6
+
7
+ def self.description
8
+ <<~EOF
9
+ This group of metrics generates usage patterns by platform.
10
+ Currently implemented is a list of classes, the platforms
11
+ they are declared on, and a count of each combination.
12
+ EOF
13
+ end
14
+
15
+ def self.schema
16
+ # return an array of hashes of a partial schema to be merged into the complete schema
17
+ # See https://cloud.google.com/bigquery/docs/schemas#specifying_a_json_schema_file
18
+ [
19
+ {
20
+ "fields": [
21
+ {
22
+ "description": "The class name name",
23
+ "mode": "NULLABLE",
24
+ "name": "name",
25
+ "type": "STRING"
26
+ },
27
+ {
28
+ "description": "The osfamily of the node the class is declared on",
29
+ "mode": "NULLABLE",
30
+ "name": "platform",
31
+ "type": "STRING"
32
+ },
33
+ {
34
+ "description": "The number of time this combination is declared",
35
+ "mode": "NULLABLE",
36
+ "name": "count",
37
+ "type": "INTEGER"
38
+ },
39
+ ],
40
+ "description": "List of all classes in the infrastructure and platforms they're declared on.",
41
+ "mode": "REPEATED",
42
+ "name": "class_platforms",
43
+ "type": "RECORD"
44
+ }
45
+ ]
46
+ end
47
+
48
+ def self.setup
49
+ # run just before generating this metric
50
+ end
51
+
52
+ def self.run
53
+ # skip this metric if we don't have an active PuppetDB connection
54
+ return unless Dropsonde.puppetDB
55
+
56
+ classes = Dropsonde.puppetDB.request( '', 'resources[certname, title] { type = "Class" }').data
57
+ facts = Dropsonde.puppetDB.request( '', 'facts[certname, value] { name = "osfamily" }').data
58
+
59
+ # All public Forge modules that are installed.
60
+ modules = Puppet.lookup(:environments).list.map {|env|
61
+ env.modules.select {|mod|
62
+ mod.forge_module?
63
+ }.map {|fmod|
64
+ fmod.name
65
+ }}.flatten.uniq
66
+
67
+ data = classes.map {|item|
68
+ # filter out any that don't come from public Forge modules
69
+ mod = item['title'].split('::').first.downcase
70
+ next unless modules.include? mod
71
+
72
+ item['platform'] = facts.find {|fact|
73
+ fact['certname'] == item['certname']
74
+ }['value']
75
+
76
+ {
77
+ :name => item['title'],
78
+ :platform => item['platform'],
79
+ }
80
+ }.compact
81
+
82
+ data.each {|item|
83
+ item['count'] = data.select {|i|
84
+ i[:name] == item[:name] and i[:platform] == item[:platform]
85
+ }.count
86
+ }
87
+
88
+ [
89
+ :class_platforms => data,
90
+ ]
91
+ end
92
+
93
+ def self.example
94
+ # this method is used to generate a table filled with randomized data to
95
+ # make it easier to write data aggregation queries without access to the
96
+ # actual private data that users have submitted.
97
+
98
+ platforms = ['RedHat', 'Debian', 'Windows', 'Suse', 'FreeBSD', 'Darwin', 'Archlinux', 'AIX']
99
+ classes = ['', '::Config', '::Service', '::Server', '::Client', '::Packages']
100
+
101
+ data = Dropsonde::Cache.modules
102
+ .sample(rand(35))
103
+ .map do |item|
104
+ name = item.split('-').last.capitalize + classes.sample
105
+
106
+ rand(5).times.map do
107
+ {
108
+ :name => name,
109
+ :platform => platforms.sample,
110
+ :count => rand(1000),
111
+ }
112
+ end
113
+ end.flatten
114
+
115
+ [
116
+ :class_platforms => data.uniq,
117
+ ]
118
+ end
119
+
120
+ def self.cleanup
121
+ # run just after generating this metric
122
+ end
123
+ end
@@ -1,3 +1,3 @@
1
1
  class Dropsonde
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dropsonde
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Ford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-26 00:00:00.000000000 Z
11
+ date: 2020-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -153,7 +153,9 @@ files:
153
153
  - lib/dropsonde/cache.rb
154
154
  - lib/dropsonde/metrics.rb
155
155
  - lib/dropsonde/metrics/dependencies.rb
156
+ - lib/dropsonde/metrics/environments.rb
156
157
  - lib/dropsonde/metrics/modules.rb
158
+ - lib/dropsonde/metrics/platforms.rb
157
159
  - lib/dropsonde/metrics/puppetfiles.rb
158
160
  - lib/dropsonde/monkeypatches.rb
159
161
  - lib/dropsonde/version.rb