dropsonde 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0fd6246263a8a375056fe6fdcb3bcc6b1db8cc6bddc7ea80f9dc870b48c85bc
4
- data.tar.gz: b19a11ca730df12f61e95ae1f7bb81b9bbb65d8251966f3bcdd1fb203dacb96d
3
+ metadata.gz: 643def01638761803e04cf758bbf627ac821bb091fc7dad50c5b161251534728
4
+ data.tar.gz: f0c8a36548214da7beeecee9dc583dcd8d81eceb640b967078fadf5b2978f987
5
5
  SHA512:
6
- metadata.gz: 53c7d74ceb0aa6358ddb12828faf70b840a33a4bbd30977317023050833a5db24d2b72eefc1eecfc1008e6e8eae80249b93a710edc098045c333787e4faa0ef2
7
- data.tar.gz: 6644219dfdcbf761cb2c85bf446266ddf38d343d7a84d969ae88ffb30068c5faa647393457e1e255cd5fcbc2b8acb58f52d9efcd539c5713541932df79f0a807
6
+ metadata.gz: 7c260e1617666fb83d7b0be1a3d73ad13aaee5c33844d5eb4117b186c6eec23a9aad6dc03ff3df393168b6a3715bf492ce677624fdbe536e4961d956d1e9cdcb
7
+ data.tar.gz: cffbe7901afe08afa593ee73fa749358257efcd03d612564b3aac507684897d2d0db3e504a599d1eab7251508c3f455e236985501ea9f6722f108bb02cf80e2e
data/README.md CHANGED
@@ -43,10 +43,10 @@ schema so the system cannot gather any data that's not described in the schema.
43
43
  See the full schema of all enabled plugins by running the command:
44
44
 
45
45
  ```
46
- $ dropsonde schema
46
+ $ dropsonde dev schema
47
47
  ```
48
48
 
49
- All information in the report is keyed off a non-reversible SHA512 hash site-id
49
+ All information in the report is keyed off a non-reversible SHA512 hashed site-id
50
50
  to make it unidentifiable; this report cannot be linked back to you or to your
51
51
  infrastructure. Now that said, we know that the more bits of data shared about a
52
52
  specific site, the easier it is to fingerprint that site. See
@@ -63,6 +63,8 @@ For example, this aggregated data might include records that show a count of how
63
63
  many sites are using various combinations of modules together, but it will never
64
64
  include a record showing the full list of modules that any single site is using.
65
65
 
66
+ ![Data aggregation workflow](./aggregation.png)
67
+
66
68
  With your own Google Cloud account, you can use that [dataset](https://console.cloud.google.com/bigquery?p=dataops-puppet-public-data&d=community&t=forge_modules&page=table)
67
69
  in your own tooling and you can see/contribute to the aggregation queries in its
68
70
  own [repository](https://github.com/puppetlabs/dropsonde-aggregation).
@@ -100,7 +102,7 @@ will not report the `:puppetfiles` metrics.
100
102
  ``` yaml
101
103
  ---
102
104
  :update: false
103
- :blacklist:
105
+ :disable:
104
106
  - puppetfiles
105
107
  ```
106
108
 
@@ -22,7 +22,10 @@ class Dropsonde
22
22
  flag [:ttl], :default_value => 7, :type => Integer
23
23
 
24
24
  desc 'List of metrics to omit'
25
- flag [:blacklist, :b], :type => Array
25
+ flag [:disable, :d], :type => Array
26
+
27
+ desc 'Only load these metrics'
28
+ flag [:enable, :e], :type => Array
26
29
 
27
30
  desc 'Any number or string used to generate the randomized site ID.'
28
31
  flag [:seed]
@@ -102,7 +105,7 @@ class Dropsonde
102
105
  This is in jsonl format for direct upload to BigQuery."
103
106
  t.command :example do |c|
104
107
  c.desc 'How many rows to generate'
105
- c.flag [:size], :default_value => 100
108
+ c.flag [:size], :default_value => 100, :type => Integer
106
109
 
107
110
  c.desc 'Filename for the output (in jsonl format).'
108
111
  c.flag [:filename], :default_value => 'example.jsonl'
@@ -4,7 +4,13 @@ class Dropsonde::Metrics
4
4
  extend LittlePlugger( :path => 'dropsonde/metrics', :module => Dropsonde::Metrics)
5
5
 
6
6
  def initialize
7
- Dropsonde::Metrics.disregard_plugins(*Dropsonde.settings[:blacklist])
7
+ if Dropsonde.settings[:enable]
8
+ Dropsonde.settings[:disable] ||= []
9
+ disable = Dropsonde::Metrics.plugins.keys - Dropsonde.settings[:enable].map(&:to_sym)
10
+ Dropsonde.settings[:disable].concat disable
11
+ end
12
+
13
+ Dropsonde::Metrics.disregard_plugins(*Dropsonde.settings[:disable])
8
14
  Dropsonde::Metrics.initialize_plugins
9
15
  end
10
16
 
@@ -28,9 +34,9 @@ class Dropsonde::Metrics
28
34
  str << plugin.description.strip
29
35
  str << "\n\n"
30
36
  end
31
- if Dropsonde.settings[:blacklist]
37
+ if Dropsonde.settings[:disable]
32
38
  str << "Disabled plugins:\n"
33
- str << " #{Dropsonde.settings[:blacklist].join(', ')}"
39
+ str << " #{Dropsonde.settings[:disable].join(', ')}"
34
40
  end
35
41
  str
36
42
  end
@@ -50,16 +56,16 @@ class Dropsonde::Metrics
50
56
  Dropsonde::Metrics.plugins.each do |name, plugin|
51
57
  schema = plugin.schema
52
58
 
53
- plugin.setup
59
+ plugin.setup if plugin.respond_to? :setup
54
60
  data = sanity_check_data(plugin, plugin.run)
55
- plugin.cleanup
61
+ plugin.cleanup if plugin.respond_to? :cleanup
56
62
 
57
63
  str << plugin.name+"\n"
58
64
  str << "-------------------------------\n"
59
65
  str << plugin.description
60
66
  data.each do |row|
61
67
  key = row.keys.first
62
- values = row.values.first
68
+ values = row.values.flatten
63
69
 
64
70
  desc = schema.find {|item| item[:name].to_sym == key.to_sym}[:description]
65
71
  str << "- #{key}: #{desc}\n"
@@ -112,6 +118,9 @@ class Dropsonde::Metrics
112
118
  # We accept both the plugin and data gathered from the plugin so that
113
119
  # we can sanitize both data and example data
114
120
  def sanity_check_data(plugin, data)
121
+ # This allows plugin authors to easily skip metrics with no results
122
+ return [] if data.nil?
123
+
115
124
  keys_data = data.map {|item| item.keys }.flatten.map(&:to_s)
116
125
  keys_schema = plugin.schema.map {|item| item[:name] }
117
126
 
@@ -0,0 +1,50 @@
1
+ class Dropsonde::Metrics::Environments
2
+ def self.initialize_environments
3
+ # Require any libraries needed here -- no need to load puppet or puppetdb;
4
+ # they're already loaded. This hook is named after the class name.
5
+ # All plugins are initialized at startup before any metrics are generated.
6
+ end
7
+
8
+ def self.description
9
+ # This is a Ruby squiggle heredoc; just a multi-line string with indentation removed
10
+ <<~EOF
11
+ This group of metrics gathers information about environments.
12
+ EOF
13
+ end
14
+
15
+ def self.schema
16
+ # return an array of hashes of a partial schema to be merged into the complete schema
17
+ [
18
+ {
19
+ "description": "The number of environments",
20
+ "mode": "NULLABLE",
21
+ "name": "environment_count",
22
+ "type": "INTEGER"
23
+ }
24
+ ]
25
+ end
26
+
27
+ def self.setup
28
+ # run just before generating this metric
29
+ end
30
+
31
+ def self.run
32
+ # return an array of hashes representing the data to be merged into the combined checkin
33
+ [
34
+ :environment_count => Puppet.lookup(:environments).list.count,
35
+ ]
36
+ end
37
+
38
+ def self.example
39
+ # this method is used to generate a table filled with randomized data to
40
+ # make it easier to write data aggregation queries without access to the
41
+ # actual private data that users have submitted.
42
+ [
43
+ :environment_count => rand(1..100),
44
+ ]
45
+ end
46
+
47
+ def self.cleanup
48
+ # run just after generating this metric
49
+ end
50
+ end
@@ -0,0 +1,123 @@
1
+ class Dropsonde::Metrics::Platforms
2
+ def self.initialize_platforms
3
+ # require any libraries needed here -- no need to load puppet; it's already initialized
4
+ # All plugins are initialized before any metrics are generated.
5
+ end
6
+
7
+ def self.description
8
+ <<~EOF
9
+ This group of metrics generates usage patterns by platform.
10
+ Currently implemented is a list of classes, the platforms
11
+ they are declared on, and a count of each combination.
12
+ EOF
13
+ end
14
+
15
+ def self.schema
16
+ # return an array of hashes of a partial schema to be merged into the complete schema
17
+ # See https://cloud.google.com/bigquery/docs/schemas#specifying_a_json_schema_file
18
+ [
19
+ {
20
+ "fields": [
21
+ {
22
+ "description": "The class name",
23
+ "mode": "NULLABLE",
24
+ "name": "name",
25
+ "type": "STRING"
26
+ },
27
+ {
28
+ "description": "The osfamily of the node the class is declared on",
29
+ "mode": "NULLABLE",
30
+ "name": "platform",
31
+ "type": "STRING"
32
+ },
33
+ {
34
+ "description": "The number of times this combination is declared",
35
+ "mode": "NULLABLE",
36
+ "name": "count",
37
+ "type": "INTEGER"
38
+ },
39
+ ],
40
+ "description": "List of all classes in the infrastructure and platforms they're declared on.",
41
+ "mode": "REPEATED",
42
+ "name": "class_platforms",
43
+ "type": "RECORD"
44
+ }
45
+ ]
46
+ end
47
+
48
+ def self.setup
49
+ # run just before generating this metric
50
+ end
51
+
52
+ def self.run
53
+ # skip this metric if we don't have an active PuppetDB connection
54
+ return unless Dropsonde.puppetDB
55
+
56
+ classes = Dropsonde.puppetDB.request( '', 'resources[certname, title] { type = "Class" }').data
57
+ facts = Dropsonde.puppetDB.request( '', 'facts[certname, value] { name = "osfamily" }').data
58
+
59
+ # All public Forge modules that are installed.
60
+ modules = Puppet.lookup(:environments).list.map {|env|
61
+ env.modules.select {|mod|
62
+ mod.forge_module?
63
+ }.map {|fmod|
64
+ fmod.name
65
+ }}.flatten.uniq
66
+
67
+ data = classes.map {|item|
68
+ # filter out any that don't come from public Forge modules
69
+ mod = item['title'].split('::').first.downcase
70
+ next unless modules.include? mod
71
+
72
+ item['platform'] = facts.find {|fact|
73
+ fact['certname'] == item['certname']
74
+ }['value']
75
+
76
+ {
77
+ :name => item['title'],
78
+ :platform => item['platform'],
79
+ }
80
+ }.compact
81
+
82
+ data.each {|item|
83
+ item['count'] = data.select {|i|
84
+ i[:name] == item[:name] and i[:platform] == item[:platform]
85
+ }.count
86
+ }
87
+
88
+ [
89
+ :class_platforms => data,
90
+ ]
91
+ end
92
+
93
+ def self.example
94
+ # this method is used to generate a table filled with randomized data to
95
+ # make it easier to write data aggregation queries without access to the
96
+ # actual private data that users have submitted.
97
+
98
+ platforms = ['RedHat', 'Debian', 'Windows', 'Suse', 'FreeBSD', 'Darwin', 'Archlinux', 'AIX']
99
+ classes = ['', '::Config', '::Service', '::Server', '::Client', '::Packages']
100
+
101
+ data = Dropsonde::Cache.modules
102
+ .sample(rand(35))
103
+ .map do |item|
104
+ name = item.split('-').last.capitalize + classes.sample
105
+
106
+ rand(5).times.map do
107
+ {
108
+ :name => name,
109
+ :platform => platforms.sample,
110
+ :count => rand(1000),
111
+ }
112
+ end
113
+ end.flatten
114
+
115
+ [
116
+ :class_platforms => data.uniq,
117
+ ]
118
+ end
119
+
120
+ def self.cleanup
121
+ # run just after generating this metric
122
+ end
123
+ end
@@ -1,3 +1,3 @@
1
1
  class Dropsonde
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dropsonde
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Ford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-26 00:00:00.000000000 Z
11
+ date: 2020-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -153,7 +153,9 @@ files:
153
153
  - lib/dropsonde/cache.rb
154
154
  - lib/dropsonde/metrics.rb
155
155
  - lib/dropsonde/metrics/dependencies.rb
156
+ - lib/dropsonde/metrics/environments.rb
156
157
  - lib/dropsonde/metrics/modules.rb
158
+ - lib/dropsonde/metrics/platforms.rb
157
159
  - lib/dropsonde/metrics/puppetfiles.rb
158
160
  - lib/dropsonde/monkeypatches.rb
159
161
  - lib/dropsonde/version.rb