quixoten-puppetdb-terminus 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +202 -0
- data/README.md +29 -0
- data/Rakefile +16 -0
- data/lib/puppet/application/storeconfigs.rb +4 -0
- data/lib/puppet/face/node/deactivate.rb +38 -0
- data/lib/puppet/face/node/status.rb +83 -0
- data/lib/puppet/face/storeconfigs.rb +179 -0
- data/lib/puppet/indirector/catalog/puppetdb.rb +350 -0
- data/lib/puppet/indirector/facts/puppetdb.rb +134 -0
- data/lib/puppet/indirector/facts/puppetdb_apply.rb +25 -0
- data/lib/puppet/indirector/node/puppetdb.rb +22 -0
- data/lib/puppet/indirector/resource/puppetdb.rb +107 -0
- data/lib/puppet/reports/puppetdb.rb +186 -0
- data/lib/puppet/util/puppetdb/blacklist.rb +35 -0
- data/lib/puppet/util/puppetdb/char_encoding.rb +212 -0
- data/lib/puppet/util/puppetdb/command.rb +113 -0
- data/lib/puppet/util/puppetdb/command_names.rb +8 -0
- data/lib/puppet/util/puppetdb/config.rb +112 -0
- data/lib/puppet/util/puppetdb/global_check.rb +31 -0
- data/lib/puppet/util/puppetdb.rb +108 -0
- data/lib/puppetdb/terminus/version.rb +5 -0
- data/lib/puppetdb/terminus.rb +6 -0
- data/lib/puppetdb-terminus.rb +1 -0
- data/lib/quixoten-puppetdb-terminus.rb +1 -0
- data/puppetdb-terminus.gemspec +23 -0
- metadata +99 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'puppet/indirector/facts/puppetdb'
|
2
|
+
|
3
|
+
# This class provides an alternative implementation of the Facts::Puppetdb
|
4
|
+
# terminus that better suits execution via `puppet apply`.
|
5
|
+
#
|
6
|
+
# This terminus is designed to be used as a cache terminus, to ensure that facts
|
7
|
+
# are stored in PuppetDB. It does not act as a real cache itself, however; it
|
8
|
+
# tells Puppet to fall back to the `terminus` instead.
|
9
|
+
class Puppet::Node::Facts::PuppetdbApply < Puppet::Node::Facts::Puppetdb
  # Allows the "facts already stored" flag to be set or cleared from outside.
  attr_writer :dbstored

  # Delegates to the parent terminus' `save`, but only for the first call made
  # on this instance. `save` may be invoked several times during one puppet
  # run; every call after the first is ignored.
  #
  # @param args the save request, handed to the parent implementation as-is
  # @return whatever the parent `save` returns on the first call, nil afterwards
  def save(args)
    return nil if @dbstored

    # Flip the flag before delegating, exactly as the one-shot guard requires.
    @dbstored = true
    super(args)
  end

  # Always answers nil so that puppet falls through to the real terminus.
  #
  # @param args the find request (ignored)
  # @return [nil]
  def find(args)
    nil
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'puppet/node'
|
2
|
+
require 'puppet/indirector/rest'
|
3
|
+
require 'puppet/util/puppetdb'
|
4
|
+
|
5
|
+
class Puppet::Node::Puppetdb < Puppet::Indirector::REST
  include Puppet::Util::Puppetdb

  # Runs the PuppetDB global checks when the terminus is instantiated.
  def initialize
    Puppet::Util::Puppetdb::GlobalCheck.run
  end

  # Intentionally does nothing.
  #
  # @param request the indirector request (ignored)
  # @return [nil]
  def find(request)
    nil
  end

  # Intentionally does nothing.
  #
  # @param request the indirector request (ignored)
  # @return [nil]
  def save(request)
    nil
  end

  # Submits a deactivate-node command (version 2) for the node named by the
  # request key. The key is passed as both of the first two arguments of
  # `submit_command`.
  #
  # @param request the indirector request; only `request.key` is read
  # @return the result of `submit_command`
  def destroy(request)
    node_name = request.key
    submit_command(node_name, node_name, CommandDeactivateNode, 2)
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'puppet/indirector/rest'
require 'puppet/util/puppetdb'
require 'cgi'
require 'json'
require 'uri'
|
5
|
+
|
6
|
+
class Puppet::Resource::Puppetdb < Puppet::Indirector::REST
  include Puppet::Util::Puppetdb

  # Run initial checks
  def initialize
    Puppet::Util::Puppetdb::GlobalCheck.run
  end

  # Queries PuppetDB's `/v3/resources` endpoint for exported resources of the
  # requested type that do not belong to the requesting host, and converts the
  # JSON response into parser resources.
  #
  # @param request the indirector request; `request.key` is the resource type
  #   and `request.options` supplies `:host`, `:filter` and `:scope`
  # @return [Array<Puppet::Parser::Resource>] one resource per row returned
  # @raise [Puppet::Error] if the HTTP request fails, PuppetDB answers with a
  #   non-success status, or the filter contains an unsupported operator
  def search(request)
    profile "resource#search" do
      type   = request.key
      host   = request.options[:host]
      filter = request.options[:filter]
      scope  = request.options[:scope]

      # At minimum, we want to filter to the right type of exported resources.
      expr = ['and',
               ['=', 'type', type],
               ['=', 'exported', true],
               ['not',
                 ['=', 'certname', host]]]

      filter_expr = build_expression(filter)
      expr << filter_expr if filter_expr

      query_param = CGI.escape(expr.to_json)

      begin
        url = "/v3/resources?query=#{query_param}"
        # The query was encoded with CGI.escape, so it is decoded for the
        # profiling message with the matching CGI.unescape. URI.unescape no
        # longer exists on Ruby 3.0+ and would leave '+' undecoded.
        response = profile "Resources query: #{CGI.unescape(url)}" do
          http_get(request, url, headers)
        end
        log_x_deprecation_header(response)

        unless response.is_a? Net::HTTPSuccess
          # Newline characters cause an HTTP error, so strip them
          raise "[#{response.code} #{response.message}] #{response.body.gsub(/[\r\n]/, '')}"
        end
      rescue => e
        # Any failure (transport or bad status) is reported as a Puppet::Error
        # naming the PuppetDB server and port.
        raise Puppet::Error, "Could not retrieve resources from the PuppetDB at #{self.class.server}:#{self.class.port}: #{e}"
      end

      resources = profile "Parse resource query response (size: #{response.body.size})" do
        JSON.load(response.body)
      end

      profile "Build up collected resource objects (count: #{resources.count})" do
        resources.map do |res|
          params = res['parameters'] || {}
          params = params.map do |name,value|
            Puppet::Parser::Resource::Param.new(:name => name, :value => value)
          end
          attrs = {:parameters => params, :scope => scope}
          result = Puppet::Parser::Resource.new(res['type'], res['title'], attrs)
          # Identify the collected resource by its source node, type and title.
          result.collector_id = "#{res['certname']}|#{res['type']}|#{res['title']}"
          result
        end
      end
    end
  end

  # Translates a collection filter, given as a `[lhs, op, rhs]` triple, into a
  # PuppetDB query expression.
  #
  # @param filter [Array, nil] the filter triple, or nil for "no filter"
  # @return [Array, nil] the query expression, or nil when no filter was given
  # @raise [Puppet::Error] if the operator is not `==`, `!=`, `and` or `or`
  def build_expression(filter)
    return nil unless filter

    lhs, op, rhs = filter

    case op
    when '==', '!='
      build_predicate(op, lhs, rhs)
    when 'and', 'or'
      build_join(op, lhs, rhs)
    else
      raise Puppet::Error, "Operator #{op} in #{filter.inspect} not supported"
    end
  end

  # Builds an equality (or negated equality) test on a single field.
  #
  # @param op [String] `'=='` or `'!='`
  # @param field [String] `"tag"`, `"title"`, or a parameter name
  # @param value the value to compare against
  # @return [Array] the query expression, wrapped in `['not', ...]` for `'!='`
  def build_predicate(op, field, value)
    # Title and tag aren't parameters, so we have to special-case them.
    expr = case field
           when "tag"
             # Tag queries are case-insensitive, so downcase them
             ["=", "tag", value.downcase]
           when "title"
             ["=", "title", value]
           else
             ["=", ['parameter', field], value]
           end

    op == '!=' ? ['not', expr] : expr
  end

  # Joins two sub-filters with `and` / `or`, translating each side recursively.
  #
  # @param op [String] `'and'` or `'or'`
  # @param lhs [Array, nil] left-hand filter triple
  # @param rhs [Array, nil] right-hand filter triple
  # @return [Array] `[op, translated_lhs, translated_rhs]`
  def build_join(op, lhs, rhs)
    lhs = build_expression(lhs)
    rhs = build_expression(rhs)

    [op, lhs, rhs]
  end

  # HTTP headers sent with the resources query.
  #
  # @return [Hash] an `Accept` header requesting JSON
  def headers
    {'Accept' => 'application/json'}
  end
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
require 'puppet'
|
2
|
+
require 'puppet/util/puppetdb'
|
3
|
+
require 'puppet/util/puppetdb/command_names'
|
4
|
+
|
5
|
+
Puppet::Reports.register_report(:puppetdb) do
  include Puppet::Util::Puppetdb

  Puppet::Util::Puppetdb::GlobalCheck.run

  CommandStoreReport = Puppet::Util::Puppetdb::CommandNames::CommandStoreReport

  desc <<-DESC
Send report information to PuppetDB via the REST API. Reports are serialized to
JSON format, and then submitted to puppetdb using the '#{CommandStoreReport}'
command.
  DESC

  # Entry point of the report processor: converts the report to its wire
  # format and submits it as a store-report command (version 3).
  def process
    profile "report#process" do
      submit_command(self.host, report_to_hash, CommandStoreReport, 3)
    end
  end

  # TODO: puppet_version and report_format are read straight from instance
  # variables below. Puppet 3.x exposes accessors for them (ticket #16139,
  # puppet pull request #1073), but for backwards compatibility we cannot rely
  # on those until 3.x is the oldest supported version of puppet.

  # @api private
  # @return the report's format version, as stored in @report_format
  def report_format
    @report_format
  end

  # @api private
  # @return the puppet version recorded in @puppet_version
  def puppet_version
    @puppet_version
  end

  # Builds the hash that represents `self` (an instance of
  # `Puppet::Transaction::Report`) on the wire to PuppetDB.
  #
  # @api private
  # @return [Hash] the wire-format report, including the report-format-4 fields
  def report_to_hash
    profile "Convert report to wire format hash" do
      wire_report = {
        "certname"              => host,
        "puppet-version"        => puppet_version,
        "report-format"         => report_format,
        "configuration-version" => configuration_version.to_s,
        "start-time"            => Puppet::Util::Puppetdb.to_wire_time(time),
        "end-time"              => Puppet::Util::Puppetdb.to_wire_time(time + run_duration),
        "resource-events"       => build_events_list,
        "environment"           => environment,
      }
      add_v4_fields_to_report(wire_report)
    end
  end

  # Collects one wire-format event hash per resource event, fabricating events
  # for skipped or failed resources that carry none, then applies the
  # blacklist filter.
  #
  # @api private
  # @return [Array<Hash>] the (possibly filtered) list of event hashes
  def build_events_list
    profile "Build events list (count: #{resource_statuses.count})" do
      collected = []

      resource_statuses.each do |_name, status|
        if !status.events.empty?
          status.events.each { |event| collected << event_to_hash(status, event) }
        elsif status.skipped
          collected << fabricate_event(status, "skipped")
        elsif status.failed
          # PP-254:
          # Some report providers do not attach events to a resource status
          # that has failed, so an event is fabricated here. This can go away
          # once PuppetDB no longer supports the Puppet versions with that bug.
          collected << fabricate_event(status, "failure")
        end
      end

      filter_events(collected)
    end
  end

  # Reads the total run time from the report metrics.
  #
  # TODO: this value is wrong in puppet: start-time plus this duration is
  # regularly earlier than the timestamps on individual resource events. The
  # long-term fix is for puppet to expose correct data; see
  # http://projects.puppetlabs.com/issues/16480
  #
  # @api private
  # @return the "total" entry of the "time" metric
  # @raise [Puppet::Error] if the report has no "time"/"total" metric
  def run_duration
    time_metrics = metrics["time"]
    total = time_metrics && time_metrics["total"]
    return total if total

    raise Puppet::Error, "Report from #{host} contained no metrics, which is often caused by a failed catalog compilation. Unable to process."
  end

  # Builds the wire-format hash for one `Puppet::Transaction::Event`.
  #
  # @api private
  # @param resource_status the status object the event belongs to
  # @param event the event to convert
  # @return [Hash] the event hash, including the report-format-4 fields
  def event_to_hash(resource_status, event)
    wire_event = {
      "status"         => event.status,
      "timestamp"      => Puppet::Util::Puppetdb.to_wire_time(event.time),
      "resource-type"  => resource_status.resource_type,
      "resource-title" => resource_status.title,
      "property"       => event.property,
      "new-value"      => event.desired_value,
      "old-value"      => event.previous_value,
      "message"        => event.message,
      "file"           => resource_status.file,
      "line"           => resource_status.line
    }
    add_v4_fields_to_event(resource_status, wire_event)
  end

  # Invents a wire-format event for a `Puppet::Resource::Status` that has no
  # events of its own, carrying the supplied status string and nil for all
  # event-specific fields.
  #
  # @api private
  # @param resource_status the status object to describe
  # @param event_status [String] the value for the event's `"status"` field
  # @return [Hash] the fabricated event hash
  def fabricate_event(resource_status, event_status)
    wire_event = {
      "status"         => event_status,
      "timestamp"      => Puppet::Util::Puppetdb.to_wire_time(resource_status.time),
      "resource-type"  => resource_status.resource_type,
      "resource-title" => resource_status.title,
      "property"       => nil,
      "new-value"      => nil,
      "old-value"      => nil,
      "message"        => nil,
      "file"           => resource_status.file,
      "line"           => resource_status.line
    }
    add_v4_fields_to_event(resource_status, wire_event)
  end

  # Adds "transaction-uuid" to a report hash; the value is nil for reports
  # older than report format 4.
  #
  # @api private
  # @param report_hash [Hash] the report hash (not modified)
  # @return [Hash] a new hash with the extra key
  def add_v4_fields_to_report(report_hash)
    uuid = report_format >= 4 ? transaction_uuid : nil
    report_hash.merge("transaction-uuid" => uuid)
  end

  # Adds "containment-path" to an event hash; the value is nil for reports
  # older than report format 4.
  #
  # @api private
  # @param resource_status the status object supplying the containment path
  # @param event_hash [Hash] the event hash (not modified)
  # @return [Hash] a new hash with the extra key
  def add_v4_fields_to_event(resource_status, event_hash)
    path = report_format >= 4 ? resource_status.containment_path : nil
    event_hash.merge("containment-path" => path)
  end

  # Drops blacklisted events when the configuration asks for it; otherwise
  # hands the list back untouched.
  #
  # @api private
  # @param events [Array<Hash>] wire-format event hashes
  # @return [Array<Hash>] the events that survive filtering
  def filter_events(events)
    return events unless config.ignore_blacklisted_events?

    profile "Filter blacklisted events" do
      events.reject { |e| config.is_event_blacklisted?(e) }
    end
  end

  # Shortcut to the puppetdb configuration object.
  #
  # @api private
  def config
    Puppet::Util::Puppetdb.config
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Puppet::Util::Puppetdb
  class Blacklist

    BlacklistedEvent = Struct.new(:resource_type, :resource_title, :status, :property)

    # Default set of events to filter out of reports: the 'skipped' Schedule
    # events that older versions of puppet emit on every agent run. As of
    # puppet 3.3 these should no longer be generated; the list is kept for
    # backward compatibility.
    BlacklistedEvents = %w[never puppet hourly daily weekly monthly].map do |title|
      BlacklistedEvent.new("Schedule", title, "skipped", nil)
    end

    # Indexes the given events as nested hashes keyed by
    # resource type -> resource title -> status -> property.
    #
    # @param events [Array<BlacklistedEvent>] the events to blacklist
    def initialize(events)
      index = {}
      events.each do |e|
        by_title    = (index[e.resource_type] ||= {})
        by_status   = (by_title[e.resource_title] ||= {})
        by_property = (by_status[e.status] ||= {})
        by_property[e.property] = true
      end
      @events = index
    end

    # Tells whether a wire-format event hash matches a blacklisted entry on
    # all four of resource type, resource title, status and property.
    #
    # @param event [Hash] event hash with "resource-type", "resource-title",
    #   "status" and "property" keys
    # @return [Boolean] true when the event is blacklisted, false otherwise
    def is_event_blacklisted?(event)
      by_title    = @events.fetch(event["resource-type"], {})
      by_status   = by_title.fetch(event["resource-title"], {})
      by_property = by_status.fetch(event["status"], {})
      by_property.fetch(event["property"], false)
    end
  end
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'puppet'
|
2
|
+
|
3
|
+
module Puppet
  module Util
    module Puppetdb
      # Helpers for coercing strings into valid UTF-8 before they are sent to
      # PuppetDB.
      module CharEncoding


        # Some of this code is modeled after:
        #  https://github.com/brianmario/utf8/blob/ef10c033/ext/utf8/utf8proc.c
        #  https://github.com/brianmario/utf8/blob/ef10c033/ext/utf8/string_utf8.c

        # Expected length, in bytes, of a UTF-8 character, indexed by the value
        # of its first byte. 0 marks a byte that cannot start a character.
        Utf8CharLens = [
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        ]

        # The three bytes of U+FFFD (the Unicode replacement character).
        Utf8ReplacementChar = [ 0xEF, 0xBF, 0xBD ].pack("c*")


        # Returns a UTF-8 version of `str`, replacing or dropping byte
        # sequences that are not valid, and logging a warning whenever the
        # result differs from the input.
        #
        # @param str [String] the string to clean
        # @return [String] `str` itself when it is already valid UTF-8,
        #   otherwise a cleaned copy
        def self.utf8_string(str)
          # Anchor the match and escape the dot: an unanchored /1.8/ also
          # matches versions such as "2.1.8" or "3.1.8" and would send them
          # down the Ruby 1.8-only Iconv code path.
          if RUBY_VERSION =~ /\A1\.8/
            # Ruby 1.8 doesn't have String#encode and related methods, and there
            #  appears to be a bug in iconv that will interpret some byte sequences
            #  as 6-byte characters.  Thus, we are forced to resort to some unfortunate
            #  manual chicanery.
            warn_if_changed(str, ruby18_clean_utf8(str))
          elsif str.encoding == Encoding::UTF_8
            # If we get here, we're in ruby 1.9+, so we have the string encoding methods
            #  available.  However, just because a ruby String object is already
            #  marked as UTF-8, that doesn't guarantee that its contents are actually
            #  valid; and if you call ruby's ".encode" method with an encoding of
            #  "utf-8" for a String that ruby already believes is UTF-8, ruby
            #  seems to optimize that to be a no-op.  So, we have to do some more
            #  complex handling...

            # If the string already has valid encoding then we're fine.
            return str if str.valid_encoding?

            # If not, we basically have to walk over the characters and replace
            #  them by hand.
            warn_if_changed(str, str.each_char.map { |c| c.valid_encoding? ? c : "\ufffd"}.join)
          else
            # if we get here, we're ruby 1.9 and the current string is *not* encoded
            #  as UTF-8.  Thus we can actually rely on ruby's "encode" method.
            begin
              str.encode('UTF-8')
            rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
              # If we got an exception, the string is either invalid or not
              # convertible to UTF-8, so replace those bytes.
              warn_if_changed(str, str.encode('UTF-8', :invalid => :replace, :undef => :replace))
            end
          end
        end

        # Logs a warning when cleaning altered the string.
        #
        # @api private
        # @param str [String] the original string
        # @param converted_str [String] the cleaned string
        # @return [String] `converted_str`
        def self.warn_if_changed(str, converted_str)
          if converted_str != str
            Puppet.warning "Ignoring invalid UTF-8 byte sequences in data to be sent to PuppetDB"
          end
          converted_str
        end

        # Cleans a string on Ruby 1.8 by running it through Iconv and then
        # through the manual scrubber.
        #
        # @api private
        # @param str [String] the string to clean
        # @return [String] the cleaned string
        def self.ruby18_clean_utf8(str)
          #iconv_to_utf8(str)
          #ruby18_manually_clean_utf8(str)

          # So, we've tried doing this UTF8 cleaning for ruby 1.8 a few different
          # ways.  Doing it via IConv, we don't do a good job of handling characters
          # whose codepoints would exceed the legal maximum for UTF-8.  Doing it via
          # our manual scrubbing process is slower and doesn't catch overlong
          # encodings.  Since this code really shouldn't even exist in the first place
          # we've decided to simply compose the two scrubbing methods for now, rather
          # than trying to add detection of overlong encodings.  It'd be a non-trivial
          # chunk of code, and it'd have to do a lot of bitwise arithmetic (which Ruby
          # is not blazingly fast at).
          ruby18_manually_clean_utf8(iconv_to_utf8(str))
        end


        # Runs the string through Iconv (UTF-8 to UTF-8 with //IGNORE). This is
        # the first of the two scrubbing passes composed by
        # `ruby18_clean_utf8`.
        #
        # @api private
        # @param str [String] the string to clean
        # @return [String] the string as returned by Iconv
        def self.iconv_to_utf8(str)
          iconv = Iconv.new('UTF-8//IGNORE', 'UTF-8')

          # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
          # A space is appended before conversion and removed again afterwards.
          iconv.iconv(str + " ")[0..-2]
        end

        # Looks up the expected character length for a leading byte.
        #
        # @api private
        # @param byte [Integer] a byte value (0-255)
        # @return [Integer] 1-4, or 0 if the byte cannot start a character
        def self.get_char_len(byte)
          Utf8CharLens[byte]
        end

        # Manually cleans a string by stripping any byte sequences that are
        # not valid UTF-8 characters.  If you'd prefer for the invalid bytes to be
        # replaced with the unicode replacement character rather than being stripped,
        # you may pass `false` for the optional second parameter (`strip`, which
        # defaults to `true`).
        #
        # @api private
        # @param str [String] the string to clean
        # @param strip [Boolean] drop invalid sequences (true) or substitute the
        #   replacement character (false)
        # @return [String] the cleaned string
        # @raise [Puppet::DevError] if the length table yields an unexpected value
        def self.ruby18_manually_clean_utf8(str, strip = true)

          # This is a hack to allow this code to work with either ruby 1.8 or 1.9,
          # which is useful for debugging and benchmarking.  For more info see the
          # comments in the #get_byte method below.
          @has_get_byte = str.respond_to?(:getbyte)


          i = 0
          len = str.length
          result = ""

          while i < len
            byte = get_byte(str, i)

            i += 1

            char_len = get_char_len(byte)
            case char_len
            when 0
              result.concat(Utf8ReplacementChar) unless strip
            when 1
              result << byte
            when 2..4
              ruby18_handle_multibyte_char(result, byte, str, i, char_len, strip)
              # The leading byte has already been consumed; skip the rest.
              i += char_len - 1
            else
              raise Puppet::DevError, "Unhandled UTF8 char length: '#{char_len}'"
            end

          end

          result
        end

        # Appends one multibyte character to `result_str` if its trailing bytes
        # are valid; otherwise appends the replacement character (or nothing,
        # when `strip` is true).
        #
        # @api private
        # @param result_str [String] the output buffer (mutated)
        # @param byte [Integer] the already-read leading byte
        # @param str [String] the input string
        # @param i [Integer] index of the first byte after the leading byte
        # @param char_len [Integer] expected total length of the character (2-4)
        # @param strip [Boolean] drop invalid sequences instead of replacing them
        def self.ruby18_handle_multibyte_char(result_str, byte, str, i, char_len, strip = true)
          # keeping an array of bytes for now because we need to do some
          #  bitwise math on them.
          char_additional_bytes = []

          # If we don't have enough bytes left to read the full character, we
          #  put on a replacement character and bail.
          if i + (char_len - 1) > str.length
            result_str.concat(Utf8ReplacementChar) unless strip
            return
          end

          # we've already read the first byte, so we need to set up a range
          #  from 0 to (n-2); e.g. if it's a 2-byte char, we will have a range
          #  from 0 to 0 which will result in reading 1 more byte
          (0..char_len - 2).each do |x|
            char_additional_bytes << get_byte(str, i + x)
          end

          if (is_valid_multibyte_suffix(byte, char_additional_bytes))
            result_str << byte
            result_str.concat(char_additional_bytes.pack("c*"))
          else
            result_str.concat(Utf8ReplacementChar) unless strip
          end
        end

        # Checks the bytes that follow the leading byte of a multibyte
        # character.
        #
        # @api private
        # @param byte [Integer] the leading byte
        # @param additional_bytes [Array<Integer>] the bytes after the leading byte
        # @return [Boolean] true when every trailing byte is a continuation byte
        #   (10xxxxxx) and the character does not encode a code point above
        #   U+10FFFF
        def self.is_valid_multibyte_suffix(byte, additional_bytes)
          # This is heinous, but the UTF-8 spec says that codepoints greater than
          #  0x10FFFF are illegal.  U+10FFFF is encoded as F4 8F BF BF, so the
          #  first sequence over that limit is F4 90 80 80; if the first byte is
          #  F4 then we have to check for that condition.
          if byte == 0xF4
            val = additional_bytes.inject(0) { |result, b | (result << 8) + b}
            if val >= 0x908080
              return false
            end
          end
          additional_bytes.all? { |b| ((b & 0xC0) == 0x80) }
        end

        # Reads the byte at `index` from `str`.
        #
        # @api private
        # @param str [String] the string to read from
        # @param index [Integer] byte index
        # @return [Integer, nil] the byte value
        def self.get_byte(str, index)
          # This method is a hack to allow this code to work with either ruby 1.8
          #  or 1.9.  In production this code path should never be exercised by
          #  1.9 because it has a much more sane way to accomplish our goal, but
          #  for testing, it is useful to be able to run the 1.8 codepath in 1.9.
          if @has_get_byte
            str.getbyte(index)
          else
            str[index]
          end
        end

      end
    end
  end
end
|