quixoten-puppetdb-terminus 2.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +202 -0
- data/README.md +29 -0
- data/Rakefile +16 -0
- data/lib/puppet/application/storeconfigs.rb +4 -0
- data/lib/puppet/face/node/deactivate.rb +38 -0
- data/lib/puppet/face/node/status.rb +83 -0
- data/lib/puppet/face/storeconfigs.rb +179 -0
- data/lib/puppet/indirector/catalog/puppetdb.rb +350 -0
- data/lib/puppet/indirector/facts/puppetdb.rb +134 -0
- data/lib/puppet/indirector/facts/puppetdb_apply.rb +25 -0
- data/lib/puppet/indirector/node/puppetdb.rb +22 -0
- data/lib/puppet/indirector/resource/puppetdb.rb +107 -0
- data/lib/puppet/reports/puppetdb.rb +186 -0
- data/lib/puppet/util/puppetdb/blacklist.rb +35 -0
- data/lib/puppet/util/puppetdb/char_encoding.rb +212 -0
- data/lib/puppet/util/puppetdb/command.rb +113 -0
- data/lib/puppet/util/puppetdb/command_names.rb +8 -0
- data/lib/puppet/util/puppetdb/config.rb +112 -0
- data/lib/puppet/util/puppetdb/global_check.rb +31 -0
- data/lib/puppet/util/puppetdb.rb +108 -0
- data/lib/puppetdb/terminus/version.rb +5 -0
- data/lib/puppetdb/terminus.rb +6 -0
- data/lib/puppetdb-terminus.rb +1 -0
- data/lib/quixoten-puppetdb-terminus.rb +1 -0
- data/puppetdb-terminus.gemspec +23 -0
- metadata +99 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'puppet/indirector/facts/puppetdb'
|
2
|
+
|
3
|
+
# This class provides an alternative implementation of the Facts::Puppetdb
|
4
|
+
# terminus that better suits execution via `puppet apply`.
|
5
|
+
#
|
6
|
+
# This terminus is designed to be used as a cache terminus, to ensure that facts
|
7
|
+
# are stored in PuppetDB. It does not act as a real cache itself however, it
|
8
|
+
# tells Puppet to fall back to the `terminus` instead.
|
9
|
+
class Puppet::Node::Facts::PuppetdbApply < Puppet::Node::Facts::Puppetdb
  # Lets callers reset (or pre-set) the "already stored" flag used by #save.
  attr_writer :dbstored

  # Forwards to the parent terminus' `save` only on the first call; every
  # later call is a no-op that returns nil. A single puppet run may call
  # `save` several times, and the facts only need to be stored once.
  #
  # The flag is raised *before* delegating, so a failing first save is not
  # retried by subsequent calls.
  def save(args)
    return if @dbstored

    @dbstored = true
    super(args)
  end

  # Always answers nil so that puppet falls through to the real terminus
  # instead of treating this one as a cache hit.
  def find(args)
    nil
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'puppet/node'
|
2
|
+
require 'puppet/indirector/rest'
|
3
|
+
require 'puppet/util/puppetdb'
|
4
|
+
|
5
|
+
class Puppet::Node::Puppetdb < Puppet::Indirector::REST
  include Puppet::Util::Puppetdb

  # Runs the global PuppetDB sanity checks when the terminus is created.
  def initialize
    Puppet::Util::Puppetdb::GlobalCheck.run
  end

  # Intentionally empty: this terminus does not look nodes up (returns nil).
  def find(request); end

  # Intentionally empty: this terminus does not store nodes (returns nil).
  def save(request); end

  # Submits a "deactivate node" command (command version 2) to PuppetDB.
  # The request key is used both as the certname and as the command payload.
  def destroy(request)
    certname = request.key
    submit_command(certname, certname, CommandDeactivateNode, 2)
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'puppet/indirector/rest'
require 'puppet/util/puppetdb'
require 'cgi'
require 'json'
require 'uri'
|
5
|
+
|
6
|
+
# Resource terminus that collects exported resources by querying PuppetDB's
# `/v3/resources` endpoint.
class Puppet::Resource::Puppetdb < Puppet::Indirector::REST
  include Puppet::Util::Puppetdb

  # Run initial checks
  def initialize
    Puppet::Util::Puppetdb::GlobalCheck.run
  end

  # Searches PuppetDB for exported resources.
  #
  # Reads from the request:
  #   * `request.key`              - the resource type to collect
  #   * `request.options[:host]`   - certname whose own resources are excluded
  #   * `request.options[:filter]` - optional `[lhs, op, rhs]` collection filter
  #   * `request.options[:scope]`  - scope handed to every built resource
  #
  # Returns an array of `Puppet::Parser::Resource` objects, each tagged with a
  # `collector_id` of the form "certname|type|title".
  #
  # Raises `Puppet::Error` when the HTTP request fails or PuppetDB answers
  # with a non-success status. JSON parsing happens outside that rescue, so a
  # malformed body surfaces as whatever `JSON.load` raises.
  def search(request)
    profile "resource#search" do
      type   = request.key
      host   = request.options[:host]
      filter = request.options[:filter]
      scope  = request.options[:scope]

      # At minimum, we want to filter to the right type of exported resources.
      expr = ['and',
               ['=', 'type', type],
               ['=', 'exported', true],
               ['not',
                 ['=', 'certname', host]]]

      filter_expr = build_expression(filter)
      expr << filter_expr if filter_expr

      query_param = CGI.escape(expr.to_json)

      begin
        url = "/v3/resources?query=#{query_param}"
        # The query was encoded with CGI.escape, so CGI.unescape is the
        # matching decoder for the human-readable profile label (it also turns
        # '+' back into a space, which URI.unescape did not do).
        response = profile "Resources query: #{CGI.unescape(url)}" do
          http_get(request, url, headers)
        end
        log_x_deprecation_header(response)

        unless response.is_a? Net::HTTPSuccess
          # Newline characters cause an HTTP error, so strip them
          raise "[#{response.code} #{response.message}] #{response.body.gsub(/[\r\n]/, '')}"
        end
      rescue => e
        raise Puppet::Error, "Could not retrieve resources from the PuppetDB at #{self.class.server}:#{self.class.port}: #{e}"
      end

      resources = profile "Parse resource query response (size: #{response.body.size})" do
        JSON.load(response.body)
      end

      profile "Build up collected resource objects (count: #{resources.count})" do
        resources.map do |res|
          params = res['parameters'] || {}
          params = params.map do |name,value|
            Puppet::Parser::Resource::Param.new(:name => name, :value => value)
          end
          attrs = {:parameters => params, :scope => scope}
          result = Puppet::Parser::Resource.new(res['type'], res['title'], attrs)
          result.collector_id = "#{res['certname']}|#{res['type']}|#{res['title']}"
          result
        end
      end
    end
  end

  # Translates a collection filter of the form `[lhs, op, rhs]` into a
  # PuppetDB query expression.
  #
  # Returns nil when no filter is given. Raises `Puppet::Error` for any
  # operator other than '==', '!=', 'and' or 'or'.
  def build_expression(filter)
    return nil unless filter

    lhs, op, rhs = filter

    case op
    when '==', '!='
      build_predicate(op, lhs, rhs)
    when 'and', 'or'
      build_join(op, lhs, rhs)
    else
      raise Puppet::Error, "Operator #{op} in #{filter.inspect} not supported"
    end
  end

  # Builds an equality test on `field`, wrapped in ['not', ...] when `op` is
  # '!='.
  def build_predicate(op, field, value)
    # Title and tag aren't parameters, so we have to special-case them.
    expr = case field
           when "tag"
             # Tag queries are case-insensitive, so downcase them
             ["=", "tag", value.downcase]
           when "title"
             ["=", "title", value]
           else
             ["=", ['parameter', field], value]
           end

    op == '!=' ? ['not', expr] : expr
  end

  # Builds an 'and'/'or' expression by recursively translating both operands.
  def build_join(op, lhs, rhs)
    lhs = build_expression(lhs)
    rhs = build_expression(rhs)

    [op, lhs, rhs]
  end

  # HTTP headers sent with the resources query.
  def headers
    {'Accept' => 'application/json'}
  end
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
require 'puppet'
|
2
|
+
require 'puppet/util/puppetdb'
|
3
|
+
require 'puppet/util/puppetdb/command_names'
|
4
|
+
|
5
|
+
Puppet::Reports.register_report(:puppetdb) do
  include Puppet::Util::Puppetdb

  Puppet::Util::Puppetdb::GlobalCheck.run

  CommandStoreReport = Puppet::Util::Puppetdb::CommandNames::CommandStoreReport

  desc <<-DESC
Send report information to PuppetDB via the REST API. Reports are serialized to
JSON format, and then submitted to puppetdb using the '#{CommandStoreReport}'
command.
  DESC

  # Report-processor entry point: converts this report to its wire format and
  # submits it to PuppetDB as a store-report command (command version 3).
  def process
    profile "report#process" do
      certname = self.host
      payload = report_to_hash
      submit_command(certname, payload, CommandStoreReport, 3)
    end
  end

  # NOTE: report_format and puppet_version are read straight from instance
  # variables. Puppet 3.x exposes accessors for both (ticket #16139, puppet
  # pull request #1073), but for backwards compatibility those cannot be
  # relied upon until 3.x is the oldest supported puppet version.

  # Report format number, read directly from the instance variable.
  #
  # @api private
  def report_format
    @report_format
  end

  # Puppet version string, read directly from the instance variable.
  #
  # @api private
  def puppet_version
    @puppet_version
  end

  # Builds the hash that represents `self` (an instance of
  # `Puppet::Transaction::Report`) on the wire to PuppetDB.
  #
  # @api private
  def report_to_hash
    profile "Convert report to wire format hash" do
      wire_report = {
        "certname"              => host,
        "puppet-version"        => puppet_version,
        "report-format"         => report_format,
        "configuration-version" => configuration_version.to_s,
        "start-time"            => Puppet::Util::Puppetdb.to_wire_time(time),
        "end-time"              => Puppet::Util::Puppetdb.to_wire_time(time + run_duration),
        "resource-events"       => build_events_list,
        "environment"           => environment,
      }
      add_v4_fields_to_report(wire_report)
    end
  end

  # Flattens every resource status into a list of wire-format events, then
  # applies the blacklist filter.
  #
  # A status with real events contributes one hash per event. A status with
  # no events contributes a single fabricated "skipped" or "failure" event
  # when it is flagged as skipped or failed, and nothing otherwise.
  #
  # @api private
  def build_events_list
    profile "Build events list (count: #{resource_statuses.count})" do
      collected = resource_statuses.inject([]) do |events, (_name, status)|
        additions =
          if !status.events.empty?
            status.events.map { |event| event_to_hash(status, event) }
          elsif status.skipped
            [fabricate_event(status, "skipped")]
          elsif status.failed
            # PP-254:
            # Some report providers omit events on a failed resource status,
            # so one is fabricated here. This can go away once PuppetDB no
            # longer supports the puppet versions that have that bug.
            [fabricate_event(status, "failure")]
          else
            []
          end
        events.concat(additions)
      end
      filter_events(collected)
    end
  end

  # Total run time taken from the report's "time" metrics.
  #
  # Raises `Puppet::Error` when the report carries no "time"/"total" metric
  # (the error message attributes this to a failed catalog compilation).
  #
  # TODO: the value reported by puppet is unreliable: start-time plus this
  # duration is often earlier than the timestamps on individual resource
  # events. See http://projects.puppetlabs.com/issues/16480
  #
  # @api private
  def run_duration
    time_metrics = metrics["time"]
    total = time_metrics && time_metrics["total"]
    return total if total

    raise Puppet::Error, "Report from #{host} contained no metrics, which is often caused by a failed catalog compilation. Unable to process."
  end

  # Builds the wire-format hash for one `Puppet::Transaction::Event`
  # belonging to the given resource status.
  #
  # @api private
  def event_to_hash(resource_status, event)
    wire_event = {
      "status"         => event.status,
      "timestamp"      => Puppet::Util::Puppetdb.to_wire_time(event.time),
      "resource-type"  => resource_status.resource_type,
      "resource-title" => resource_status.title,
      "property"       => event.property,
      "new-value"      => event.desired_value,
      "old-value"      => event.previous_value,
      "message"        => event.message,
      "file"           => resource_status.file,
      "line"           => resource_status.line
    }
    add_v4_fields_to_event(resource_status, wire_event)
  end

  # Fabricates a wire-format event for a `Puppet::Resource::Status` that has
  # no events of its own, using the supplied status string and the resource
  # status' own timestamp. Property, values and message are left nil.
  #
  # @api private
  def fabricate_event(resource_status, event_status)
    wire_event = {
      "status"         => event_status,
      "timestamp"      => Puppet::Util::Puppetdb.to_wire_time(resource_status.time),
      "resource-type"  => resource_status.resource_type,
      "resource-title" => resource_status.title,
      "property"       => nil,
      "new-value"      => nil,
      "old-value"      => nil,
      "message"        => nil,
      "file"           => resource_status.file,
      "line"           => resource_status.line
    }
    add_v4_fields_to_event(resource_status, wire_event)
  end

  # Returns a copy of the report hash with "transaction-uuid" added; the
  # value is nil for report formats older than 4.
  #
  # @api private
  def add_v4_fields_to_report(report_hash)
    uuid = report_format >= 4 ? transaction_uuid : nil
    report_hash.merge("transaction-uuid" => uuid)
  end

  # Returns a copy of the event hash with "containment-path" added; the
  # value is nil for report formats older than 4.
  #
  # @api private
  def add_v4_fields_to_event(resource_status, event_hash)
    path = report_format >= 4 ? resource_status.containment_path : nil
    event_hash.merge("containment-path" => path)
  end

  # Drops blacklisted events when the configuration asks for it; otherwise
  # hands the list back untouched.
  #
  # @api private
  def filter_events(events)
    return events unless config.ignore_blacklisted_events?

    profile "Filter blacklisted events" do
      events.reject { |e| config.is_event_blacklisted?(e) }
    end
  end

  # Shortcut to the puppetdb configuration object.
  #
  # @api private
  def config
    Puppet::Util::Puppetdb.config
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Puppet::Util::Puppetdb
  # Lookup structure answering "should this report event be filtered out?".
  class Blacklist

    BlacklistedEvent = Struct.new(:resource_type, :resource_title, :status, :property)

    # Default blacklist: the 'skipped' Schedule events that older versions of
    # puppet emit on every agent run. Puppet 3.3 and later should no longer
    # generate them; the list is kept for backward compatibility.
    BlacklistedEvents = %w[never puppet hourly daily weekly monthly].map do |title|
      BlacklistedEvent.new("Schedule", title, "skipped", nil)
    end

    # Indexes the given events as nested hashes keyed by
    # resource type -> resource title -> status -> property.
    def initialize(events)
      @events = {}
      events.each do |e|
        by_title    = (@events[e.resource_type] ||= {})
        by_status   = (by_title[e.resource_title] ||= {})
        by_property = (by_status[e.status] ||= {})
        by_property[e.property] = true
      end
    end

    # Returns true when the wire-format event hash (string keys
    # "resource-type", "resource-title", "status", "property") matches a
    # blacklisted entry, false otherwise.
    def is_event_blacklisted?(event)
      outer_keys = ["resource-type", "resource-title", "status"]
      by_property = outer_keys.inject(@events) do |level, key|
        level.fetch(event[key], {})
      end
      by_property.fetch(event["property"], false)
    end
  end
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'puppet'
|
2
|
+
|
3
|
+
module Puppet
  module Util
    module Puppetdb
      # Helpers for coercing arbitrary strings into valid UTF-8 before they
      # are sent to PuppetDB.
      module CharEncoding

        # Some of this code is modeled after:
        #  https://github.com/brianmario/utf8/blob/ef10c033/ext/utf8/utf8proc.c
        #  https://github.com/brianmario/utf8/blob/ef10c033/ext/utf8/string_utf8.c

        # Expected total length (in bytes) of a UTF-8 character, indexed by the
        # value of its first byte. 0 marks bytes that cannot start a character.
        Utf8CharLens = [
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        ]

        # The bytes of U+FFFD (the Unicode replacement character) in UTF-8.
        Utf8ReplacementChar = [ 0xEF, 0xBF, 0xBD ].pack("c*")


        # Returns a UTF-8 version of `str`, replacing or stripping whatever is
        # not valid. A warning is logged (via #warn_if_changed) whenever the
        # cleaned string differs from the input.
        def self.utf8_string(str)
          # Anchored and escaped on purpose: an unanchored /1.8/ also matches
          # versions such as "2.1.8" and would send them down the 1.8 path.
          if RUBY_VERSION =~ /\A1\.8/
            # Ruby 1.8 doesn't have String#encode and related methods, and there
            #  appears to be a bug in iconv that will interpret some byte sequences
            #  as 6-byte characters.  Thus, we are forced to resort to some unfortunate
            #  manual chicanery.
            warn_if_changed(str, ruby18_clean_utf8(str))
          elsif str.encoding == Encoding::UTF_8
            # If we get here, we're in ruby 1.9+, so we have the string encoding methods
            #  available.  However, just because a ruby String object is already
            #  marked as UTF-8, that doesn't guarantee that its contents are actually
            #  valid; and if you call ruby's ".encode" method with an encoding of
            #  "utf-8" for a String that ruby already believes is UTF-8, ruby
            #  seems to optimize that to be a no-op.  So, we have to do some more
            #  complex handling...

            # If the string already has valid encoding then we're fine.
            return str if str.valid_encoding?

            # If not, we basically have to walk over the characters and replace
            # them by hand.
            warn_if_changed(str, str.each_char.map { |c| c.valid_encoding? ? c : "\ufffd"}.join)
          else
            # if we get here, we're ruby 1.9 and the current string is *not* encoded
            #  as UTF-8.  Thus we can actually rely on ruby's "encode" method.
            begin
              str.encode('UTF-8')
            rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
              # If we got an exception, the string is either invalid or not
              # convertible to UTF-8, so have `encode` substitute the offending
              # bytes/characters instead of raising.
              warn_if_changed(str, str.encode('UTF-8', :invalid => :replace, :undef => :replace))
            end
          end
        end

        # Logs a warning when `converted_str` differs from `str`, then returns
        # `converted_str`.
        #
        # @api private
        def self.warn_if_changed(str, converted_str)
          if converted_str != str
            Puppet.warning "Ignoring invalid UTF-8 byte sequences in data to be sent to PuppetDB"
          end
          converted_str
        end

        # Cleans a string on ruby 1.8 by composing both scrubbing strategies:
        # iconv first, then the manual byte walk.
        #
        # @api private
        def self.ruby18_clean_utf8(str)
          # So, we've tried doing this UTF8 cleaning for ruby 1.8 a few different
          # ways.  Doing it via IConv, we don't do a good job of handling characters
          # whose codepoints would exceed the legal maximum for UTF-8.  Doing it via
          # our manual scrubbing process is slower and doesn't catch overlong
          # encodings.  Since this code really shouldn't even exist in the first place
          # we've decided to simply compose the two scrubbing methods for now, rather
          # than trying to add detection of overlong encodings.  It'd be a non-trivial
          # chunk of code, and it'd have to do a lot of bitwise arithmetic (which Ruby
          # is not blazingly fast at).
          ruby18_manually_clean_utf8(iconv_to_utf8(str))
        end


        # First scrubbing pass used by #ruby18_clean_utf8: round-trips the
        # string through Iconv (UTF-8 -> UTF-8//IGNORE).
        #
        # NOTE(review): `Iconv` is not required in this file; it is presumably
        # already loaded on the ruby 1.8 installs that reach this code path.
        #
        # @api private
        def self.iconv_to_utf8(str)
          iconv = Iconv.new('UTF-8//IGNORE', 'UTF-8')

          # A trailing space is appended before conversion and removed again
          # afterwards; see
          # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
          iconv.iconv(str + " ")[0..-2]
        end

        # Looks up the expected character length for a leading byte.
        #
        # @api private
        def self.get_char_len(byte)
          Utf8CharLens[byte]
        end

        # Manually cleans a string by stripping any byte sequences that are
        # not valid UTF-8 characters.  If you'd prefer for the invalid bytes to be
        # replaced with the unicode replacement character rather than being stripped,
        # you may pass `false` for the optional second parameter (`strip`, which
        # defaults to `true`).
        #
        # @api private
        def self.ruby18_manually_clean_utf8(str, strip = true)
          i = 0
          len = str.length
          result = ""

          while i < len
            byte = get_byte(str, i)

            i += 1

            char_len = get_char_len(byte)
            case char_len
            when 0
              # Not a legal leading byte.
              result.concat(Utf8ReplacementChar) unless strip
            when 1
              result << byte
            when 2..4
              ruby18_handle_multibyte_char(result, byte, str, i, char_len, strip)
              # Skip the continuation bytes consumed by the helper.
              i += char_len - 1
            else
              raise Puppet::DevError, "Unhandled UTF8 char length: '#{char_len}'"
            end

          end

          result
        end

        # Appends one multi-byte character to `result_str` when its trailing
        # bytes are valid. Otherwise appends the replacement character, or
        # nothing when `strip` is true.
        #
        # `byte` is the already-consumed leading byte and `i` is the index of
        # the first trailing byte in `str`.
        #
        # @api private
        def self.ruby18_handle_multibyte_char(result_str, byte, str, i, char_len, strip = true)
          # keeping an array of bytes for now because we need to do some
          #  bitwise math on them.
          char_additional_bytes = []

          # If we don't have enough bytes left to read the full character, we
          #  put on a replacement character and bail.
          if i + (char_len - 1) > str.length
            result_str.concat(Utf8ReplacementChar) unless strip
            return
          end

          # we've already read the first byte, so we need to set up a range
          #  from 0 to (n-2); e.g. if it's a 2-byte char, we will have a range
          #  from 0 to 0 which will result in reading 1 more byte
          (0..char_len - 2).each do |x|
            char_additional_bytes << get_byte(str, i + x)
          end

          if (is_valid_multibyte_suffix(byte, char_additional_bytes))
            result_str << byte
            result_str.concat(char_additional_bytes.pack("c*"))
          else
            result_str.concat(Utf8ReplacementChar) unless strip
          end
        end

        # Checks the trailing bytes of a multi-byte character whose leading
        # byte is `byte`. Returns true when every trailing byte is a UTF-8
        # continuation byte (10xxxxxx) and the sequence does not encode a
        # codepoint above U+10FFFF.
        #
        # @api private
        def self.is_valid_multibyte_suffix(byte, additional_bytes)
          # Codepoints greater than 0x10FFFF are illegal. The largest legal
          # sequence is F4 8F BF BF, so with a leading F4 any second byte of
          # 0x90 or above is already out of range.
          if byte == 0xF4
            second = additional_bytes.first
            return false if second && second >= 0x90
          end
          additional_bytes.all? { |b| ((b & 0xC0) == 0x80) }
        end

        # Returns the byte (as an integer) at `index` of `str`.
        #
        # @api private
        def self.get_byte(str, index)
          # This method is a hack to allow this code to work with either ruby 1.8
          # or 1.9.  In production this code path should never be exercised by
          # 1.9 because it has a much more sane way to accomplish our goal, but
          # for testing, it is useful to be able to run the 1.8 codepath in 1.9.
          #
          # The capability check is made on the string itself rather than on
          # module-level state set by another method, so the result does not
          # depend on which method happened to be called first.
          if str.respond_to?(:getbyte)
            str.getbyte(index)
          else
            str[index]
          end
        end

      end
    end
  end
end
|