fluent-plugin-azure-logs-ingestion 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'uri'
6
+ require 'openssl'
7
+ require 'securerandom'
8
+ require 'time'
9
+
10
+ module Fluent
11
+ module Plugin
12
+ module AzureLogsIngestion
13
+ class Client
14
+ def initialize(endpoint:, dcr_immutable_id:, stream_name:, logger:)
15
+ @endpoint = endpoint
16
+ @dcr_immutable_id = dcr_immutable_id
17
+ @stream_name = stream_name
18
+ @log = logger
19
+ end
20
+
21
+ def send_payload(payload:, bearer_token:)
22
+ uri = build_uri
23
+ @log.debug('sending logs ingestion request', uri: uri.to_s, content_length: payload.content_length, content_encoding: payload.content_encoding)
24
+ request = Net::HTTP::Post.new(uri)
25
+ request['Authorization'] = "Bearer #{bearer_token}"
26
+ request['Content-Type'] = 'application/json'
27
+ request['Content-Encoding'] = payload.content_encoding if payload.content_encoding
28
+ request['x-ms-client-request-id'] = SecureRandom.uuid
29
+ request.body_stream = payload.io
30
+ request.content_length = payload.content_length
31
+
32
+ response = perform_request(uri, request)
33
+ handle_response(response)
34
+ true
35
+ rescue Timeout::Error, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET, EOFError, SocketError, IOError, SystemCallError => error
36
+ raise "logs ingestion request failed: #{error.message}"
37
+ ensure
38
+ payload.io.rewind if payload&.io
39
+ end
40
+
41
+ private
42
+
43
+ def build_uri
44
+ base = @endpoint.end_with?('/') ? @endpoint : "#{@endpoint}/"
45
+ URI.join(base, "dataCollectionRules/#{@dcr_immutable_id}/streams/#{@stream_name}?api-version=2023-01-01")
46
+ end
47
+
48
+ def perform_request(uri, request)
49
+ http = Net::HTTP.new(uri.host, uri.port)
50
+ http.use_ssl = uri.scheme == 'https'
51
+ http.open_timeout = 10
52
+ http.read_timeout = 60
53
+ http.verify_mode = OpenSSL::SSL::VERIFY_PEER if http.use_ssl?
54
+ http.request(request)
55
+ end
56
+
57
+ def handle_response(response)
58
+ code = response.code.to_i
59
+ @log.debug('received logs ingestion response', code: code, message: response.message, retry_after: response['Retry-After'])
60
+ return handle_success_response(response) if response.is_a?(Net::HTTPSuccess)
61
+
62
+ message = "#{response.code} #{response.message} #{String(response.body).strip}".strip
63
+ case code
64
+ when 400, 401, 403, 413
65
+ raise Fluent::UnrecoverableError, message
66
+ when 429
67
+ @log.warn('received retryable 429 from Logs Ingestion API', retry_after: response['Retry-After']) if response['Retry-After']
68
+ raise message
69
+ when 500..599
70
+ raise message
71
+ else
72
+ if code >= 400 && code < 500
73
+ raise Fluent::UnrecoverableError, message
74
+ end
75
+
76
+ raise message
77
+ end
78
+ end
79
+
80
+ def handle_success_response(response)
81
+ body = String(response.body)
82
+ return true if body.empty?
83
+
84
+ JSON.parse(body)
85
+ true
86
+ rescue JSON::ParserError => error
87
+ raise "successful response body was not valid JSON: #{error.message}"
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'tempfile'
5
+ require 'zlib'
6
+
7
module Fluent
  module Plugin
    module AzureLogsIngestion
      # Serialises the records of a chunk into a JSON array stored in a
      # Tempfile, optionally gzip-compressed, and reports the sizes needed to
      # send it as a streamed request body.
      class PayloadBuilder
        # Largest raw or compressed body, in bytes, that #build accepts.
        MAX_BYTES = 1_048_576

        # Built payload. `io` is a Tempfile positioned at its start; the
        # caller owns it and releases it with #close!.
        Result = Struct.new(
          :io,
          :content_encoding,
          :content_length,
          :raw_size,
          :gzip_size,
          :record_count,
          keyword_init: true
        ) do
          # Closes and unlinks the backing Tempfile; cleanup errors are ignored.
          def close!
            io.close!
          rescue StandardError
            nil
          end
        end

        # @param gzip [Boolean] whether #build should gzip the JSON body
        def initialize(gzip:)
          @gzip = gzip
        end

        # Builds the request body for one chunk.
        #
        # @param chunk [#each] yields `(event_time, record)` pairs
        # @return [Result]
        # @raise [Fluent::UnrecoverableError] when the raw or gzip body
        #   exceeds MAX_BYTES
        def build(chunk)
          raw_file = Tempfile.new('azure-logs-ingestion-raw')
          raw_file.binmode
          gzip_file = nil

          raw_size, record_count = write_json_array(raw_file, chunk)
          raw_file.flush
          raw_file.rewind

          gzip_size = nil
          io = raw_file
          content_encoding = nil
          content_length = raw_size

          if @gzip
            gzip_file, gzip_size = gzip_file_from(raw_file)
            io = gzip_file
            content_encoding = 'gzip'
            content_length = gzip_size
            # Only the compressed copy is sent, so the raw file can go now.
            raw_file.close!
          end

          validate!(raw_size: raw_size, gzip_size: gzip_size)

          Result.new(
            io: io,
            content_encoding: content_encoding,
            content_length: content_length,
            raw_size: raw_size,
            gzip_size: gzip_size,
            record_count: record_count
          )
        rescue StandardError
          gzip_file&.close!
          raw_file&.close!
          raise
        end

        private

        def validate!(raw_size:, gzip_size:)
          raise Fluent::UnrecoverableError, "payload size #{raw_size} exceeds #{MAX_BYTES} bytes" if raw_size > MAX_BYTES
          return unless gzip_size && gzip_size > MAX_BYTES

          raise Fluent::UnrecoverableError, "gzip payload size #{gzip_size} exceeds #{MAX_BYTES} bytes"
        end

        # Writes every record of the chunk as one JSON array.
        #
        # @return [Array(Integer, Integer)] bytes written and record count
        def write_json_array(io, chunk)
          bytes = write_bytes(io, '[')
          count = 0
          chunk.each do |_event_time, record|
            bytes += write_bytes(io, ',') unless count.zero?
            bytes += write_bytes(io, JSON.generate(record))
            count += 1
          end
          bytes += write_bytes(io, ']')
          [bytes, count]
        end

        # Compresses source_file into a new Tempfile.
        #
        # @return [Array(Tempfile, Integer)] rewound gzip file and its size
        def gzip_file_from(source_file)
          gzip_file = Tempfile.new('azure-logs-ingestion-gzip')
          gzip_file.binmode
          Zlib::GzipWriter.open(gzip_file.path) do |writer|
            source_file.rewind
            IO.copy_stream(source_file, writer)
          end
          # The writer used its own handle on the path; reopen ours so reads
          # start from a fresh handle over the compressed bytes.
          gzip_file.close
          gzip_file.open
          gzip_file.binmode
          gzip_file.rewind
          [gzip_file, File.size(gzip_file.path)]
        rescue StandardError
          # The caller has not received the file yet, so nobody else can
          # release it when compression fails.
          gzip_file&.close!
          raise
        end

        def write_bytes(io, string)
          io.write(string)
          string.bytesize
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Fluent
  module Plugin
    # Namespace shared by the plugin's helper classes.
    module AzureLogsIngestion
      # Version string of this plugin release.
      VERSION = '0.1.0'
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/output'
4
+ require_relative 'azure_logs_ingestion/version'
5
+ require_relative 'azure_logs_ingestion/auth'
6
+ require_relative 'azure_logs_ingestion/payload_builder'
7
+ require_relative 'azure_logs_ingestion/client'
8
+
9
module Fluent
  module Plugin
    # Output plugin registered as `azure_logs_ingestion`.
    #
    # For every flushed chunk it builds a payload with
    # AzureLogsIngestion::PayloadBuilder, obtains a bearer token from
    # AzureLogsIngestion::Auth and uploads it through
    # AzureLogsIngestion::Client.
    class AzureLogsIngestionOutput < Output
      Fluent::Plugin.register_output('azure_logs_ingestion', self)

      config_param :endpoint, :string
      config_param :dcr_immutable_id, :string
      config_param :stream_name, :string
      config_param :gzip, :bool, default: false
      config_param :use_msi, :bool, default: false
      config_param :tenant_id, :string, default: ENV['AZURE_TENANT_ID']
      config_param :client_id, :string, default: ENV['AZURE_CLIENT_ID']
      config_param :client_secret, :string, secret: true, default: ENV['AZURE_CLIENT_SECRET']
      config_param :authority_host, :string, default: 'https://login.microsoftonline.com'
      config_param :logs_ingestion_scope, :string, default: 'https://monitor.azure.com/.default'
      config_param :token_refresh_skew, :time, default: 300

      config_section :buffer do
        config_set_default :@type, 'file'
        config_set_default :chunk_limit_size, 900 * 1024
      end

      # Validates the URL settings and, unless managed identity is enabled,
      # requires the three service principal settings.
      #
      # @raise [Fluent::ConfigError] on an invalid URL or missing credentials
      def configure(conf)
        super

        validate_urls!

        return if @use_msi || (@tenant_id && @client_id && @client_secret)

        raise Fluent::ConfigError, 'tenant_id, client_id, and client_secret are required when use_msi is false'
      end

      # Creates the token provider and the HTTP client from the configuration.
      def start
        super
        @auth = AzureLogsIngestion::Auth.new(**auth_options)
        @client = AzureLogsIngestion::Client.new(**client_options)
      end

      # Sends one chunk; the payload's temporary file is released whether or
      # not the upload succeeds.
      def write(chunk)
        chunk_id = dump_unique_id_hex(chunk.unique_id)
        log.debug('building logs ingestion payload', chunk_id: chunk_id, gzip: @gzip)
        payload = AzureLogsIngestion::PayloadBuilder.new(gzip: @gzip).build(chunk)

        log.debug(
          'built logs ingestion payload',
          chunk_id: chunk_id,
          record_count: payload.record_count,
          raw_size: payload.raw_size,
          gzip_size: payload.gzip_size,
          content_length: payload.content_length
        )

        @client.send_payload(payload: payload, bearer_token: @auth.token)
        log.debug('logs ingestion request completed', chunk_id: chunk_id)
      ensure
        payload&.close!
      end

      private

      # Keyword arguments handed to AzureLogsIngestion::Auth.
      def auth_options
        {
          use_msi: @use_msi,
          tenant_id: @tenant_id,
          client_id: @client_id,
          client_secret: @client_secret,
          authority_host: @authority_host,
          logs_ingestion_scope: @logs_ingestion_scope,
          token_refresh_skew: @token_refresh_skew,
          logger: log
        }
      end

      # Keyword arguments handed to AzureLogsIngestion::Client.
      def client_options
        {
          endpoint: @endpoint,
          dcr_immutable_id: @dcr_immutable_id,
          stream_name: @stream_name,
          logger: log
        }
      end

      def validate_urls!
        { 'endpoint' => @endpoint, 'authority_host' => @authority_host }.each do |field, value|
          validate_url!(value, field)
        end
      end

      # Accepts only values that parse as an http(s) URI with a host.
      def validate_url!(value, field)
        parsed =
          begin
            URI.parse(value)
          rescue URI::InvalidURIError
            nil
          end
        return if parsed.is_a?(URI::HTTP) && parsed.host

        raise Fluent::ConfigError, "#{field} must be a valid HTTP or HTTPS URL"
      end
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test/unit'
4
+ require 'fluent/test'
5
+
6
+ Fluent::Test.setup
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'socket'
4
+ require 'thread'
5
+
6
# Tiny single-threaded HTTP server for tests.
#
# It listens on an ephemeral port on 127.0.0.1, records every request it
# receives and answers with whatever the handler block returns as
# `[status, headers, body]`. Without a block every request gets an empty 200.
class FakeAzureServer
  Request = Struct.new(:method, :path, :headers, :body, keyword_init: true)

  # Reason phrases for the statuses tests are likely to return. Statuses that
  # are not listed fall back to DEFAULT_REASON.
  REASON_PHRASES = {
    200 => 'OK',
    204 => 'No Content',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    403 => 'Forbidden',
    404 => 'Not Found',
    408 => 'Request Timeout',
    413 => 'Payload Too Large',
    429 => 'Too Many Requests',
    500 => 'Internal Server Error',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Timeout'
  }.freeze
  DEFAULT_REASON = 'OK'

  # @return [Array<Request>] requests received so far, in arrival order
  attr_reader :requests

  def initialize(&handler)
    @handler = handler || proc { |_request| [200, {}, ''] }
    @requests = []
    @mutex = Mutex.new
    @closed = false
  end

  # Binds to a free port and starts the accept loop in a background thread.
  #
  # @return [FakeAzureServer] self, so the call can be chained after `new`
  def start
    @server = TCPServer.new('127.0.0.1', 0)
    @port = @server.addr[1]
    @thread = Thread.new { run }
    self
  end

  # @return [String] base URL of the running server
  def url
    "http://127.0.0.1:#{@port}"
  end

  # Closes the listening socket and waits up to one second for the thread.
  def stop
    @closed = true
    @server&.close
    @thread&.join(1)
  rescue IOError, Errno::EBADF
    nil
  end

  private

  def run
    until @closed
      begin
        socket = @server.accept
        handle_client(socket)
      rescue IOError, Errno::EBADF
        # Raised when #stop closes the listening socket.
        break
      end
    end
  end

  def handle_client(socket)
    request_line = socket.gets("\r\n")
    return unless request_line

    method, path, = request_line.strip.split(' ', 3)
    headers = read_headers(socket)
    body = read_body(socket, headers)
    request = Request.new(method: method, path: path, headers: headers, body: body)
    @mutex.synchronize { @requests << request }
    status, response_headers, response_body = @handler.call(request)
    write_response(socket, status, response_headers || {}, response_body || '')
  ensure
    socket&.close
  end

  # Reads header lines up to the blank line that ends the header section.
  # Lines without a colon are skipped: calling `strip` on the missing value
  # would raise and take the whole accept thread down with it.
  def read_headers(socket)
    headers = {}
    while (line = socket.gets("\r\n"))
      break if line == "\r\n"

      key, value = line.split(':', 2)
      next if value.nil?

      headers[key.strip.downcase] = value.strip
    end
    headers
  end

  def read_body(socket, headers)
    length = headers.fetch('content-length', '0').to_i
    return '' if length <= 0

    socket.read(length)
  end

  def write_response(socket, status, headers, body)
    reason = REASON_PHRASES.fetch(status, DEFAULT_REASON)

    response = +"HTTP/1.1 #{status} #{reason}\r\n"
    response << "Content-Length: #{body.bytesize}\r\n"
    response << "Connection: close\r\n"
    headers.each do |key, value|
      response << "#{key}: #{value}\r\n"
    end
    response << "\r\n"
    response << body
    socket.write(response)
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'time'
5
+ require 'uri'
6
+
7
# Logger stand-in for tests: every level accepts any arguments, does nothing
# and returns nil.
class TestLogger
  %i[debug info warn error].each do |level|
    define_method(level) { |*| nil }
  end
end
13
+
14
# Stand-in for a buffer chunk: wraps a list of events and exposes the
# `each` / `unique_id` calls made on chunks by the code under test.
FakeChunk = Struct.new(:events, :chunk_id) do
  # @param events [#each] items yielded by #each
  # @param chunk_id [String] value reported by #unique_id
  def initialize(events, chunk_id = '0123456789ab')
    super(events, chunk_id)
  end

  # Iterates over the wrapped events, forwarding the given block.
  def each(&consumer)
    events.each(&consumer)
  end

  # Identifier of the chunk; same value as #chunk_id.
  def unique_id
    self.chunk_id
  end
end
27
+
28
# Mixin for tests that need to change environment variables temporarily.
module TestEnvHelper
  # Applies the given environment overrides, yields, then restores the
  # previous values even when the block raises.
  #
  # @param values [Hash{String => String, nil}] variables to set; a nil
  #   value removes the variable for the duration of the block
  # @return the value of the block
  def with_env(values)
    saved = {}
    values.each_pair do |name, replacement|
      saved[name] = ENV[name]
      if replacement.nil?
        ENV.delete(name)
      else
        ENV[name] = replacement
      end
    end
    yield
  ensure
    saved.each_pair do |name, original|
      if original.nil?
        ENV.delete(name)
      else
        ENV[name] = original
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'helper'
4
+ require_relative 'support/fake_azure_server'
5
+ require_relative 'support/helpers'
6
+ require 'fluent/plugin/azure_logs_ingestion/auth'
7
+
8
# Checks which token endpoint Auth calls in managed identity mode, using a
# local FakeAzureServer in place of the real identity endpoints.
class AuthManagedIdentityTest < Test::Unit::TestCase
  include TestEnvHelper

  test 'uses app service managed identity endpoint when environment is present' do
    server = start_token_server('msi-token')
    environment = {
      'IDENTITY_ENDPOINT' => "#{server.url}/msi/token",
      'IDENTITY_HEADER' => 'identity-header-value',
      'AZURE_LOGS_INGESTION_IMDS_ENDPOINT' => nil
    }

    with_env(environment) do
      auth = build_managed_identity_auth

      assert_equal 'msi-token', auth.token
    end

    request = server.requests.first
    query = decoded_query(request)
    assert_equal 'identity-header-value', request.headers['x-identity-header']
    assert_equal 'https://monitor.azure.com/', query['resource']
    assert_equal 'user-assigned-client-id', query['client_id']
  ensure
    server&.stop
  end

  test 'uses IMDS endpoint when app service environment is absent' do
    server = start_token_server('imds-token')
    environment = {
      'IDENTITY_ENDPOINT' => nil,
      'IDENTITY_HEADER' => nil,
      'AZURE_LOGS_INGESTION_IMDS_ENDPOINT' => "#{server.url}/metadata/identity/oauth2/token"
    }

    with_env(environment) do
      auth = build_managed_identity_auth

      assert_equal 'imds-token', auth.token
    end

    request = server.requests.first
    query = decoded_query(request)
    assert_equal 'true', request.headers['metadata']
    assert_equal 'https://monitor.azure.com/', query['resource']
    assert_equal 'user-assigned-client-id', query['client_id']
  ensure
    server&.stop
  end

  private

  # Starts a fake server that answers every request with the given token,
  # expiring one hour after the response is generated.
  def start_token_server(access_token)
    FakeAzureServer.new do |_request|
      [200, { 'Content-Type' => 'application/json' }, { access_token: access_token, expires_on: (Time.now.to_i + 3600).to_s }.to_json]
    end.start
  end

  # Auth configured for managed identity with a user-assigned client id.
  def build_managed_identity_auth
    Fluent::Plugin::AzureLogsIngestion::Auth.new(
      use_msi: true,
      tenant_id: nil,
      client_id: 'user-assigned-client-id',
      client_secret: nil,
      authority_host: 'https://login.microsoftonline.com',
      logs_ingestion_scope: 'https://monitor.azure.com/.default',
      token_refresh_skew: 300,
      logger: TestLogger.new
    )
  end

  # Query-string parameters of a recorded request, as a Hash.
  def decoded_query(request)
    URI.decode_www_form(URI(request.path).query).to_h
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'helper'
4
+ require_relative 'support/fake_azure_server'
5
+ require_relative 'support/helpers'
6
+ require 'fluent/plugin/azure_logs_ingestion/auth'
7
+
8
# Checks token caching and refresh in service principal mode against a local
# FakeAzureServer acting as the authority host.
class AuthServicePrincipalTest < Test::Unit::TestCase
  test 'caches service principal token until refresh is needed' do
    server = FakeAzureServer.new do |_request|
      token_response(access_token: 'token-1', expires_in: '3600')
    end.start

    auth = build_service_principal_auth(server, token_refresh_skew: 300)

    assert_equal 'token-1', auth.token
    assert_equal 'token-1', auth.token
    assert_equal 1, server.requests.size
    assert_match %r{/tenant-id/oauth2/v2.0/token}, server.requests.first.path
    assert_match(/scope=https%3A%2F%2Fmonitor\.azure\.com%2F\.default/, server.requests.first.body)
  ensure
    server&.stop
  end

  test 'refreshes service principal token when it is already expired' do
    tokens = %w[token-1 token-2]
    server = FakeAzureServer.new do |_request|
      token_response(access_token: tokens.shift, expires_in: '0')
    end.start

    auth = build_service_principal_auth(server, token_refresh_skew: 0)

    assert_equal 'token-1', auth.token
    assert_equal 'token-2', auth.token
    assert_equal 2, server.requests.size
  ensure
    server&.stop
  end

  private

  # Rack-style triple carrying a JSON token response.
  def token_response(access_token:, expires_in:)
    [200, { 'Content-Type' => 'application/json' }, { access_token: access_token, expires_in: expires_in }.to_json]
  end

  # Auth configured with fixed service principal credentials and the fake
  # server as authority host.
  def build_service_principal_auth(server, token_refresh_skew:)
    Fluent::Plugin::AzureLogsIngestion::Auth.new(
      use_msi: false,
      tenant_id: 'tenant-id',
      client_id: 'client-id',
      client_secret: 'secret',
      authority_host: server.url,
      logs_ingestion_scope: 'https://monitor.azure.com/.default',
      token_refresh_skew: token_refresh_skew,
      logger: TestLogger.new
    )
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'helper'
4
+ require 'fluent/test/driver/output'
5
+ require 'fluent/plugin/out_azure_logs_ingestion'
6
+
7
# Configuration tests for the azure_logs_ingestion output plugin.
class AzureLogsIngestionOutputTest < Test::Unit::TestCase
  # Settings every configuration in this file shares.
  TARGET_SETTINGS = <<~CONFIG
    endpoint https://example.eastus-1.ingest.monitor.azure.com
    dcr_immutable_id dcr-000a00a000a00000a000000aa000a0aa
    stream_name Custom-MyTable
  CONFIG

  SERVICE_PRINCIPAL_SETTINGS = <<~CONFIG
    tenant_id test-tenant
    client_id test-client
    client_secret test-secret
  CONFIG

  MEMORY_BUFFER = <<~CONFIG
    <buffer>
      @type memory
    </buffer>
  CONFIG

  BASE_CONFIG = (TARGET_SETTINGS + SERVICE_PRINCIPAL_SETTINGS + MEMORY_BUFFER).freeze

  def create_driver(conf = BASE_CONFIG)
    driver = Fluent::Test::Driver::Output.new(Fluent::Plugin::AzureLogsIngestionOutput)
    driver.configure(conf)
  end

  test 'configures minimal service principal settings' do
    plugin = create_driver.instance

    assert_equal 'https://example.eastus-1.ingest.monitor.azure.com', plugin.endpoint
    assert_equal 'dcr-000a00a000a00000a000000aa000a0aa', plugin.dcr_immutable_id
    assert_equal 'Custom-MyTable', plugin.stream_name
  end

  test 'allows managed identity without service principal secret' do
    driver = create_driver("#{TARGET_SETTINGS}use_msi true\n#{MEMORY_BUFFER}")

    assert_equal true, driver.instance.use_msi
  end

  test 'rejects missing credentials when use_msi is false' do
    error = assert_raise(Fluent::ConfigError) do
      create_driver(TARGET_SETTINGS + MEMORY_BUFFER)
    end

    assert_match(/tenant_id, client_id, and client_secret/, error.message)
  end
end