surfliner-metadata_consumer 0.1.0.pre.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,47 @@
1
+ require "rsolr"
2
+ require "surfliner/metadata_consumer/mq_connection"
3
+ require "surfliner/metadata_consumer/solr/message_handler"
4
+
5
+ module Surfliner
6
+ module MetadataConsumer
7
+ # A metadata consumer that subscribes to a RabbitMQ queue and passes
8
+ # messages to the specified handler.
9
class Consumer
  # @return [MqConnection] the RabbitMQ connection wrapper
  # @return [Logger] log message destination
  # @return [OpenTelemetry::Trace::Tracer] OpenTelemetry tracer
  # @return [#handle] the object each message payload is passed to
  attr_reader :connection, :logger, :tracer, :handler

  # Initializes a new `Consumer`
  # @param tracer [OpenTelemetry::Trace::Tracer] OpenTelemetry tracer
  # @param logger [Logger] log message destination
  # @param handler [#handle] an object accepting a JSON string payload
  def initialize(tracer:, logger:, handler:)
    @connection = MqConnection.new(logger: logger)
    @logger = logger
    @tracer = tracer
    @handler = handler
  end

  # Initializes and starts a new `Consumer`
  # @param tracer [OpenTelemetry::Trace::Tracer] OpenTelemetry tracer
  # @param logger [Logger] log message destination
  # @param handler [#handle] an object accepting a JSON string payload
  def self.run(tracer:, logger:, handler:)
    new(tracer: tracer, logger: logger, handler: handler).run
  end

  # Starts listening to the message queue and passing messages to the handler.
  # A failure while handling one message is logged and does not stop the
  # subscription.
  def run
    connection.open do |queue|
      queue.subscribe(block: true) do |_delivery_info, _properties, payload_json|
        tracer.in_span("surfliner metadata consumer message") do |_span|
          logger.info(" [  ] message received with payload: #{payload_json}")

          handler.handle(payload_json)
        end
      rescue StandardError, NotImplementedError => err
        # NotImplementedError is a ScriptError, not a StandardError, so it
        # must be named explicitly; handlers raise it for unsupported
        # operations and that must not kill the subscription loop.
        log_failure(err)
      end
    end
  end

  private

  # Logs the error message followed by its backtrace, one frame per line.
  # @param err [Exception] the error raised while handling a message
  def log_failure(err)
    trace = Array(err.backtrace).join("\n")
    logger.error(" [❌] failed to handle message: #{err}\n#{trace}")
  end
end
46
+ end
47
+ end
@@ -0,0 +1,79 @@
1
+ module Surfliner
2
+ module MetadataConsumer
3
+ # An object encapsulating RabbitMQ configuration.
4
class MqConfig
  # @return [String] The RabbitMQ hostname
  attr_reader :host

  # @return [String] The RabbitMQ AMQP port
  attr_reader :port

  # @return [String] The RabbitMQ username
  attr_reader :username

  # @return [String] The RabbitMQ password
  attr_reader :password

  # @return [String] The topic exchange to listen to
  attr_reader :topic

  # @return [String] The name of the queue to listen to
  attr_reader :queue_name

  # @return [String] The platform routing key to listen to
  attr_reader :routing_key

  # Initializes a new `MqConfig` object.
  # @param host [String] The RabbitMQ hostname
  # @param port [String] The RabbitMQ AMQP port
  # @param username [String] The RabbitMQ username
  # @param password [String] The RabbitMQ password
  # @param topic [String] The topic exchange to listen to
  # @param queue_name [String] The name of the queue to listen to
  # @param routing_key [String] The platform routing key to listen to
  def initialize(host:, port:, username:, password:, topic:, queue_name:, routing_key:)
    @host = host
    @port = port
    @username = username
    @password = password
    @topic = topic
    @queue_name = queue_name
    @routing_key = routing_key
  end

  class << self
    # Reads RabbitMQ configuration from environment variables and
    # returns it as a new `MqConfig` object.
    #
    # - `RABBITMQ_HOST` → `host`
    # - `RABBITMQ_NODE_PORT_NUMBER` → `port`
    # - `RABBITMQ_USERNAME` → `username`
    # - `RABBITMQ_PASSWORD` → `password`
    # - `RABBITMQ_TOPIC` → `topic`
    # - `RABBITMQ_QUEUE` → `queue_name`
    # - `RABBITMQ_PLATFORM_ROUTING_KEY` → `routing_key`
    #
    # @return [MqConfig]
    # @raise [KeyError] if any of the variables is not set
    def from_env
      new(
        host: ENV.fetch("RABBITMQ_HOST"),
        port: ENV.fetch("RABBITMQ_NODE_PORT_NUMBER"),
        username: ENV.fetch("RABBITMQ_USERNAME"),
        password: ENV.fetch("RABBITMQ_PASSWORD"),
        topic: ENV.fetch("RABBITMQ_TOPIC"),
        queue_name: ENV.fetch("RABBITMQ_QUEUE"),
        routing_key: ENV.fetch("RABBITMQ_PLATFORM_ROUTING_KEY")
      )
    end
  end

  # @return [String] the connection URL as a string
  def connection_url
    @connection_url ||= url_with(password)
  end

  # @return [String] the connection URL as a string, without the password
  def redacted_url
    # Built from the parts rather than by substituting the password text in
    # `connection_url`: a substitution hits the first match anywhere in the
    # URL (scheme, username, host) and can leave the real password visible.
    @redacted_url ||= url_with("REDACTED")
  end

  private

  # Assembles the AMQP URL with the given text in the password position.
  def url_with(secret)
    "amqp://#{username}:#{secret}@#{host}:#{port}"
  end
end
78
+ end
79
+ end
@@ -0,0 +1,115 @@
1
+ require "bunny"
2
+
3
+ module Surfliner
4
+ module MetadataConsumer
5
+ # An object encapsulating a RabbitMQ connection.
6
class MqConnection
  # @return [Logger] The logger
  attr_reader :logger

  # @return [Bunny::Session] The current RabbitMQ session
  attr_reader :connection

  # @return [Bunny::Channel] The channel being listened to
  attr_reader :channel

  # @return [Bunny::Exchange] The exchange being listened to
  attr_reader :exchange

  # @return [Bunny::Queue] The queue being listened to
  attr_reader :queue

  # @return [MqConfig] The configuration
  attr_reader :config

  # Initializes a new `MqConnection`.
  #
  # @param logger [Logger] the logger
  # @param config [MqConfig] the configuration; read from the environment
  #   when not given
  def initialize(logger:, config: MqConfig.from_env)
    @logger = logger
    @config = config
  end

  # Opens a connection, creates the channel, declares the topic exchange and
  # the durable queue, and binds the queue to the exchange.
  # @return [self]
  # @raise RuntimeError if already connected, or if the connection could not
  #   be opened before the timeout
  def connect
    raise "RabbitMQ connection #{connection} already open." if open?

    logger.info("Rabbitmq message broker connection url: #{config.redacted_url}")
    @connection = Bunny.new(config.connection_url, logger: logger)
    connect_on(connection)
    @channel = connection.create_channel
    @exchange = channel.topic(config.topic, auto_delete: true)
    @queue = channel.queue(config.queue_name, durable: true)
    queue.bind(exchange, routing_key: config.routing_key)

    self
  rescue Bunny::TCPConnectionFailed
    # NOTE: errors raised by `connection.start` are retried inside
    # `connect_on`, so this only fires for failures after the session is up.
    logger.error("Connection to #{config.redacted_url} failed")
    raise
  rescue Bunny::PossibleAuthenticationFailureError
    logger.error("Failed to authenticate to #{config.redacted_url}")
    raise
  end

  # Opens a connection, yields the queue, and closes the connection after
  # the provided block completes.
  # @yield [Bunny::Queue] the queue
  # @raise RuntimeError if already connected; the existing connection is
  #   left open in that case
  def open
    # Remember whether a connection already existed: in that case `connect`
    # raises, and the cleanup below must not close the caller's connection.
    already_open = open?
    connect
    yield queue
  ensure
    close unless already_open
  end

  # Closes the channel and then the connection.
  def close
    channel&.close
  ensure
    connection&.close
  end

  # @return [true, false] True if the connection is open, false otherwise
  def open?
    connection&.status == :open
  end

  # @return [Symbol, nil] The connection status, or nil if there is no connection
  def status
    connection&.status
  end

  # @return [String] The RabbitMQ hostname
  def host
    config.host
  end

  # @return [String] The RabbitMQ port
  def port
    config.port
  end

  private

  # Tries to start the connection about once a second until it reports
  # `:open` or `timeout` attempts have been made.
  #
  # @param connection [Bunny::Session] the session to start
  # @param timeout [Integer] maximum number of attempts
  # @return [Bunny::Session] the opened connection
  # @raise RuntimeError if the connection never opened; its `cause` is the
  #   last error raised by `connection.start`, if any
  def connect_on(connection, timeout = 120)
    attempts = 0
    last_error = nil
    logger.info "Trying to open queue connection with timeout=#{timeout}"
    while attempts < timeout
      begin
        connection.start
      rescue => err
        # Keep retrying (the broker may still be starting), but keep the
        # reason so the final failure is diagnosable.
        last_error = err
        logger.warn("Queue connection attempt failed: #{err.class}: #{err.message}")
      end
      return connection if connection.status == :open
      sleep 1
      attempts += 1
    end
    raise RuntimeError, "Failed to connect to queue.", cause: last_error
  end
end
114
+ end
115
+ end
@@ -0,0 +1,39 @@
1
+ module Surfliner
2
+ module MetadataConsumer
3
+ # Encapsulates a Surfliner resource message payload.
4
class Payload
  # Expected resource status values.
  KNOWN_STATUSES = [:published, :updated, :unpublished, :deleted].freeze

  # Exception raised if the resource status has not been provided in the message.
  class UnknownStatus < RuntimeError; end

  # Initializes a new `Payload` from the specified JSON data.
  # NOTE: relies on the `json` library having been loaded by the caller.
  # @param payload_json [String] The payload data as received from RabbitMQ.
  # @raise [JSON::ParserError] if the payload is not valid JSON.
  def initialize(payload_json)
    @data = JSON.parse(payload_json)
  end

  # @return [String] The payload data as a JSON string.
  def to_s
    # Hash#to_s would give Ruby inspect output (`{"a"=>1}`), not JSON.
    JSON.generate(@data)
  end

  # @return [String] the URL for the resource.
  # @raise [KeyError] if the message has no `resourceUrl`.
  def resource_url
    @resource_url ||= @data.fetch("resourceUrl")
  end

  # @return [Symbol] the resource status as a symbol.
  # @raise UnknownStatus if the message does not provide the resource status,
  #   or provides one that cannot be a status name (e.g. `null` or a number).
  def status
    @status ||= begin
      status_value = @data.fetch("status", nil)
      # nil and numbers do not respond to #to_sym; report them as an unknown
      # status rather than letting a NoMethodError escape.
      unless status_value.respond_to?(:to_sym)
        raise(UnknownStatus, "Payload status is not defined in payload: #{@data}")
      end
      status_value.to_sym
    end
  end
end
38
+ end
39
+ end
@@ -0,0 +1,15 @@
1
+ require "surfliner/metadata_consumer/solr/message_handler"
2
+
3
+ module Surfliner
4
+ module MetadataConsumer
5
+ module Solr
6
+ # Message handler for `:unpublished` and `:deleted` statuses; removal from Solr is not implemented yet.
7
+ class DeleteHandler < MessageHandler
8
+ # @raise [NotImplementedError] always; this is a ScriptError, so a bare `rescue` will not catch it
9
+ def handle
10
+ raise NotImplementedError, "IMPLEMENT ME: This consumer can't delete yet!"
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,45 @@
1
+ require "json"
2
+ require "net/http"
3
+ require "rsolr"
4
+
5
+ require "surfliner/metadata_consumer/solr/message_handler"
6
+ require "surfliner/metadata_consumer/superskunk_client"
7
+
8
+ module Surfliner
9
+ module MetadataConsumer
10
+ module Solr
11
+ # Message handler that indexes resources into Solr (used for `:published` and `:updated` statuses).
12
+ class IndexHandler < MessageHandler
13
+ # Attributes for the Solr "add" command; see
14
+ # https://cwiki.apache.org/confluence/display/solr/UpdateXmlMessages#UpdateXmlMessages-Optionalattributesfor%22add%22
15
+ SOLR_ATTRIBUTES = {commitWithin: 10}.freeze
16
+
17
+ # Retrieves the resource specified in the message from Superskunk, converts
18
+ # it to a Solr document, and adds it to Solr.
19
+ def handle
20
+ index(build_document(SuperskunkClient.get(payload.resource_url)))
21
+ end
22
+
23
+ private
24
+
25
+ # Builds the Solr document from the parsed Superskunk data; `id` is the last path segment of `@id`.
26
+ # @return [Hash] document with id, title_tesim, creator_ssim, ark_si and superskunk_uri_si
27
+ def build_document(data)
28
+ index_document = {id: data["@id"].split("/").last}
29
+ index_document[:title_tesim] = data["title"]
30
+ index_document[:creator_ssim] = data["creator"]
31
+ index_document[:ark_si] = data["ark"]
32
+ index_document[:superskunk_uri_si] = data["@id"]
33
+ index_document
34
+ end
35
+
36
+ # Adds the document to Solr, passing SOLR_ATTRIBUTES as the "add" attributes.
37
+ # @param doc [Hash] the Solr document to add
38
+ # @return [void]
39
+ def index(doc)
40
+ solr_connection.add([doc], add_attributes: SOLR_ATTRIBUTES)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,73 @@
1
+ require "json"
2
+ require "surfliner/metadata_consumer/payload"
3
+
4
+ module Surfliner
5
+ module MetadataConsumer
6
+ module Solr
7
+ # Base class for Solr message handlers; its class-level `handle` delegates to the
8
+ # sub-handler (IndexHandler or DeleteHandler) matching the resource status in the message.
9
+ class MessageHandler
10
+ attr_reader :payload
11
+
12
+ # Builds (and memoizes) an RSolr client from SOLR_HOST, SOLR_PORT and SOLR_COLLECTION_NAME.
13
+ # @return [RSolr::Client] The Solr connection.
14
+ def solr_connection
15
+ @solr_connection ||= begin
16
+ solr_host = ENV.fetch("SOLR_HOST")
17
+ solr_port = ENV.fetch("SOLR_PORT")
18
+ solr_collection_name = ENV.fetch("SOLR_COLLECTION_NAME")
19
+
20
+ solr_url = "http://#{solr_auth}#{solr_host}:#{solr_port}/solr/#{solr_collection_name}"
21
+ RSolr.connect(url: solr_url)
22
+ end
23
+ end
24
+
25
+ class << self
26
+ # Returns an IndexHandler for :published/:updated and a DeleteHandler for :unpublished/:deleted; any other status raises ArgumentError.
27
+ # @param payload_json [String] JSON message payload
28
+ # @return [#handle]
29
+ def handler_for(payload_json)
30
+ payload = Payload.new(payload_json)
31
+
32
+ case payload.status
33
+ when :published, :updated
34
+ IndexHandler.new(payload)
35
+ when :unpublished, :deleted
36
+ DeleteHandler.new(payload)
37
+ else
38
+ raise ArgumentError, "Couldn't handle message with payload status: #{payload.status}"
39
+ end
40
+ end
41
+
42
+ # Delegates payload handling to the handler chosen by `handler_for`.
43
+ # @param payload_json [String] JSON message payload
44
+ def handle(payload_json)
45
+ handler_for(payload_json).handle
46
+ end
47
+ end
48
+
49
+ # Initializes a new `MessageHandler`.
50
+ # @param payload [Payload]
51
+ def initialize(payload)
52
+ @payload = payload
53
+ end
54
+
55
+ # Implementations should handle the `Payload` provided in the initializer; this base version always raises NotImplementedError.
56
+ # @abstract
57
+ def handle
58
+ raise NotImplementedError
59
+ end
60
+
61
+ private
62
+
63
+ def solr_auth
64
+ solr_admin_user = ENV.fetch("SOLR_ADMIN_USER", nil)
65
+ solr_admin_password = ENV.fetch("SOLR_ADMIN_PASSWORD", nil)
66
+ return "" unless solr_admin_user && solr_admin_password
67
+
68
+ "#{solr_admin_user}:#{solr_admin_password}@"
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,9 @@
1
+ Dir.glob(File.expand_path("solr/*.rb", __dir__)).sort.each(&method(:require))
2
+
3
+ module Surfliner
4
+ module MetadataConsumer
5
+ # Message handlers for Solr indexing.
6
+ module Solr
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,45 @@
1
+ module Surfliner
2
+ module MetadataConsumer
3
+ # Retrieves resources from Superskunk.
4
class SuperskunkClient
  # The JSON-LD profile to request when retrieving the resource
  DEFAULT_JSONLD_PROFILE = "tag:surfliner.gitlab.io,2022:api/oai_dc"

  # Default number of redirects followed before giving up
  MAX_REDIRECTS = 10

  # Exception raised in the event of an unexpected HTTP status code.
  class UnexpectedResponse < RuntimeError; end

  class << self
    # Retrieves the specified resource, following HTTP redirects.
    # NOTE: relies on `net/http` and `json` having been loaded by the caller.
    #
    # @param url [String] The resource URL as a string
    # @param redirect_limit [Integer] how many more redirects may be followed
    # @return [Hash] parsed JSON response data
    #
    # @raise [UnexpectedResponse] in the event of an unexpected HTTP status
    #   code, a redirect without a `Location` header, or too many redirects.
    def get(url, redirect_limit: MAX_REDIRECTS)
      uri = URI(url)
      response = request_resource(uri)

      case response
      when Net::HTTPSuccess
        JSON.parse(response.body)
      when Net::HTTPRedirection
        target = response["location"]
        if target.nil?
          raise UnexpectedResponse, "Failed to fetch data; status #{response.code} without a Location header"
        end
        # Bounded so a redirect loop fails cleanly instead of recursing forever.
        unless redirect_limit.positive?
          raise UnexpectedResponse, "Failed to fetch data; too many redirects (last: #{target})"
        end

        # A Location header may be relative; resolve it against the request URL.
        get(URI.join(uri.to_s, target).to_s, redirect_limit: redirect_limit - 1)
      else
        raise UnexpectedResponse, "Failed to fetch data; status #{response.code}"
      end
    end

    private

    # Performs a single GET for `uri`, asking for the configured JSON-LD profile.
    # @param uri [URI::Generic] the resource location
    # @return [Net::HTTPResponse]
    def request_resource(uri)
      req = Net::HTTP::Get.new(uri)
      req["Accept"] = "application/ld+json;profile=\"#{jsonld_profile}\""
      req["User-Agent"] = ENV.fetch("USER_AGENT_PRODUCT_NAME") { "surfliner.daylight" } # TODO: make this more obviously configurable?

      Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
        http.request(req)
      end
    end

    def jsonld_profile
      DEFAULT_JSONLD_PROFILE
    end
  end
end
44
+ end
45
+ end
@@ -0,0 +1,8 @@
1
+ # Umbrella module for general Surfliner code
2
+ module Surfliner
3
+ # Parent module for this gem
4
+ module MetadataConsumer
5
+ # The gem version, read by the gemspec (a pre-release, per the `.pre.alpha` suffix)
6
+ VERSION = "0.1.0.pre.alpha"
7
+ end
8
+ end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path("metadata_consumer/*.rb", __dir__)).sort.each(&method(:require))
@@ -0,0 +1,42 @@
1
+ require_relative "lib/surfliner/metadata_consumer/version"
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "surfliner-metadata_consumer"
5
+ spec.version = Surfliner::MetadataConsumer::VERSION
6
+ spec.authors = ["Project Surfliner"]
7
+
8
+ spec.homepage = "https://gitlab.com/surfliner/metadata_consumer"
9
+ spec.license = "MIT"
10
+ spec.summary = "Surfliner metadata consumer"
11
+
12
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.3.1")
13
+
14
+ spec.metadata["homepage_uri"] = spec.homepage
15
+ spec.metadata["source_code_uri"] = "https://gitlab.com/surfliner/metadata_consumer.git"
16
+ spec.metadata["rubygems_mfa_required"] = "false"
17
+
18
+ spec.files = Dir["lib/**/*.rb"] + Dir["bin/*"] + Dir["[A-Z]*"]
19
+
20
+ ["daylight-index-listen", "simulate-publish-event"].each do |script|
21
+ spec.executables << script
22
+ end
23
+
24
+ spec.add_dependency "bunny", "~> 2.23"
25
+ # TODO: Figure out why we get "The otlp exporter cannot be configured - please add opentelemetry-exporter-otlp to your Gemfile"
26
+ spec.add_dependency "opentelemetry-exporter-otlp", "~> 0.26.3"
27
+ spec.add_dependency "opentelemetry-instrumentation-all", "~> 0.60.0"
28
+ spec.add_dependency "opentelemetry-sdk", "~> 1.4.1"
29
+ spec.add_dependency "rsolr", ">= 1.0", "< 3"
30
+
31
+ spec.add_development_dependency "debug", "~> 1.9.2"
32
+ spec.add_development_dependency "rspec", "~> 3.13"
33
+ spec.add_development_dependency "standard", "~> 1.31"
34
+ spec.add_development_dependency "ci_reporter_rspec", "~> 1.0"
35
+ spec.add_development_dependency "colorize", "~> 0.8"
36
+ spec.add_development_dependency "dotenv", "~> 2.7"
37
+ spec.add_development_dependency "rake", "~> 13.0"
38
+ spec.add_development_dependency "simplecov", "~> 0.22"
39
+ spec.add_development_dependency "simplecov-cobertura", "~> 2.1"
40
+ spec.add_development_dependency "webmock", "~> 3.12"
41
+ spec.add_development_dependency "yard", "~> 0.9.37"
42
+ end