surfliner-metadata_consumer 0.1.0.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
require "rsolr"
require "surfliner/metadata_consumer/mq_connection"
require "surfliner/metadata_consumer/solr/message_handler"

module Surfliner
  module MetadataConsumer
    # A metadata consumer that subscribes to a RabbitMQ queue and passes
    # each message payload to the specified handler.
    class Consumer
      # @return [MqConnection] the RabbitMQ connection wrapper
      attr_reader :connection

      # @return [Logger] log message destination
      attr_reader :logger

      # @return [OpenTelemetry::Trace::Tracer] OpenTelemetry tracer
      attr_reader :tracer

      # @return [#handle] the object each message payload is passed to
      attr_reader :handler

      # Initializes a new `Consumer`. The RabbitMQ connection is created with
      # `MqConnection`'s default configuration.
      # @param tracer [OpenTelemetry::Trace::Tracer] OpenTelemetry tracer
      # @param logger [Logger] log message destination
      # @param handler [#handle] an object accepting a JSON string payload
      def initialize(tracer:, logger:, handler:)
        @connection = MqConnection.new(logger:)
        @logger = logger
        @tracer = tracer
        @handler = handler
      end

      # Initializes and starts a new `Consumer`
      # @param tracer [OpenTelemetry::Trace::Tracer] OpenTelemetry tracer
      # @param logger [Logger] log message destination
      # @param handler [#handle] an object accepting a JSON string payload
      def self.run(tracer:, logger:, handler:)
        new(tracer:, logger:, handler:).run
      end

      # Starts listening to the message queue and passing messages to the
      # handler. Blocks the calling thread (`block: true`) while subscribed.
      def run
        connection.open do |queue|
          queue.subscribe(block: true) do |_delivery_info, _properties, payload_json|
            process(payload_json)
          end
        end
      end

      private

      # Handles one message inside a tracing span. A failure is logged rather
      # than propagated, so one bad message does not end the subscription.
      # @param payload_json [String] the raw message payload
      def process(payload_json)
        tracer.in_span("surfliner metadata consumer message") do |_span|
          logger.info(" [  ] message received with payload: #{payload_json}")

          handler.handle(payload_json)
        end
      rescue StandardError, NotImplementedError => err
        # NotImplementedError is a ScriptError, not a StandardError, so a bare
        # `rescue` would let it through; handlers in this gem raise it for
        # operations that are not written yet.
        # The backtrace is an Array (or nil): join it so each frame gets its
        # own line instead of logging the Array's inspect form.
        logger.error(" [❌] failed to handle message: #{err}\n#{Array(err.backtrace).join("\n")}")
      end
    end
  end
end
@@ -0,0 +1,79 @@
module Surfliner
  module MetadataConsumer
    # An object encapsulating RabbitMQ configuration.
    class MqConfig
      # @return [String] The RabbitMQ hostname
      attr_reader :host

      # @return [String] The RabbitMQ AMQP port
      attr_reader :port

      # @return [String] The RabbitMQ username
      attr_reader :username

      # @return [String] The RabbitMQ password
      attr_reader :password

      # @return [String] The topic exchange to listen to
      attr_reader :topic

      # @return [String] The name of the queue to listen to
      attr_reader :queue_name

      # @return [String] The platform routing key to listen to
      attr_reader :routing_key

      # Initializes a new `MqConfig` object.
      # @param host [String] The RabbitMQ hostname
      # @param port [String] The RabbitMQ AMQP port
      # @param username [String] The RabbitMQ username
      # @param password [String] The RabbitMQ password
      # @param topic [String] The topic exchange to listen to
      # @param queue_name [String] The name of the queue to listen to
      # @param routing_key [String] The platform routing key to listen to
      def initialize(host:, port:, username:, password:, topic:, queue_name:, routing_key:)
        @host = host
        @port = port
        @username = username
        @password = password
        @topic = topic
        @queue_name = queue_name
        @routing_key = routing_key
      end

      class << self
        # Reads RabbitMQ configuration from environment variables and
        # returns it as a new `MqConfig` object.
        #
        # - `RABBITMQ_HOST` → `host`
        # - `RABBITMQ_NODE_PORT_NUMBER` → `port`
        # - `RABBITMQ_USERNAME` → `username`
        # - `RABBITMQ_PASSWORD` → `password`
        # - `RABBITMQ_TOPIC` → `topic`
        # - `RABBITMQ_QUEUE` → `queue_name`
        # - `RABBITMQ_PLATFORM_ROUTING_KEY` → `routing_key`
        #
        # @return [MqConfig]
        # @raise [KeyError] if any of the variables is not set
        def from_env
          new(
            host: ENV.fetch("RABBITMQ_HOST"),
            port: ENV.fetch("RABBITMQ_NODE_PORT_NUMBER"),
            username: ENV.fetch("RABBITMQ_USERNAME"),
            password: ENV.fetch("RABBITMQ_PASSWORD"),
            topic: ENV.fetch("RABBITMQ_TOPIC"),
            queue_name: ENV.fetch("RABBITMQ_QUEUE"),
            routing_key: ENV.fetch("RABBITMQ_PLATFORM_ROUTING_KEY")
          )
        end
      end

      # NOTE(review): the username and password are interpolated verbatim, so
      # values containing URL-reserved characters (`@`, `/`, `:`, ...)
      # presumably need to be percent-encoded by whoever supplies them —
      # confirm against the AMQP client's URL parsing.
      # @return [String] the connection URL as a string
      def connection_url
        @connection_url ||= "amqp://#{username}:#{password}@#{host}:#{port}"
      end

      # Built field by field rather than by substituting the password inside
      # `connection_url`: a text substitution replaces the first occurrence,
      # which is the username (or scheme, or host) whenever the password also
      # appears there — e.g. `guest`/`guest` — leaving the real password in
      # the output.
      # @return [String] the connection URL as a string, without the password
      def redacted_url
        @redacted_url ||= "amqp://#{username}:REDACTED@#{host}:#{port}"
      end
    end
  end
end
@@ -0,0 +1,115 @@
require "bunny"

module Surfliner
  module MetadataConsumer
    # An object encapsulating a RabbitMQ connection.
    class MqConnection
      # @return [Logger] The logger
      attr_reader :logger

      # @return [Bunny::Session] The current RabbitMQ session
      attr_reader :connection

      # @return [Bunny::Channel] The channel being listened to
      attr_reader :channel

      # @return [Bunny::Exchange] The exchange being listened to
      attr_reader :exchange

      # @return [Bunny::Queue] The queue being listened to
      attr_reader :queue

      # @return [MqConfig] The configuration
      attr_reader :config

      # Initializes a new `MqConnection`. No network activity happens until
      # {#connect} or {#open} is called.
      #
      # @param logger [Logger] the logger
      # @param config [MqConfig] the configuration; read from the environment
      #   when omitted
      def initialize(logger:, config: MqConfig.from_env)
        @logger = logger
        @config = config
      end

      # Opens a connection, creates a channel, declares the topic exchange and
      # the durable queue, and binds the queue with the configured routing key.
      # @return [self]
      # @raise RuntimeError if already connected, or if the connection could
      #   not be opened before the retry budget ran out
      def connect
        raise "RabbitMQ connection #{connection} already open." if open?

        logger.info("Rabbitmq message broker connection url: #{config.redacted_url}")
        @connection = Bunny.new(config.connection_url, logger: logger)
        connect_on(connection)
        @channel = connection.create_channel
        @exchange = channel.topic(config.topic, auto_delete: true)
        @queue = channel.queue(config.queue_name, durable: true)
        queue.bind(exchange, routing_key: config.routing_key)

        self
      rescue Bunny::TCPConnectionFailed
        # TODO: errors raised while starting the session are absorbed by the
        # retry loop in #connect_on (and surface only as the `cause` of its
        # RuntimeError), so this branch is unlikely to run.
        logger.error("Connection to #{config.redacted_url} failed")
        raise
      rescue Bunny::PossibleAuthenticationFailureError
        # TODO: same as above.
        logger.error("Failed to authenticate to #{config.redacted_url}")
        raise
      end

      # Opens a connection, yields the queue, and closes the connection after
      # the provided block completes (or raises).
      # @yield [Bunny::Queue] the queue
      def open
        connect
        yield queue
      ensure
        close
      end

      # Closes the channel and then the connection; the connection is closed
      # even when closing the channel raises.
      def close
        channel&.close
      ensure
        connection&.close
      end

      # @return [true, false] True if the connection is open, false otherwise
      def open?
        connection&.status == :open
      end

      # @return [Symbol, nil] The connection status, or nil if there is no connection
      def status
        connection&.status
      end

      # @return [String] The RabbitMQ hostname
      def host
        config.host
      end

      # @return [String] The RabbitMQ port
      def port
        config.port
      end

      private

      # Repeatedly tries to start the session, pausing one second between
      # attempts. `timeout` bounds the number of attempts; time spent inside
      # an attempt itself is not counted.
      #
      # @param connection [Bunny::Session] the session to start
      # @param timeout [Integer] maximum number of attempts
      # @return [Bunny::Session] the session, once its status is `:open`
      # @raise RuntimeError when no attempt succeeded; the most recent
      #   failure, if any, is attached as the exception's `cause`
      def connect_on(connection, timeout = 120)
        logger.info "Trying to open queue connection with timeout=#{timeout}"
        last_error = nil
        timer = 0
        while timer < timeout
          begin
            connection.start
          rescue => err
            # Keep retrying, but remember why the attempt failed so the final
            # error is diagnosable.
            # TODO: do we actually want to rescue from everything?
            last_error = err
            logger.debug("Queue connection attempt #{timer + 1} failed: #{err.class}: #{err.message}")
          end
          return connection if connection.status == :open
          sleep 1
          timer += 1
        end

        logger.error("Giving up on queue connection: #{last_error.class}: #{last_error.message}") if last_error
        raise "Failed to connect to queue.", cause: last_error
      end
    end
  end
end
@@ -0,0 +1,39 @@
require "json"

module Surfliner
  module MetadataConsumer
    # Encapsulates a Surfliner resource message payload.
    class Payload
      # Expected resource status values.
      KNOWN_STATUSES = [:published, :updated, :unpublished, :deleted].freeze

      # Initializes a new `Payload` from the specified JSON data.
      # @param payload_json [String] The payload data as received from RabbitMQ.
      # @raise [JSON::ParserError] if the payload is not valid JSON.
      def initialize(payload_json)
        @data = JSON.parse(payload_json)
      end

      # @return [String] The payload data as a JSON string.
      def to_s
        # Serialize explicitly: Hash#to_s yields Ruby's inspect format
        # (`{"a"=>1}`), which is not JSON.
        JSON.generate(@data)
      end

      # @return [String] the URL for the resource.
      # @raise [KeyError] if the payload has no `resourceUrl` entry.
      def resource_url
        @resource_url ||= @data.fetch("resourceUrl")
      end

      # @return [Symbol] the resource status as a symbol.
      # @raise UnknownStatus if the message does not provide the resource
      #   status, or provides something other than a string (e.g. `null`).
      def status
        @status ||= begin
          status_str = @data.fetch("status") do
            raise(UnknownStatus, "Payload status is not defined in payload: #{@data}")
          end
          unless status_str.is_a?(String)
            raise(UnknownStatus, "Payload status is not a string in payload: #{@data}")
          end
          status_str.to_sym
        end
      end

      # Exception raised if the resource status has not been provided in the message.
      class UnknownStatus < RuntimeError; end
    end
  end
end
@@ -0,0 +1,15 @@
require "surfliner/metadata_consumer/solr/message_handler"

module Surfliner
  module MetadataConsumer
    module Solr
      # Handler for resources whose status is `:deleted`. Removal from the
      # index has not been written yet, so handling always fails.
      class DeleteHandler < MessageHandler
        # @raise [NotImplementedError] always; deletion is not implemented
        def handle = raise(NotImplementedError, "IMPLEMENT ME: This consumer can't delete yet!")
      end
    end
  end
end
@@ -0,0 +1,45 @@
require "json"
require "net/http"
require "rsolr"

require "surfliner/metadata_consumer/solr/message_handler"
require "surfliner/metadata_consumer/superskunk_client"

module Surfliner
  module MetadataConsumer
    module Solr
      # Message handler that indexes resources into Solr.
      class IndexHandler < MessageHandler
        # Attributes sent with the Solr "add" command; see
        # https://cwiki.apache.org/confluence/display/solr/UpdateXmlMessages#UpdateXmlMessages-Optionalattributesfor%22add%22
        SOLR_ATTRIBUTES = {commitWithin: 10}.freeze

        # Fetches the resource named in the message from Superskunk, maps it
        # to a Solr document, and adds that document to Solr.
        def handle
          resource = SuperskunkClient.get(payload.resource_url)
          document = build_document(resource)
          index(document)
        end

        private

        ##
        # Maps the fetched resource data onto Solr field names. The document
        # id is the last path segment of the resource's `@id`.
        #
        # @param data [Hash] parsed resource data, keyed by string
        # @return [Hash]
        def build_document(data)
          resource_uri = data["@id"]

          {
            id: resource_uri.split("/").last,
            title_tesim: data["title"],
            creator_ssim: data["creator"],
            ark_si: data["ark"],
            superskunk_uri_si: resource_uri
          }
        end

        ##
        # Sends a single document to Solr with {SOLR_ATTRIBUTES}.
        #
        # @param doc [Hash]
        # @return [void]
        def index(doc)
          documents = [doc]
          solr_connection.add(documents, add_attributes: SOLR_ATTRIBUTES)
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
require "json"
require "surfliner/metadata_consumer/payload"

module Surfliner
  module MetadataConsumer
    module Solr
      # Base class for Solr message handlers. Its class-level entry points
      # pick a concrete handler from the resource status in the message and
      # delegate to it.
      class MessageHandler
        # @return [Payload] the payload this handler was created for
        attr_reader :payload

        # Chooses the handler matching the resource status in the message.
        # @param payload_json [String] JSON message payload
        # @return [#handle]
        # @raise [ArgumentError] if the status has no matching handler
        def self.handler_for(payload_json)
          payload = Payload.new(payload_json)
          status = payload.status

          return IndexHandler.new(payload) if %i[published updated].include?(status)
          return DeleteHandler.new(payload) if %i[unpublished deleted].include?(status)

          raise ArgumentError, "Couldn't handle message with payload status: #{status}"
        end

        # Builds the appropriate handler for the message and runs it.
        # @param payload_json [String] JSON message payload
        def self.handle(payload_json)
          handler_for(payload_json).handle
        end

        # Initializes a new `MessageHandler`.
        # @param payload [Payload]
        def initialize(payload)
          @payload = payload
        end

        # Lazily builds, then reuses, the client for the configured Solr
        # collection. RSolr is expected to be loaded already; this file does
        # not require it itself.
        # @return [RSolr::Client] The Solr connection.
        # @raise [KeyError] if `SOLR_HOST`, `SOLR_PORT` or
        #   `SOLR_COLLECTION_NAME` is not set
        def solr_connection
          @solr_connection ||= RSolr.connect(url: solr_url)
        end

        # Subclasses override this to act on the `Payload` given to the
        # initializer.
        # @abstract
        def handle
          raise NotImplementedError
        end

        private

        # Assembles the collection URL from the environment.
        # @return [String]
        def solr_url
          host = ENV.fetch("SOLR_HOST")
          port = ENV.fetch("SOLR_PORT")
          collection = ENV.fetch("SOLR_COLLECTION_NAME")

          "http://#{solr_auth}#{host}:#{port}/solr/#{collection}"
        end

        # @return [String] a `user:password@` URL prefix when both
        #   `SOLR_ADMIN_USER` and `SOLR_ADMIN_PASSWORD` are set, otherwise an
        #   empty string
        def solr_auth
          user = ENV.fetch("SOLR_ADMIN_USER", nil)
          password = ENV.fetch("SOLR_ADMIN_PASSWORD", nil)

          (user && password) ? "#{user}:#{password}@" : ""
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
# Load every Ruby file in the sibling `solr/` directory, in sorted order so
# the load order does not depend on the filesystem.
Dir.glob(File.expand_path("solr/*.rb", __dir__)).sort.each { |handler_file| require handler_file }

module Surfliner
  module MetadataConsumer
    # Namespace for the message handlers used for Solr indexing.
    module Solr
    end
  end
end
@@ -0,0 +1,45 @@
require "json"
require "net/http"

module Surfliner
  module MetadataConsumer
    # Retrieves resources from Superskunk.
    class SuperskunkClient
      # The JSON-LD profile to request when retrieving the resource
      DEFAULT_JSONLD_PROFILE = "tag:surfliner.gitlab.io,2022:api/oai_dc"

      # How many redirects {.get} follows by default before giving up
      DEFAULT_MAX_REDIRECTS = 10

      class << self
        # Retrieves the specified resource, following redirects.
        # @param url [String] The resource URL as a string
        # @param max_redirects [Integer] how many redirects may still be
        #   followed; guards against redirect loops
        # @return [Hash] parsed JSON response data
        #
        # @raise [UnexpectedResponse] in the event of an unexpected HTTP status
        #   code, a redirect without a `Location` header, or too many redirects.
        def get(url, max_redirects: DEFAULT_MAX_REDIRECTS)
          uri = URI(url)
          response = perform_get(uri)

          case response
          when Net::HTTPSuccess
            JSON.parse(response.body)
          when Net::HTTPRedirection
            get(redirect_target(uri, response, max_redirects), max_redirects: max_redirects - 1)
          else
            raise UnexpectedResponse, "Failed to fetch data; status #{response.code}"
          end
        end

        private

        # Issues one GET request asking for the JSON-LD profile.
        # @param uri [URI] the absolute URI to request
        # @return [Net::HTTPResponse]
        def perform_get(uri)
          req = Net::HTTP::Get.new(uri)
          req["Accept"] = "application/ld+json;profile=\"#{jsonld_profile}\""
          req["User-Agent"] = ENV.fetch("USER_AGENT_PRODUCT_NAME") { "surfliner.daylight" } # TODO: make this more obviously configurable?

          Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
            http.request(req)
          end
        end

        # Works out where a redirect response points.
        # @param uri [URI] the URI that produced the redirect
        # @param response [Net::HTTPResponse] the redirect response
        # @param max_redirects [Integer] redirects still allowed
        # @return [String] the absolute URL to request next
        # @raise [UnexpectedResponse] if there is no `Location` header or no
        #   redirects remain
        def redirect_target(uri, response, max_redirects)
          location = response["location"]
          # Not every redirection-class response names a target (e.g. 304).
          if location.nil? || location.empty?
            raise UnexpectedResponse, "Failed to fetch data; status #{response.code} without a Location header"
          end
          unless max_redirects.positive?
            raise UnexpectedResponse, "Failed to fetch data; too many redirects (last target: #{location})"
          end

          # `Location` may be relative; resolve it against the current URI.
          URI.join(uri.to_s, location).to_s
        end

        def jsonld_profile
          DEFAULT_JSONLD_PROFILE
        end
      end

      # Exception raised in the event of an unexpected HTTP status code.
      class UnexpectedResponse < RuntimeError; end
    end
  end
end
@@ -0,0 +1,8 @@
# Umbrella module for general Surfliner code
module Surfliner
  # Parent module for this gem
  module MetadataConsumer
    # The gem version. Frozen so the shared constant cannot be mutated in
    # place by code that reads it.
    VERSION = "0.1.0.pre.alpha".freeze
  end
end
@@ -0,0 +1 @@
# Load every Ruby file in the sibling `metadata_consumer/` directory, in
# sorted order so the load order does not depend on the filesystem.
Dir.glob(File.expand_path("metadata_consumer/*.rb", __dir__)).sort.each { |source_file| require source_file }
@@ -0,0 +1,42 @@
require_relative "lib/surfliner/metadata_consumer/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name = "surfliner-metadata_consumer"
  spec.version = Surfliner::MetadataConsumer::VERSION
  spec.authors = ["Project Surfliner"]

  spec.homepage = "https://gitlab.com/surfliner/metadata_consumer"
  spec.license = "MIT"
  spec.summary = "Surfliner metadata consumer"

  spec.required_ruby_version = Gem::Requirement.new(">= 3.3.1")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://gitlab.com/surfliner/metadata_consumer.git"
  spec.metadata["rubygems_mfa_required"] = "false"

  # Packaged files: library sources, scripts, and capitalized top-level files.
  spec.files = %w[lib/**/*.rb bin/* [A-Z]*].flat_map { |pattern| Dir[pattern] }

  # Command-line entry points
  spec.executables.concat(%w[daylight-index-listen simulate-publish-event])

  # Runtime dependencies, as name => version constraints.
  # TODO: Figure out why we get "The otlp exporter cannot be configured - please add opentelemetry-exporter-otlp to your Gemfile"
  {
    "bunny" => ["~> 2.23"],
    "opentelemetry-exporter-otlp" => ["~> 0.26.3"],
    "opentelemetry-instrumentation-all" => ["~> 0.60.0"],
    "opentelemetry-sdk" => ["~> 1.4.1"],
    "rsolr" => [">= 1.0", "< 3"]
  }.each do |gem_name, constraints|
    spec.add_dependency gem_name, *constraints
  end

  # Development-only dependencies, as name => version constraint.
  {
    "debug" => "~> 1.9.2",
    "rspec" => "~> 3.13",
    "standard" => "~> 1.31",
    "ci_reporter_rspec" => "~> 1.0",
    "colorize" => "~> 0.8",
    "dotenv" => "~> 2.7",
    "rake" => "~> 13.0",
    "simplecov" => "~> 0.22",
    "simplecov-cobertura" => "~> 2.1",
    "webmock" => "~> 3.12",
    "yard" => "~> 0.9.37"
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end
end