sap-ecs-log-forwarder 1.0.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sap_ecs_log_forwarder/__init__.py +0 -0
- sap_ecs_log_forwarder/aws.py +208 -0
- sap_ecs_log_forwarder/azure.py +240 -0
- sap_ecs_log_forwarder/base_runner.py +26 -0
- sap_ecs_log_forwarder/cli.py +303 -0
- sap_ecs_log_forwarder/config.py +99 -0
- sap_ecs_log_forwarder/consumer.py +56 -0
- sap_ecs_log_forwarder/crypto.py +41 -0
- sap_ecs_log_forwarder/gcp.py +105 -0
- sap_ecs_log_forwarder/json_logging.py +42 -0
- sap_ecs_log_forwarder/metrics.py +37 -0
- sap_ecs_log_forwarder/processor.py +107 -0
- sap_ecs_log_forwarder/utils.py +28 -0
- sap_ecs_log_forwarder-1.0.0.dist-info/LICENSE +38 -0
- sap_ecs_log_forwarder-1.0.0.dist-info/METADATA +634 -0
- sap_ecs_log_forwarder-1.0.0.dist-info/RECORD +18 -0
- sap_ecs_log_forwarder-1.0.0.dist-info/WHEEL +4 -0
- sap_ecs_log_forwarder-1.0.0.dist-info/entry_points.txt +4 -0
sap_ecs_log_forwarder/__init__.py
File without changes

sap_ecs_log_forwarder/aws.py
@@ -0,0 +1,208 @@
import asyncio
from http.cookies import Morsel
import json
import logging
import re
import time
import threading
import aiohttp
import boto3

from sap_ecs_log_forwarder import metrics
from sap_ecs_log_forwarder.crypto import decrypt_auth_dict
from sap_ecs_log_forwarder.processor import emit
from sap_ecs_log_forwarder.utils import compile_filters, decode_bytes, is_relevant, split_lines


class AWSRunner:
    def __init__(self, cfg):
        self.cfg = cfg
        self._stop = threading.Event()
        self.inc, self.exc = compile_filters(cfg.get("includeFilter", []), cfg.get("excludeFilter", []))
        self.max_retries = cfg.get("maxRetries", 5)
        lvl = getattr(logging, str(self.cfg.get("logLevel", "INFO")).upper(), logging.INFO)
        self.log = logging.getLogger(f"input.{self.cfg.get('name', 'aws')}")
        self.log.setLevel(lvl)
        # rotating session credentials
        self._aws_kwargs = {}
        self._aws_expiry_ts = 0
        self._gen = 0          # increments when creds refreshed
        self._active_gen = -1  # last gen applied to clients
        self.sqs = None
        self.s3 = None

    def _creds_valid(self):
        return self._aws_kwargs and (self._aws_expiry_ts - time.time() > 60)

    async def _refresh_temp_creds(self):
        while not self._stop.is_set():
            try:
                self.log.info(f"[{self.cfg['name']}] Refreshing AWS Creds...")
                await self._ensure_credentials()
                self._gen += 1
                now = time.time()
                if self._aws_expiry_ts > now:
                    sleep_for = max(60, min(30 * 60, int(self._aws_expiry_ts - now - 60)))
                else:
                    sleep_for = 30 * 60
            except Exception as e:
                self.log.error(f"[{self.cfg['name']}] AWS Creds refresh error: {e}")
                sleep_for = 60
            self.log.info(f"[{self.cfg['name']}] Next AWS Creds refresh in {sleep_for} seconds.")
            await asyncio.sleep(sleep_for)

    async def _ensure_credentials(self):
        auth = decrypt_auth_dict(self.cfg.get("authentication", {}))
        client_id = auth.get("clientId")
        client_secret = auth.get("clientSecret")
        login_url = auth.get("loginUrl")
        aws_creds_url = auth.get("awsCredsUrl") or auth.get("credsUrl")  # allow shared key name
        bucket = self.cfg.get("bucket")
        # Dynamic mode
        if client_id and client_secret and login_url and aws_creds_url and bucket:
            async with aiohttp.ClientSession() as session:
                # Login to get cookie
                data = {"client_id": client_id, "client_secret": client_secret}
                async with session.post(login_url, data=data, headers={"content-type": "application/x-www-form-urlencoded"}) as r:
                    if r.status != 200:
                        raise RuntimeError(f"Login failed: HTTP {r.status}")
                    cookies = session.cookie_jar.filter_cookies(login_url)
                    sid = cookies.get("session-id-raven")
                    if not sid:
                        if login_url.startswith("http://localhost"):
                            cookie_headers = r.headers.getall("Set-Cookie", [])
                            for ch in cookie_headers:
                                m = re.search(r"session-id-raven=([^;]+);", ch)
                                if m:
                                    sid = Morsel[str]()
                                    sid.set("session-id-raven", m.group(1), m.group(1))
                                    break
                        if not sid:
                            raise RuntimeError("Login OK but session-id-raven cookie missing")
                # Fetch temporary AWS creds
                headers = {"Cookie": f"session-id-raven={sid.value}"}
                params = {"bucket": bucket}
                async with session.get(aws_creds_url, params=params, headers=headers) as r2:
                    if r2.status != 200:
                        raise RuntimeError(f"Creds fetch failed: HTTP {r2.status}")
                    payload = await r2.json()
                    data = payload.get("data", {})
                    akid = data.get("AccessKeyId")
                    secret = data.get("SecretAccessKey")
                    token = data.get("SessionToken")
                    region = data.get("Region") or self.cfg.get("region")
                    expiry = data.get("Expiration")
                    if not (akid and secret and token):
                        raise RuntimeError("Temporary AWS credentials missing required fields")
                    # parse expiry
                    try:
                        from datetime import datetime, timezone
                        self._aws_expiry_ts = datetime.strptime(expiry, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc).timestamp() if expiry else time.time() + 30 * 60
                    except Exception:
                        self._aws_expiry_ts = time.time() + 30 * 60
                    self._aws_kwargs = {
                        "aws_access_key_id": akid,
                        "aws_secret_access_key": secret,
                        "aws_session_token": token,
                    }
                    self._dynamic_region = region
                    self.log.info(f"[{self.cfg['name']}] AWS temporary creds refreshed; expires at {expiry}")
                    return
        # Static mode
        static = decrypt_auth_dict(self.cfg.get("authentication", {}))
        if static.get("accessKeyId") and static.get("secretAccessKey"):
            self._aws_kwargs = {
                "aws_access_key_id": static["accessKeyId"],
                "aws_secret_access_key": static["secretAccessKey"],
            }
        self._dynamic_region = None

    def start(self):
        asyncio.run(self._run())

    async def _run(self):
        await self._ensure_credentials()
        auth = decrypt_auth_dict(self.cfg.get("authentication", {}))
        dynamic = all(auth.get(k) for k in ("clientId", "clientSecret", "loginUrl")) and (auth.get("awsCredsUrl") or auth.get("credsUrl"))
        refresh_task = asyncio.create_task(self._refresh_temp_creds()) if dynamic else None

        queue_url = self.cfg["queue"]

        def build_clients():
            region = self._dynamic_region if self._dynamic_region else self.cfg["region"]
            aws_kwargs = self._aws_kwargs or {}
            self.sqs = boto3.client("sqs", region_name=region, **aws_kwargs)
            self.s3 = boto3.client("s3", region_name=region, **aws_kwargs)
            self._active_gen = self._gen
            self.log.info(f"[{self.cfg['name']}] AWS clients built (gen {self._active_gen}).")

        build_clients()
        self.log.info(f"[{self.cfg['name']}] Polling SQS {queue_url}")
        while not self._stop.is_set():
            try:
                # Rebuild clients when creds rotated or expiring soon; static creds
                # never expire (_aws_expiry_ts stays 0), so skip the check for them
                if dynamic and (self._active_gen != self._gen or (self._aws_expiry_ts - time.time()) <= 60):
                    self.log.info(f"[{self.cfg['name']}] Rebuilding AWS clients due to creds rotation/expiry.")
                    self.log.info(f"[{self.cfg['name']}] Refreshing AWS Creds...")
                    await self._ensure_credentials()
                    self._gen += 1
                    build_clients()

                # NOTE: boto3 is synchronous; this long poll blocks the event loop until it returns
                resp = self.sqs.receive_message(
                    QueueUrl=queue_url,
                    MaxNumberOfMessages=10,
                    WaitTimeSeconds=15,
                    VisibilityTimeout=45,
                    AttributeNames=["ApproximateReceiveCount"],
                )
                for m in resp.get("Messages", []):
                    self._process(self.sqs, self.s3, m)
            except Exception as e:
                self.log.error(f"AWS polling error: {e}")
                await asyncio.sleep(10)
        if refresh_task:
            refresh_task.cancel()

    def _process(self, sqs, s3, message):
        handle = message.get("ReceiptHandle")
        body = message.get("Body", "")
        try:
            payload = json.loads(body)
        except Exception:
            self.log.error("AWS message is not valid JSON, deleting.")
            sqs.delete_message(QueueUrl=self.cfg["queue"], ReceiptHandle=handle)
            return
        for record in payload.get("Records", []):
            event = record.get("eventName", "")
            bname = record.get("s3", {}).get("bucket", {}).get("name", "")
            key = record.get("s3", {}).get("object", {}).get("key", "")
            if event != "ObjectCreated:Put" or bname != self.cfg.get("bucket") or not is_relevant(key, self.inc, self.exc):
                self.log.debug(f"[{self.cfg['name']}] Ignoring S3 object s3://{bname}/{key} (event: {event})")
                continue
            retry = int(record.get("retry_count", 0))
            if retry >= self.max_retries:
                self.log.error(f"[{self.cfg['name']}] Max retries reached for s3://{bname}/{key}, deleting message.")
                continue
            try:
                self.log.debug(f"[{self.cfg['name']}] Processing S3 object s3://{bname}/{key}")
                obj = s3.get_object(Bucket=bname, Key=key)
                raw = obj["Body"].read()
                text = decode_bytes(raw)
                lines = split_lines(text)
                emit(lines, key, self.cfg.get("outputs", []))
                self.log.debug(f"[{self.cfg['name']}] Processed S3 object s3://{bname}/{key}")
                sqs.delete_message(QueueUrl=self.cfg["queue"], ReceiptHandle=handle)
                metrics.inc("aws_messages_processed")
                return
            except Exception as e:
                # Requeue with an incremented retry_count, then drop the original message
                self.log.error(f"AWS processing failed; requeued: {e}")
                metrics.inc("aws_retry")
                retry += 1
                record["retry_count"] = retry
                payload["Records"] = [record]
                sqs.send_message(QueueUrl=self.cfg["queue"], MessageBody=json.dumps(payload))
                sqs.delete_message(QueueUrl=self.cfg["queue"], ReceiptHandle=handle)
                return
        # All records skipped: drop the message
        sqs.delete_message(QueueUrl=self.cfg["queue"], ReceiptHandle=handle)

    def stop(self):
        self._stop.set()
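
Note: the config keys AWSRunner reads (name, region, queue, bucket, includeFilter/excludeFilter, maxRetries, logLevel, authentication, outputs) imply the following usage. This is a minimal illustrative sketch with placeholder values, assuming decrypt_auth_dict passes unencrypted values through unchanged and that "outputs" takes whatever list processor.emit expects; it is not code from the package itself.

# Hypothetical static-credential setup for AWSRunner (placeholder values)
from sap_ecs_log_forwarder.aws import AWSRunner

cfg = {
    "name": "aws-logs",                # logger suffix and log-message prefix
    "region": "eu-central-1",
    "queue": "https://sqs.eu-central-1.amazonaws.com/123456789012/log-events",
    "bucket": "example-log-bucket",    # only ObjectCreated:Put events for this bucket are processed
    "includeFilter": [],               # passed to compile_filters()
    "excludeFilter": [],
    "maxRetries": 5,
    "logLevel": "INFO",
    "authentication": {                # static mode: no loginUrl/awsCredsUrl keys present
        "accessKeyId": "AKIA...",      # placeholder
        "secretAccessKey": "...",      # placeholder
    },
    "outputs": [],                     # forwarded to processor.emit()
}

runner = AWSRunner(cfg)
runner.start()  # blocks in asyncio.run(); call runner.stop() from another thread to exit
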
sap_ecs_log_forwarder/azure.py
@@ -0,0 +1,240 @@
import asyncio
import base64
from http.cookies import Morsel
import json
import logging
import re
from time import time
from urllib.parse import urlparse
import aiohttp
from azure.storage.queue.aio import QueueServiceClient
from azure.storage.blob import BlobClient, BlobServiceClient
from sap_ecs_log_forwarder import metrics
from sap_ecs_log_forwarder.crypto import decrypt_auth_dict
from sap_ecs_log_forwarder.processor import emit
from sap_ecs_log_forwarder.utils import compile_filters, decode_bytes, is_relevant, split_lines

refresh_token_minutes = 30
refresh_token_fallback_seconds = 60


class AzureRunner:
    def __init__(self, cfg):
        self.cfg = cfg
        self.inc, self.exc = compile_filters(cfg.get("includeFilter", []), cfg.get("excludeFilter", []))
        self.max_retries = cfg.get("maxRetries", 5)
        self.retry_delay = cfg.get("retryDelay", 10)
        self._stop = False
        self._blob_auth = None    # cached auth dict
        self._sas_token = None    # rotating SAS token
        self._sas_expiry_ts = 0   # initialize expiry
        lvl = getattr(logging, str(self.cfg.get("logLevel", "INFO")).upper(), logging.INFO)
        self.log = logging.getLogger(f"input.{self.cfg.get('name', 'azure')}")
        self.log.setLevel(lvl)
        self._sas_generation = 0
        self._active_generation = 0
        self.qsc = None
        self._queue_client = None

    async def _refresh_sas_loop(self):
        """
        If Azure authentication is configured for dynamic SAS, renew every
        refresh_token_minutes minutes (or before expiration).
        """
        while not self._stop:
            sleep_for = refresh_token_minutes * 60
            try:
                self.log.debug(f"[{self.cfg['name']}] Refreshing SAS token...")
                await self._ensure_sas_token()
                self._sas_generation += 1
                now = time()
                if self._sas_expiry_ts > now:
                    sleep_for = max(60, min(refresh_token_minutes * 60, int(self._sas_expiry_ts - now - 60)))
                self.log.debug(f"[{self.cfg['name']}] SAS refreshed; next in ~{sleep_for}s (gen {self._sas_generation}).")
            except Exception as e:
                self.log.error(f"[{self.cfg['name']}] SAS refresh error: {e}")
                sleep_for = refresh_token_fallback_seconds
            self.log.debug(f"[{self.cfg['name']}] Next SAS refresh in {sleep_for} seconds.")
            await asyncio.sleep(sleep_for)

    async def _ensure_sas_token(self):
        """
        Populate or refresh a temporary SAS token via backend if dynamic auth configured.
        Supports:
          - Static: sasToken in cfg.authentication
          - Dynamic: clientId/clientSecret + URLs (loginUrl, credsUrl) returning SAS and Expiration
        """
        auth = decrypt_auth_dict(self.cfg.get("authentication", {}))
        # Dynamic mode detection
        client_id = auth.get("clientId")
        client_secret = auth.get("clientSecret")
        login_url = auth.get("loginUrl")
        creds_url = auth.get("credsUrl")
        storage_account_name = self.cfg.get("storageAccount")
        if client_id and client_secret and login_url and creds_url and storage_account_name:
            async with aiohttp.ClientSession() as session:
                # Login to get session cookie
                data = {"client_id": client_id, "client_secret": client_secret}
                async with session.post(login_url, data=data, headers={"content-type": "application/x-www-form-urlencoded"}) as r:
                    if r.status != 200:
                        raise RuntimeError(f"Login failed: HTTP {r.status}")
                    # Extract cookie session-id-raven
                    cookies = session.cookie_jar.filter_cookies(login_url)
                    sid = cookies.get("session-id-raven")
                    if not sid:
                        if login_url.startswith("http://localhost"):
                            cookie_headers = r.headers.getall("Set-Cookie", [])
                            for ch in cookie_headers:
                                m = re.search(r"session-id-raven=([^;]+);", ch)
                                if m:
                                    sid = Morsel[str]()
                                    sid.set("session-id-raven", m.group(1), m.group(1))
                                    break
                        if not sid:
                            raise RuntimeError("Login OK but session-id-raven cookie missing")
                # Request temporary credentials
                params = {"storage-account-name": storage_account_name}
                headers = {"Content-Type": "application/json", "Cookie": f"session-id-raven={sid.value}"}
                async with session.get(creds_url, params=params, headers=headers) as r2:
                    if r2.status != 200:
                        raise RuntimeError(f"Creds fetch failed: HTTP {r2.status}")
                    payload = await r2.json()
                    data = payload.get("data", {})
                    sas_token = data.get("SASToken")
                    expiry = data.get("Expiration")
                    if not sas_token:
                        raise RuntimeError("SASToken missing in credentials response")
                    # Parse expiry RFC3339 to epoch
                    try:
                        from datetime import datetime, timezone
                        self._sas_expiry_ts = datetime.strptime(expiry, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc).timestamp() if expiry else time() + refresh_token_minutes * 60
                    except Exception:
                        self._sas_expiry_ts = time() + 15 * 60
                    self._sas_token = sas_token
                    self._blob_auth = {"sasToken": sas_token}
                    self.log.debug(f"[{self.cfg['name']}] SAS token refreshed; expires at {expiry}")
                    return

        # Static mode fallback
        sas_token = auth.get("sasToken")
        if sas_token:
            self._sas_token = sas_token
            self._blob_auth = auth

    def start(self):
        asyncio.run(self._run())

    async def _run(self):
        account = self.cfg.get("storageAccount")
        queue_name = self.cfg["queue"]
        auth = decrypt_auth_dict(self.cfg.get("authentication", {}))
        await self._ensure_sas_token()
        dynamic = all(auth.get(k) for k in ("clientId", "clientSecret", "loginUrl", "credsUrl"))
        refresh_task = asyncio.create_task(self._refresh_sas_loop()) if dynamic else None

        async def build_clients():
            # close previous context if exists
            if self.qsc:
                try:
                    await self.qsc.close()
                except Exception:
                    pass
            if "AccountKey=" in (account or "") or "SharedAccessSignature=" in (account or ""):
                self.qsc = QueueServiceClient.from_connection_string(account)
            else:
                self.qsc = QueueServiceClient(account_url=f"https://{account}.queue.core.windows.net", credential=self._sas_token)
            self._queue_client = self.qsc.get_queue_client(queue_name)
            self._active_generation = self._sas_generation
            self.log.debug(f"[{self.cfg['name']}] Queue client (gen {self._active_generation}) ready.")

        await build_clients()
        self.log.info(f"[{self.cfg['name']}] Listening on Azure queue {queue_name}")
        while not self._stop:
            try:
                # Rebuild if SAS rotated or expiring soon
                if self._active_generation != self._sas_generation or (self._sas_expiry_ts - time()) <= 90:
                    self.log.debug(f"[{self.cfg['name']}] Rebuilding clients due to SAS rotation/expiry.")
                    await build_clients()
                async with self.qsc:  # lightweight context for this cycle
                    async for msg in self._queue_client.receive_messages(messages_per_page=10, visibility_timeout=60):
                        await self._process(self._queue_client, msg)
                await asyncio.sleep(self.retry_delay)
            except Exception as e:
                self.log.error(f"Azure loop error: {e}")
                if getattr(e, "status_code", 0) == 403:
                    self.log.debug(f"[{self.cfg['name']}] Rebuilding clients due to 403 error.")
                    self._active_generation = -1  # force rebuild
                await asyncio.sleep(15)
        if refresh_task:
            refresh_task.cancel()

    async def _process(self, qc, msg):
        try:
            decoded = base64.b64decode(msg.content).decode("utf-8")
            payload = json.loads(decoded)
        except Exception:
            await qc.delete_message(msg)
            return
        event_type = payload.get("eventType", "")
        subject = payload.get("subject", "")
        if event_type != "Microsoft.Storage.BlobCreated" or not is_relevant(subject, self.inc, self.exc):
            self.log.debug(f"[{self.cfg['name']}] Ignoring Azure blob event: {subject} ({event_type})")
            await qc.delete_message(msg)
            return
        blob_url = payload.get("data", {}).get("url", "")
        retry = int(payload.get("retry_count", 0))
        if retry >= self.max_retries or not blob_url:
            self.log.error(f"[{self.cfg['name']}] Max retries reached or invalid blob URL for {subject}, deleting message.")
            await qc.delete_message(msg)
            return

        try:
            self.log.debug(f"[{self.cfg['name']}] Processing Azure blob: {blob_url}")
            content_bytes = self._download_blob(blob_url)
            text = decode_bytes(content_bytes)
            lines = split_lines(text)
            emit(lines, subject, self.cfg.get("outputs", []))
            await qc.delete_message(msg)
            self.log.debug(f"[{self.cfg['name']}] Processed Azure blob: {blob_url}")
            metrics.inc("azure_messages_processed")
        except Exception as e:
            retry += 1
            self.log.error(f"Azure processing failed (retry {retry}): {e}")
            metrics.inc("azure_retry")
            payload["retry_count"] = retry
            updated = base64.b64encode(json.dumps(payload).encode()).decode()
            try:
                await qc.update_message(msg, content=updated, visibility_timeout=60)
            except Exception:
                await qc.delete_message(msg)

    def _download_blob(self, blob_url):
        """
        Download blob content using SAS token or connection string credentials.
        Falls back to anonymous only if no credentials present (will fail for private accounts).
        NOTE: synchronous download; blocks the event loop for its duration.
        """
        auth = self._blob_auth or {}
        sas_token = auth.get("sasToken")
        try:
            if sas_token:
                # blob_url may lack a SAS; append it if not present
                if "?" not in blob_url:
                    blob_client = BlobClient.from_blob_url(blob_url + "?" + sas_token)
                else:
                    blob_client = BlobClient.from_blob_url(blob_url, credential=sas_token)
            elif "AccountKey=" in (self.cfg.get("storageAccount") or "") or "SharedAccessSignature=" in (self.cfg.get("storageAccount") or ""):
                # Connection-string case: create a service client and resolve the blob client
                service = BlobServiceClient.from_connection_string(self.cfg["storageAccount"])
                parsed = urlparse(blob_url)
                # path format: /container/blobpath
                parts = parsed.path.lstrip("/").split("/", 1)
                container = parts[0]
                blob_name = parts[1] if len(parts) > 1 else ""
                blob_client = service.get_blob_client(container=container, blob=blob_name)
            else:
                # Anonymous attempt (likely to fail on a private account)
                blob_client = BlobClient.from_blob_url(blob_url)
            return blob_client.download_blob().readall()
        except Exception as e:
            raise RuntimeError(f"Blob download failed: {e}")

    def stop(self):
        self._stop = True
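
Note: AzureRunner reads an analogous config. Below is a minimal sketch for the static-SAS path with placeholder values; the dynamic path instead needs clientId, clientSecret, loginUrl, and credsUrl under "authentication", plus a backend whose JSON response carries data.SASToken and an optional data.Expiration in "%Y-%m-%dT%H:%M:%SZ" format. This is an assumption-laden illustration, not code from the package.

# Hypothetical static-SAS setup for AzureRunner (placeholder values)
from sap_ecs_log_forwarder.azure import AzureRunner

cfg = {
    "name": "azure-logs",
    "storageAccount": "examplestorage",  # bare account name; a full connection string also works
    "queue": "log-events",               # storage queue receiving BlobCreated events
    "includeFilter": [],
    "excludeFilter": [],
    "maxRetries": 5,
    "retryDelay": 10,                    # pause between receive cycles, in seconds
    "logLevel": "INFO",
    "authentication": {
        "sasToken": "sv=2023-08-03&ss=bq&...",  # placeholder; must grant queue and blob read access
    },
    "outputs": [],                       # forwarded to processor.emit()
}

runner = AzureRunner(cfg)
runner.start()  # blocks in asyncio.run() until runner.stop() flips the stop flag
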
sap_ecs_log_forwarder/base_runner.py
@@ -0,0 +1,26 @@
import threading
import logging
import time
import signal


class BaseRunner:
    def __init__(self, cfg):
        self.cfg = cfg
        self._stop = threading.Event()

    def start(self):
        raise NotImplementedError

    def stop(self):
        self._stop.set()

    def stopped(self):
        return self._stop.is_set()


def install_signal_handler(runners):
    def handler(sig, frame):
        logging.info(f"Shutdown signal ({sig}) received.")
        for r in runners:
            r.stop()
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)