sap-ecs-log-forwarder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ import click
2
+
3
+ from sap_ecs_log_forwarder.crypto import encrypt_value, generate_key, get_active_key
4
+ from .config import load_config, save_config
5
+
6
@click.group()
def cli():
    """Root command group for the SAP ECS log forwarder CLI."""
    pass
9
+
10
@cli.command("config-path")
def config_path():
    """Show resolved config file path."""
    from sap_ecs_log_forwarder.config import CONFIG_FILE

    resolved = CONFIG_FILE.resolve()
    click.echo(str(resolved))
15
+
16
@cli.command("set-log-file")
@click.option("--path", prompt=True, help="Path to write JSON logs (e.g., /var/log/sap-log-forwarder/app.log)")
def set_log_file(path):
    """Configure a file path to write logs. Use an empty path to disable file logging."""
    cfg = load_config()
    trimmed = path.strip()
    if not trimmed:
        # An empty path disables file logging entirely.
        cfg.pop("logFile", None)
        click.echo("File logging disabled.")
    else:
        cfg["logFile"] = trimmed
        click.echo(f"Log file set to: {trimmed}")
    save_config(cfg)
29
+
30
@cli.group("input")
def input_group():
    """Manage inputs (cloud event sources).

    The function is named ``input_group`` to avoid shadowing the builtin
    ``input``; the CLI command name stays ``input`` via the explicit group
    name above, so the command-line interface is unchanged.
    """
    pass


@input_group.command("add")
@click.option("--provider", type=click.Choice(["gcp","aws","azure"]), prompt=True)
@click.option("--name", prompt=True)
@click.option("--subscription", help="GCP subscription path ( Full Path - projects/{project_id}/subscriptions/{sub_name} )")
@click.option("--queue", help="AWS/Azure queue URL or name")
@click.option("--region", help="AWS region")
@click.option("--bucket", help="Bucket name (AWS/GCP)")
@click.option("--storage-account", help="Azure storage account or conn string")
@click.option("--max-retries", type=int, default=5, show_default=True)
@click.option("--retry-delay", type=int, default=10, show_default=True)
@click.option("--log-level", type=click.Choice(["DEBUG","INFO","WARNING","ERROR","CRITICAL"]), default="INFO", show_default=True, help="Log level for this input")
def add_input(provider, name, subscription, queue, region, bucket, storage_account, max_retries, retry_delay, log_level):
    """Add a new input; provider-specific fields are prompted for if omitted."""
    cfg = _load_mutable()
    if any(i.get("name") == name for i in cfg["inputs"]):
        click.echo(f"Input '{name}' exists.")
        return
    base = {
        "provider": provider,
        "name": name,
        "maxRetries": max_retries,
        "retryDelay": retry_delay,
        "includeFilter": [],
        "excludeFilter": [],
        "outputs": [],
        "logLevel": log_level.upper(),
    }
    # Collect provider-specific connection details, prompting interactively
    # for anything that was not supplied on the command line.
    if provider == "gcp":
        base["subscription"] = subscription or click.prompt("GCP subscription")
        base["bucket"] = bucket or click.prompt("GCP bucket (optional)", default="")
    elif provider == "aws":
        base["queue"] = queue or click.prompt("SQS queue URL")
        base["region"] = region or click.prompt("AWS region")
        base["bucket"] = bucket or click.prompt("S3 bucket")
    elif provider == "azure":
        base["queue"] = queue or click.prompt("Azure queue name")
        base["storageAccount"] = storage_account or click.prompt("Azure storage account / conn string")
    cfg["inputs"].append(base)
    save_config(cfg)
    click.echo(f"Added input '{name}' ({provider}).")


@input_group.command("list")
def list_inputs():
    """List all configured inputs with their providers."""
    cfg = _load_mutable()
    if not cfg["inputs"]:
        click.echo("No inputs.")
        return
    for i in cfg["inputs"]:
        click.echo(f"- {i['name']} [{i['provider']}]")


@input_group.command("remove")
@click.argument("name")
def remove_input(name):
    """Remove the input called NAME, if present."""
    cfg = _load_mutable()
    remaining = [i for i in cfg["inputs"] if i.get("name") != name]
    if len(remaining) == len(cfg["inputs"]):
        # Nothing matched: report and leave the config file untouched.
        # (Previously the unchanged config was rewritten to disk anyway.)
        click.echo("Not found.")
        return
    cfg["inputs"] = remaining
    save_config(cfg)
    click.echo("Removed.")
94
+
95
@cli.group()
def output():
    """Manage outputs attached to an input."""
    pass
98
+
99
@output.command("add")
@click.option("--input-name", prompt=True)
@click.option("--type", "otype", type=click.Choice(["files","http","console"]), prompt=True)
@click.option("--destination", help="For files/http")
@click.option("--compress", is_flag=True, default=False)
@click.option("--include", "include_filters", multiple=True, help="Regex include filter(s) for output (can be repeated)")
@click.option("--exclude", "exclude_filters", multiple=True, help="Regex exclude filter(s) for output (can be repeated)")
def add_output(input_name, otype, destination, compress, include_filters, exclude_filters):
    """Attach an output (files/http/console) to an existing input."""
    cfg = _load_mutable()
    inp = _find(cfg, input_name)
    if not inp:
        click.echo("Input not found.")
        return
    out = {"type": otype}
    # files/http outputs need a destination; prompt when not given on the CLI.
    if otype in ("files","http"):
        out["destination"] = destination or click.prompt("Destination")
    if otype == "files":
        out["compress"] = compress

    # Attach output-level filters if provided
    if include_filters:
        out["includeFilter"] = list(include_filters)
    if exclude_filters:
        out["excludeFilter"] = list(exclude_filters)

    # Defensive re-check in case the prompt yielded an empty destination.
    if otype in ("files","http") and not out.get("destination"):
        click.echo("Destination required.")
        return
    inp.setdefault("outputs", []).append(out)
    save_config(cfg)
    click.echo("Output added.")
130
+
131
@output.command("list")
@click.argument("input_name")
def list_outputs(input_name):
    """List the outputs configured for INPUT_NAME, with their filters."""
    cfg = _load_mutable()
    inp = _find(cfg, input_name)
    if inp is None:
        click.echo("Input not found.")
        return
    outputs = inp.get("outputs", [])
    if not outputs:
        click.echo("No outputs.")
        return
    for position, entry in enumerate(outputs):
        inc = ", ".join(entry.get("includeFilter", [])) or "-"
        exc = ", ".join(entry.get("excludeFilter", [])) or "-"
        click.echo(f"[{position}] {entry['type']} -> {entry.get('destination','')} (include: {inc}; exclude: {exc})")
147
+
148
@output.command("remove")
@click.option("--input-name", prompt=True)
@click.option("--index", type=int, prompt=True)
def remove_output(input_name, index):
    """Delete the output at INDEX from the named input."""
    cfg = _load_mutable()
    inp = _find(cfg, input_name)
    if inp is None:
        click.echo("Input not found.")
        return
    outputs = inp.get("outputs", [])
    if index < 0 or index >= len(outputs):
        click.echo("Invalid index.")
        return
    del outputs[index]
    save_config(cfg)
    click.echo("Removed.")
164
+
165
@cli.command("gen-key")
def gen_key():
    """Generate a fresh Fernet encryption key and show how to export it."""
    new_key = generate_key()
    click.echo(f"Generated key: {new_key}")
    click.echo(f"Export it: export FORWARDER_ENCRYPTION_KEY='{new_key}'")
170
+
171
@cli.group()
def creds():
    """Manage encrypted credentials.

    Secrets are encrypted with the Fernet key taken from the
    FORWARDER_ENCRYPTION_KEY environment variable and stored in the
    config with an "enc:" prefix.
    """
    pass
175
+
176
@creds.command("set-provider-auth")
@click.option("--input-name", prompt=True)
def set_provider_auth(input_name):
    """Interactively collect and store encrypted provider credentials.

    Requires the FORWARDER_ENCRYPTION_KEY environment variable to be set;
    secret values are encrypted and prefixed with "enc:" before being
    written into the input's "authentication" section. URL fields are
    stored in the clear.
    """
    cfg = _load_mutable()
    inp = _find(cfg, input_name)
    if not inp:
        click.echo("Input not found.")
        return
    provider = inp.get("provider")
    key = get_active_key()
    if not key:
        click.echo("Encryption key not set (env FORWARDER_ENCRYPTION_KEY).")
        return
    auth = {}
    if provider == "aws":
        # "static" stores long-lived keys; "dynamic" stores backend client
        # credentials used to fetch temporary AWS credentials at runtime.
        mode = click.prompt("AWS auth mode (static/dynamic)", default="static")
        if mode == "static":
            access_key = click.prompt("AWS Access Key ID", hide_input=True)
            secret_key = click.prompt("AWS Secret Access Key", hide_input=True)
            auth["accessKeyId"] = "enc:" + encrypt_value(access_key, key)
            auth["secretAccessKey"] = "enc:" + encrypt_value(secret_key, key)
        else:
            client_id = click.prompt("Backend client_id", hide_input=True)
            client_secret = click.prompt("Backend client_secret", hide_input=True)
            login_url = click.prompt("Login URL", default="http://localhost:8000/api/v1/app/login")
            aws_creds_url = click.prompt("AWS Credentials URL", default="http://localhost:8000/api/v1/aws/credentials")
            auth["clientId"] = "enc:" + encrypt_value(client_id, key)
            auth["clientSecret"] = "enc:" + encrypt_value(client_secret, key)
            auth["loginUrl"] = login_url
            auth["awsCredsUrl"] = aws_creds_url
            click.echo("Dynamic AWS auth will request temporary credentials every ~15 minutes.")
    elif provider == "azure":
        # "sas" stores a SAS token; "dynamic" stores backend client
        # credentials used to fetch temporary SAS tokens at runtime.
        mode = click.prompt("Azure auth mode (sas/dynamic)", default="sas")
        if mode == "sas":
            sas = click.prompt("Azure SAS Token", hide_input=True)
            auth["sasToken"] = "enc:" + encrypt_value(sas, key)
        else:
            client_id = click.prompt("Backend client_id", hide_input=True)
            client_secret = click.prompt("Backend client_secret", hide_input=True)
            login_url = click.prompt("Login URL", default="http://localhost:8000/api/v1/app/login")
            creds_url = click.prompt("Credentials URL", default="http://localhost:8000/api/v1/azure/credentials")
            # Store dynamic auth params
            auth["clientId"] = "enc:" + encrypt_value(client_id, key)
            auth["clientSecret"] = "enc:" + encrypt_value(client_secret, key)
            auth["loginUrl"] = login_url
            auth["credsUrl"] = creds_url
            click.echo("Dynamic Azure auth will request temporary SAS tokens every ~15 minutes.")
    elif provider == "gcp":
        # Service-account JSON can be read from a file or pasted on stdin.
        mode = click.prompt("GCP service account JSON input mode (file/paste)", default="file")
        if mode == "file":
            path = click.prompt("Path to service account JSON file")
            try:
                with open(path, "r") as f:
                    content = f.read()
            except Exception as e:
                click.echo(f"Failed to read file: {e}")
                return
        else:
            click.echo("Paste JSON, finish with EOF (Ctrl-D):")
            try:
                import sys
                content = sys.stdin.read()
            except Exception as e:
                click.echo(f"Failed to read input: {e}")
                return
        auth["serviceAccountJson"] = "enc:" + encrypt_value(content.strip(), key)
    else:
        click.echo("Unsupported provider for auth.")
        return
    # Marker consumed by the decryption side to know values carry "enc:".
    auth["encrypted"] = True
    inp["authentication"] = auth
    save_config(cfg)
    click.echo("Provider credentials stored (encrypted).")
249
+
250
@creds.command("set-http-auth")
@click.option("--input-name", prompt=True)
@click.option("--output-index", type=int, prompt=True)
@click.option("--auth-type", type=click.Choice(["bearer","api-key","basic"]), prompt=True)
def set_http_auth(input_name, output_index, auth_type):
    """Attach encrypted authorization data to an HTTP output of an input."""
    cfg = _load_mutable()
    target_input = _find(cfg, input_name)
    if target_input is None:
        click.echo("Input not found.")
        return
    outputs = target_input.get("outputs", [])
    if output_index < 0 or output_index >= len(outputs):
        click.echo("Invalid index.")
        return
    target_output = outputs[output_index]
    if target_output.get("type") != "http":
        click.echo("Selected output not HTTP.")
        return
    fernet_key = get_active_key()
    if not fernet_key:
        click.echo("Encryption key not set (env FORWARDER_ENCRYPTION_KEY).")
        return
    authorization = None
    if auth_type == "bearer":
        token = click.prompt("Bearer token", hide_input=True)
        authorization = {"type": "bearer", "token": "enc:" + encrypt_value(token, fernet_key), "encrypted": True}
    elif auth_type == "api-key":
        api_key = click.prompt("API key", hide_input=True)
        authorization = {"type": "api-key", "apiKey": "enc:" + encrypt_value(api_key, fernet_key), "encrypted": True}
    elif auth_type == "basic":
        user = click.prompt("Username")
        pwd = click.prompt("Password", hide_input=True)
        # Only the password is a secret; the username stays in the clear.
        authorization = {
            "type": "basic",
            "user": user,
            "password": "enc:" + encrypt_value(pwd, fernet_key),
            "encrypted": True,
        }
    if authorization is not None:
        target_output["authorization"] = authorization
    save_config(cfg)
    click.echo("Credentials stored (encrypted).")
289
+
290
+ def _load_mutable():
291
+ try:
292
+ return load_config()
293
+ except FileNotFoundError:
294
+ return {"logLevel":"info","inputs":[]}
295
+
296
+ def _find(cfg, name):
297
+ for i in cfg.get("inputs", []):
298
+ if i.get("name")==name:
299
+ return i
300
+ return None
301
+
302
+ if __name__ == "__main__":
303
+ cli()
@@ -0,0 +1,99 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from pathlib import Path
5
+
6
+ def _resolve_config_path():
7
+ # 1. Environment variable override
8
+ env_path = os.getenv("SAP_LOG_FORWARDER_CONFIG")
9
+ if env_path:
10
+ return Path(env_path).expanduser()
11
+
12
+ # 2. Existing local config.json (backward compatibility)
13
+ local = Path("config.json")
14
+ if local.exists():
15
+ return local
16
+
17
+ # 3. Default in user home
18
+ home_path = Path.home() / ".sapecslogforwarder"
19
+ try:
20
+ home_path.mkdir(parents=True, exist_ok=True)
21
+ except Exception:
22
+ pass
23
+ return home_path / "config.json"
24
+
25
+
26
+ CONFIG_FILE = _resolve_config_path()
27
+
28
def validate_input(inp):
    """Validate one input entry from the config; raise ValueError on problems.

    Checks provider-specific required fields, that 'outputs' is a list, and
    — when an 'authentication' section is present — that it matches one of
    the accepted shapes for the provider (static or dynamic credentials).
    """
    provider = inp.get("provider")
    required_by_provider = {
        "gcp": ["subscription"],
        "aws": ["queue","region","bucket"],
        "azure": ["queue","storageAccount"],
    }
    missing = [field for field in required_by_provider.get(provider, []) if not inp.get(field)]
    if missing:
        raise ValueError(f"Input '{inp.get('name')}' missing required fields: {missing}")
    if not isinstance(inp.get("outputs", []), list):
        raise ValueError(f"Input '{inp.get('name')}' outputs must be a list.")

    auth = inp.get("authentication")
    if not auth:
        return  # auth optional
    if not isinstance(auth, dict):
        raise ValueError(f"Input '{inp.get('name')}' authentication must be an object.")

    if provider == "aws":
        has_static = "accessKeyId" in auth and "secretAccessKey" in auth
        has_dynamic = all(k in auth for k in ("clientId","clientSecret","loginUrl")) and ("awsCredsUrl" in auth or "credsUrl" in auth)
        if not has_static and not has_dynamic:
            raise ValueError(
                f"Input '{inp.get('name')}' AWS auth must include either static keys "
                "(accessKeyId, secretAccessKey) or dynamic fields "
                "(clientId, clientSecret, loginUrl, awsCredsUrl)."
            )
    elif provider == "azure":
        has_static = "sasToken" in auth
        has_dynamic = all(k in auth for k in ("clientId","clientSecret","loginUrl","credsUrl"))
        if not has_static and not has_dynamic:
            raise ValueError(
                f"Input '{inp.get('name')}' Azure auth must include either 'sasToken' "
                "or dynamic fields (clientId, clientSecret, loginUrl, credsUrl)."
            )
    elif provider == "gcp" and "serviceAccountJson" not in auth:
        raise ValueError(f"Input '{inp.get('name')}' GCP auth missing 'serviceAccountJson'.")
67
+
68
+
69
def load_config():
    """Read, parse, and validate the JSON config file.

    Raises FileNotFoundError when the file does not exist and ValueError
    when the structure is invalid.
    """
    if not CONFIG_FILE.exists():
        raise FileNotFoundError(f"Config file missing: {CONFIG_FILE.resolve()}")
    with CONFIG_FILE.open() as fh:
        config = json.load(fh)
    inputs = config.get("inputs")
    if not isinstance(inputs, list):
        raise ValueError("Config must contain 'inputs' list.")
    for entry in inputs:
        validate_input(entry)
    return config
79
+
80
def save_config(cfg):
    """Serialize *cfg* as pretty-printed JSON to CONFIG_FILE."""
    try:
        # Best effort: the directory may exist already or be uncreatable.
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass
    with CONFIG_FILE.open("w") as fh:
        json.dump(cfg, fh, indent=2)
88
+
89
+
90
def get_log_level(cfg):
    """Map the config's 'logLevel' string to a logging constant (default INFO)."""
    level_name = cfg.get("logLevel", "INFO").upper()
    return getattr(logging, level_name, logging.INFO)
93
+
94
def get_log_file(cfg):
    """Return log file path if configured, else None."""
    raw = cfg.get("logFile")
    return str(Path(raw).expanduser()) if raw else None
@@ -0,0 +1,56 @@
1
+ import logging
2
+ import threading
3
+ import time
4
+
5
+ from sap_ecs_log_forwarder.json_logging import setup_structured_logging
6
+ from sap_ecs_log_forwarder.metrics import format_metrics, reset_metrics
7
+ from .config import CONFIG_FILE, get_log_file, load_config, get_log_level
8
+ from .aws import AWSRunner
9
+ from .gcp import GCPRunner
10
+ from .azure import AzureRunner
11
+
12
# Maps the config "provider" value to the runner class that services it.
PROVIDERS = {
    "gcp": GCPRunner,
    "aws": AWSRunner,
    "azure": AzureRunner,
}
17
+
18
def _metrics_logger():
    """Background loop: log a metrics snapshot every 30s, reset daily at 00:00 UTC.

    Runs forever; intended for a daemon thread. The daily snapshot/reset is
    guarded by the last reset day so it fires exactly once per UTC day —
    previously the 00:00-00:01 window combined with the 30-second sleep could
    log and reset the counters up to four times.
    """
    last_reset_day = None  # (tm_year, tm_yday) of the most recent daily reset
    while True:
        time.sleep(30)
        logging.info("metrics snapshot\n" + format_metrics())
        # Beginning of the UTC day: emit a final snapshot, then reset.
        now = time.gmtime()
        today = (now.tm_year, now.tm_yday)
        if now.tm_hour == 0 and now.tm_min <= 1 and last_reset_day != today:
            logging.info("Daily metrics snapshot\n" + format_metrics())
            reset_metrics()
            last_reset_day = today
27
+
28
def run_all():
    """Load the config, configure logging, and start one daemon thread per input.

    Blocks until all runner threads exit or the process is interrupted.
    """
    cfg = load_config()
    # Configure structured logging BEFORE emitting any messages, so the very
    # first log line already goes through the JSON formatter and the optional
    # file handler (previously it was logged with the default handlers).
    setup_structured_logging(get_log_level(cfg), get_log_file(cfg))
    logging.info("Using config file: %s", CONFIG_FILE.resolve())
    inputs = cfg.get("inputs", [])
    if not inputs:
        logging.warning("No inputs configured.")
        return
    metrics_thread = threading.Thread(target=_metrics_logger, daemon=True)
    metrics_thread.start()
    workers = []
    for inp in inputs:
        provider = inp.get("provider")
        runner_cls = PROVIDERS.get(provider)
        if runner_cls is None:
            logging.error("Unknown provider: %s", provider)
            continue
        runner = runner_cls(inp)
        worker = threading.Thread(
            target=runner.start,
            name=f"{provider}-{inp.get('name', 'input')}",
            daemon=True,
        )
        worker.start()
        workers.append(worker)
    try:
        for worker in workers:
            worker.join()
    except KeyboardInterrupt:
        logging.info("Shutdown requested.")


if __name__ == "__main__":
    run_all()
@@ -0,0 +1,41 @@
1
+ import os
2
+ from cryptography.fernet import Fernet
3
+
4
# Environment variable that carries the Fernet key used for secrets.
_ENV_KEY = "FORWARDER_ENCRYPTION_KEY"


def decrypt_auth_dict(auth):
    """Return a copy of *auth* with encrypted values decrypted.

    The bookkeeping 'encrypted' flag is dropped and URL fields are passed
    through untouched (they are stored in the clear). Non-dict input
    yields an empty dict.
    """
    if not isinstance(auth, dict):
        return {}
    plain_url_keys = ("loginUrl", "credsUrl", "awsCredsUrl")
    result = {}
    for field, stored in auth.items():
        if field == "encrypted":
            continue
        result[field] = stored if field in plain_url_keys else decrypt_value_if_needed(stored)
    return result


def get_active_key():
    """Return the encryption key from the environment, or None when unset."""
    return os.getenv(_ENV_KEY)


def generate_key():
    """Generate a fresh Fernet key as a str."""
    return Fernet.generate_key().decode()


def encrypt_value(value, key):
    """Encrypt *value* with *key*; falsy values are returned unchanged."""
    if not value:
        return value
    cipher = Fernet(key.encode() if isinstance(key, str) else key)
    return cipher.encrypt(value.encode()).decode()


def decrypt_value_if_needed(value):
    """Decrypt values carrying the 'enc:' prefix; pass everything else through.

    Without an active key the encrypted value is returned as-is.
    """
    if not value or not value.startswith("enc:"):
        return value
    key = get_active_key()
    if not key:
        return value  # Cannot decrypt without key
    cipher = Fernet(key.encode() if isinstance(key, str) else key)
    return cipher.decrypt(value[4:].encode()).decode()
@@ -0,0 +1,105 @@
1
+ import json
2
+ import time
3
+ import logging
4
+ from functools import partial
5
+ from google.cloud import pubsub_v1
6
+ from google.api_core.exceptions import PermissionDenied, NotFound
7
+ from google.oauth2 import service_account
8
+
9
+ from sap_ecs_log_forwarder import metrics
10
+ from sap_ecs_log_forwarder.crypto import decrypt_auth_dict
11
+ from sap_ecs_log_forwarder.processor import emit
12
+ from sap_ecs_log_forwarder.utils import compile_filters, decode_bytes, is_relevant, split_lines
13
+
14
class GCPRunner:
    """Forward log objects announced on a GCP Pub/Sub subscription.

    Listens for OBJECT_FINALIZE notifications, downloads the referenced
    GCS object, splits it into lines, and emits them to the configured
    outputs with retry/backoff.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self._stop = False
        inc, exc = compile_filters(cfg.get("includeFilter", []), cfg.get("excludeFilter", []))
        self.include = inc
        self.exclude = exc
        self.max_retries = cfg.get("maxRetries", 5)
        self.retry_delay = cfg.get("retryDelay", 10)
        lvl = getattr(logging, str(self.cfg.get("logLevel","INFO")).upper(), logging.INFO)
        # BUGFIX: the fallback logger suffix was 'aws' (copied from the AWS
        # runner); unnamed GCP inputs now log under 'input.gcp'.
        self.log = logging.getLogger(f"input.{self.cfg.get('name','gcp')}")
        self.log.setLevel(lvl)

    def _valid_sub(self, path):
        """Best-effort check that the subscription exists.

        PermissionDenied counts as valid: the subscriber may lack the
        get_subscription permission while still being allowed to pull.
        """
        try:
            c = pubsub_v1.SubscriberClient(credentials=self._storage_credentials) if self._storage_credentials else pubsub_v1.SubscriberClient()
            c.get_subscription(request={"subscription": path})
            return True
        except PermissionDenied:
            return True
        except NotFound:
            self.log.error(f"Subscription not found: {path}")
            return False
        except Exception as e:
            self.log.error(f"Subscription check failed: {e}")
            return False

    def start(self):
        """Open the streaming pull and block until it fails or is cancelled."""
        credentials = None
        auth = decrypt_auth_dict(self.cfg.get("authentication", {}))
        sa_json = auth.get("serviceAccountJson")
        if sa_json:
            try:
                info = json.loads(sa_json)
                credentials = service_account.Credentials.from_service_account_info(info)
            except Exception as e:
                # Fall back to application-default credentials.
                self.log.error(f"Failed to parse GCP service account JSON: {e}")
        client = pubsub_v1.SubscriberClient(credentials=credentials) if credentials else pubsub_v1.SubscriberClient()
        # Saved for reuse by _valid_sub and the storage client in _callback.
        self._storage_credentials = credentials
        path = self.cfg.get("subscription")
        if not path or not self._valid_sub(path):
            return
        cb = partial(self._callback, credentials=credentials)
        future = client.subscribe(path, callback=cb)
        self.log.info(f"[{self.cfg['name']}] Listening on {path}")
        try:
            future.result()
        except Exception as e:
            self.log.error(f"GCP runner stopped: {e}")
            future.cancel()

    def _callback(self, message, credentials=None):
        """Process one Pub/Sub notification: download the object and emit its lines.

        Non-finalize events and filtered-out object names are acked and
        skipped. Download/emit failures retry with exponential backoff; after
        max_retries the message is acked anyway (dropped) to avoid redelivery
        loops.
        """
        if self._stop:
            message.nack()
            return
        try:
            payload = json.loads(message.data.decode("utf-8"))
        except Exception as e:
            self.log.error(f"JSON decode failure: {e}")
            message.ack()
            return
        bucket = payload.get("bucket")
        name = payload.get("name","")
        event_type = message.attributes.get("eventType")
        if event_type != "OBJECT_FINALIZE" or not is_relevant(name, self.include, self.exclude):
            self.log.debug(f"[{self.cfg['name']}] Ignoring GCP object event: {name} ({event_type})")
            message.ack()
            return
        retries = 0
        while retries < self.max_retries:
            try:
                self.log.debug(f"[{self.cfg['name']}] Processing GCP object gs://{bucket}/{name}")
                from google.cloud import storage
                sc = storage.Client(credentials=credentials) if credentials else storage.Client()
                blob = sc.bucket(bucket).blob(name)
                raw = blob.download_as_bytes()
                text = decode_bytes(raw)
                lines = split_lines(text)
                emit(lines, name, self.cfg.get("outputs", []))
                self.log.debug(f"[{self.cfg['name']}] Processed GCP object gs://{bucket}/{name}")
                message.ack()
                metrics.inc("gcp_messages_processed")
                return
            except Exception as e:
                retries += 1
                metrics.inc("gcp_retry")
                self.log.error(f"GCP process error attempt {retries}: {e}")
                # Exponential backoff capped at 120 seconds.
                time.sleep(min(self.retry_delay * (2 ** (retries-1)), 120))
        # Retries exhausted: ack so the broker stops redelivering.
        message.ack()

    def stop(self):
        """Ask the runner to nack further messages (callback checks _stop)."""
        self._stop = True
@@ -0,0 +1,42 @@
1
+ import json
2
+ import logging
3
+ import time
4
+ from logging import StreamHandler, FileHandler
5
+
6
class JsonFormatter(logging.Formatter):
    """Render each log record as a single-line JSON object (UTC timestamps)."""

    def format(self, record):
        payload = {
            "ts": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(record.created)),
            "level": record.levelname,
            "message": record.getMessage(),
            "logger": record.name,
            "thread": record.threadName,
            "pid": record.process,
            "module": record.module,
            "func": record.funcName,
            "line": record.lineno,
            "stack": self.formatException(record.exc_info) if record.exc_info else None,
        }
        # Optional fields attached by callers via logging's `extra=` mechanism.
        if hasattr(record, "source"):
            payload["source"] = record.source
        destination = record.__dict__.get("destination")
        if destination:
            payload["destination"] = destination
        return json.dumps(payload)


def setup_structured_logging(level, log_file: str):
    """Install JSON logging on the root logger: console plus optional file."""
    formatter = JsonFormatter()
    root_logger = logging.getLogger()
    root_logger.handlers.clear()

    # Always keep console output
    console = StreamHandler()
    console.setFormatter(formatter)
    root_logger.addHandler(console)

    # Optional file output
    if log_file:
        file_handler = FileHandler(log_file)
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    root_logger.setLevel(level)
@@ -0,0 +1,37 @@
1
+ import threading
2
+ from collections import Counter
3
+ import time
4
+
5
class _Metrics:
    """Thread-safe counter registry with a start timestamp."""

    def __init__(self):
        self._lock = threading.Lock()
        self._counters = Counter()
        self._start = time.time()

    def inc(self, name, value=1):
        """Add *value* to counter *name* (creating it on first use)."""
        with self._lock:
            self._counters[name] += value

    def snapshot(self):
        """Return (copy of counters, start timestamp)."""
        with self._lock:
            return dict(self._counters), self._start

    def reset(self):
        """Drop all counters and restart the clock."""
        with self._lock:
            self._counters = Counter()
            self._start = time.time()


# Module-level singleton used by the convenience functions below.
metrics = _Metrics()


def inc(name, value=1):
    """Increment a counter on the shared registry."""
    metrics.inc(name, value)


def format_metrics():
    """Render counters in a Prometheus-like plain-text layout."""
    counters, started = metrics.snapshot()
    rendered = [f"# start_time_seconds {started:.0f}"]
    rendered.extend(f"{name} {count}" for name, count in counters.items())
    return "\n".join(rendered)


def reset_metrics():
    """Clear the shared registry and its start timestamp."""
    metrics.reset()