beamflow-lib 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. beamflow_lib-0.3.0/PKG-INFO +29 -0
  2. beamflow_lib-0.3.0/beamflow_lib/__init__.py +21 -0
  3. beamflow_lib-0.3.0/beamflow_lib/config/__init__.py +0 -0
  4. beamflow_lib-0.3.0/beamflow_lib/config/env_loader.py +127 -0
  5. beamflow_lib-0.3.0/beamflow_lib/config/observability_config.py +37 -0
  6. beamflow_lib-0.3.0/beamflow_lib/config/runtime_config.py +78 -0
  7. beamflow_lib-0.3.0/beamflow_lib/context.py +141 -0
  8. beamflow_lib-0.3.0/beamflow_lib/decorators.py +350 -0
  9. beamflow_lib-0.3.0/beamflow_lib/observability/__init__.py +0 -0
  10. beamflow_lib-0.3.0/beamflow_lib/observability/ingestion.py +529 -0
  11. beamflow_lib-0.3.0/beamflow_lib/observability/logging.py +84 -0
  12. beamflow_lib-0.3.0/beamflow_lib/observability/model.py +161 -0
  13. beamflow_lib-0.3.0/beamflow_lib/observability/record_links.py +6 -0
  14. beamflow_lib-0.3.0/beamflow_lib/observability/registry.py +100 -0
  15. beamflow_lib-0.3.0/beamflow_lib/observability/schema.sql +122 -0
  16. beamflow_lib-0.3.0/beamflow_lib/observability/stores/__init__.py +0 -0
  17. beamflow_lib-0.3.0/beamflow_lib/observability/stores/bigquery_stores.py +163 -0
  18. beamflow_lib-0.3.0/beamflow_lib/observability/stores/file_stores.py +92 -0
  19. beamflow_lib-0.3.0/beamflow_lib/observability/stores/gcs_blob_store.py +57 -0
  20. beamflow_lib-0.3.0/beamflow_lib/observability/stores/protocols.py +53 -0
  21. beamflow_lib-0.3.0/beamflow_lib/observability/stores/pubsub_stores.py +73 -0
  22. beamflow_lib-0.3.0/beamflow_lib/pipelines/__init__.py +15 -0
  23. beamflow_lib-0.3.0/beamflow_lib/pipelines/backends/__init__.py +0 -0
  24. beamflow_lib-0.3.0/beamflow_lib/pipelines/backends/managed_feed.py +95 -0
  25. beamflow_lib-0.3.0/beamflow_lib/pipelines/consumer.py +322 -0
  26. beamflow_lib-0.3.0/beamflow_lib/pipelines/ingress.py +103 -0
  27. beamflow_lib-0.3.0/beamflow_lib/pipelines/records_feed.py +122 -0
  28. beamflow_lib-0.3.0/beamflow_lib/pipelines/records_model.py +17 -0
  29. beamflow_lib-0.3.0/beamflow_lib/pipelines/redis_client.py +28 -0
  30. beamflow_lib-0.3.0/beamflow_lib/queue/__init__.py +0 -0
  31. beamflow_lib-0.3.0/beamflow_lib/queue/asyncio_backend.py +121 -0
  32. beamflow_lib-0.3.0/beamflow_lib/queue/backend.py +98 -0
  33. beamflow_lib-0.3.0/beamflow_lib/queue/backends/__init__.py +0 -0
  34. beamflow_lib-0.3.0/beamflow_lib/queue/backends/managed_tasks.py +216 -0
  35. beamflow_lib-0.3.0/beamflow_lib/queue/consumer.py +17 -0
  36. beamflow_lib-0.3.0/pyproject.toml +38 -0
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.3
2
+ Name: beamflow-lib
3
+ Version: 0.3.0
4
+ Summary: Shared domain logic and utilities for the Beamflow managed platform.
5
+ Author: juraj.bezdek@gmail.com
6
+ Author-email: juraj.bezdek@gmail.com
7
+ Requires-Python: >=3.11
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Requires-Dist: PyJWT (>=2.8.0)
13
+ Requires-Dist: beamflow-clients (>=0.3.0,<0.4.0)
14
+ Requires-Dist: cryptography (>=42.0.0)
15
+ Requires-Dist: email-validator (>=2.0.0)
16
+ Requires-Dist: google-cloud-build (>=3.23.0)
17
+ Requires-Dist: google-cloud-firestore (>=2.16.0)
18
+ Requires-Dist: google-cloud-scheduler (>=2.13.0)
19
+ Requires-Dist: google-cloud-storage (>=2.14.0)
20
+ Requires-Dist: google-cloud-tasks (>=2.16.0)
21
+ Requires-Dist: httpx (>=0.27.0)
22
+ Requires-Dist: opentelemetry-api (>=1.24.0)
23
+ Requires-Dist: opentelemetry-exporter-otlp (>=1.24.0)
24
+ Requires-Dist: opentelemetry-sdk (>=1.24.0)
25
+ Requires-Dist: pydantic (>=2.10.0)
26
+ Requires-Dist: pydantic-settings (>=2.2.1)
27
+ Requires-Dist: python-dotenv (>=1.0.1)
28
+ Requires-Dist: pyyaml (>=6.0.1)
29
+ Requires-Dist: redis (>=5.0.3)
@@ -0,0 +1,21 @@
1
"""Top-level public API of ``beamflow_lib``.

Re-exports the commonly used entry points (logging, context management,
task decorators and pipeline primitives) so applications can simply write
``from beamflow_lib import logger, integration_task, ...``.
"""
from .observability.logging import logger
from .context import integration_context, get_context
from .decorators import integration_task, integration_step
from .pipelines.ingress import ingress
from .pipelines.records_feed import RecordsFeed
from .pipelines.records_model import RecordData
from .pipelines.consumer import feed_consumer, ConsumerRunner

# Explicit public API surface of the package.
__all__ = [
    "logger",
    "integration_context",
    "get_context",
    "integration_task",
    "integration_step",
    "ingress",
    "RecordsFeed",
    "RecordData",
    "feed_consumer",
    "ConsumerRunner"
]
File without changes
@@ -0,0 +1,127 @@
1
+ import yaml
2
+ from typing import Any, Dict, Optional
3
+ from pathlib import Path
4
+ from .runtime_config import RuntimeConfig, ClientsConfigPointer, BackendConfig, WebhooksConfig
5
+ from .observability_config import ObservabilityConfig
6
+ from beamflow_clients.config import ClientSettings
7
+
8
def load_env_yaml(path: str | Path) -> Dict[str, Any]:
    """Load an environment YAML file and return its top-level mapping.

    Returns an empty dict when the file is empty or its top-level value is
    not a mapping, so callers can always treat the result as a dict.

    Raises OSError if the file cannot be opened and yaml.YAMLError on
    malformed YAML.
    """
    # Explicit encoding: config files are expected to be UTF-8 regardless of
    # the platform's locale default (previously relied on the locale).
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    return data if isinstance(data, dict) else {}
13
+
14
def _load_data_from_dir(path: Path) -> tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]], Dict[str, ClientSettings]]:
    """Helper to load config components from a single directory.

    Reads three well-known files under *path*:
      - ``observability.yaml`` -> raw dict (or None if absent)
      - ``backend.yaml``       -> raw dict (or None if absent)
      - ``clients.yaml``       -> a pointer (ClientsConfigPointer) to a
        directory of per-client YAML files that are validated into
        ``ClientSettings`` objects keyed by client id.

    Returns the ``(observability, backend, clients)`` triple.

    Raises ValueError when the pointed-to clients directory does not exist
    or when two client files resolve to the same client id.
    """
    # 1. Observability
    obs_file = path / "observability.yaml"
    observability = None
    if obs_file.exists():
        observability = load_env_yaml(obs_file)

    # 2. Backend
    backend_file = path / "backend.yaml"
    backend = None
    if backend_file.exists():
        backend = load_env_yaml(backend_file)

    # 3. Clients
    clients_file = path / "clients.yaml"
    clients: Dict[str, ClientSettings] = {}

    if clients_file.exists():
        clients_pointer_data = load_env_yaml(clients_file)
        pointer = ClientsConfigPointer.model_validate(clients_pointer_data)

        # A relative pointer path is resolved against the config directory
        # that contains clients.yaml itself.
        clients_dir = Path(pointer.path)
        if not clients_dir.is_absolute():
            clients_dir = (path / clients_dir).resolve()

        if not clients_dir.is_dir():
            raise ValueError(f"Clients directory {clients_dir} (from clients.yaml) does not exist")

        pattern = pointer.pattern
        recursive = pointer.recursive

        # glob("**/pattern") walks subdirectories; plain pattern stays flat.
        search_pattern = f"**/{pattern}" if recursive else pattern
        client_files = list(clients_dir.glob(search_pattern))

        for cf in client_files:
            if not cf.is_file():
                continue

            client_data = load_env_yaml(cf)
            # Client id precedence: explicit snake_case key, then camelCase,
            # then the file name (without extension) as a fallback.
            client_id = client_data.get("client_id") or client_data.get("clientId") or cf.stem

            if client_id in clients:
                raise ValueError(f"Duplicate client_id '{client_id}' found in {cf}")

            if "client_id" not in client_data:
                client_data["client_id"] = client_id

            settings = ClientSettings.model_validate(client_data)

            # Known keys to exclude from extra: declared model fields plus
            # their aliases, plus both spellings of the id key.
            settings_fields = set(ClientSettings.model_fields.keys())
            for field in ClientSettings.model_fields.values():
                if field.alias:
                    settings_fields.add(field.alias)

            known_keys = settings_fields | {"client_id", "clientId"}
            # Everything not recognized by the model is preserved verbatim.
            # NOTE(review): assumes ClientSettings.extra is a mutable dict
            # field — confirm against beamflow_clients.config.
            extra = {k: v for k, v in client_data.items() if k not in known_keys}
            settings.extra.update(extra)

            clients[client_id] = settings
    return observability, backend, clients
76
+
77
def load_config_dir(config_dir: str | Path, environment: str) -> RuntimeConfig:
    """
    Loads unified configuration from a directory.

    Layout:
    - {config_dir}/shared/        - shared base config
    - {config_dir}/{environment}/ - environment-specific overrides

    Merge rules: the observability and backend sections are taken wholesale
    from the environment directory when present (env wins); client settings
    are merged key-by-key with the environment winning on collisions.

    Raises ValueError when *config_dir* is not a directory, or when the
    environment directory is missing (unless environment == "shared").
    """

    print("Loading config for environment: ", environment)
    config_path = Path(config_dir)
    if not config_path.is_dir():
        raise ValueError(f"Config directory {config_dir} does not exist or is not a directory")

    shared_path = config_path / "shared"
    env_path = config_path / environment

    # Load shared config (optional base layer).
    obs_shared, backend_shared, clients_shared = None, None, {}
    if shared_path.is_dir():
        obs_shared, backend_shared, clients_shared = _load_data_from_dir(shared_path)

    # Load env config; a missing environment directory is an error unless the
    # caller explicitly targets "shared" only.
    obs_env, backend_env, clients_env = None, None, {}
    if env_path.is_dir():
        obs_env, backend_env, clients_env = _load_data_from_dir(env_path)
    elif environment != "shared":
        raise ValueError(f"Environment directory {env_path} does not exist")

    # Observability: env wins over shared; absent -> None (feature off).
    observability_data = obs_env or obs_shared
    observability = ObservabilityConfig.model_validate(observability_data) if observability_data else None

    # Backend: env wins over shared; fall back to model defaults when absent.
    # (An unused `import os` that previously lived here has been removed.)
    backend_yaml_env = backend_env or backend_shared or {}

    backend_data = backend_yaml_env.get("backend")
    backend_config = BackendConfig.model_validate(backend_data) if backend_data else BackendConfig()

    webhooks_data = backend_yaml_env.get("webhooks")
    webhooks_config = WebhooksConfig.model_validate(webhooks_data) if webhooks_data else WebhooksConfig()

    # Clients: merge dicts, env wins on collision.
    clients = {**clients_shared, **clients_env}

    return RuntimeConfig(
        observability=observability,
        backend=backend_config,
        webhooks=webhooks_config,
        clients=clients
    )
@@ -0,0 +1,37 @@
1
+ from enum import Enum
2
+ from typing import Optional
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+
5
class DurabilityMode(str, Enum):
    """How strictly observability records must be persisted.

    Presumably IMMEDIATE flushes records synchronously while EVENTUAL allows
    buffered/asynchronous writes — confirm against the ingestion code.
    """
    IMMEDIATE = "immediate"
    EVENTUAL = "eventual"
8
+
9
class StoreType(str, Enum):
    """Backing store for observability data: GCP services or local files."""
    GCP = "gcp"
    LOCAL = "local"
12
+
13
class LoggingConfig(BaseModel):
    """Settings controlling which log records are captured."""
    # Master switch for log capture.
    enabled: bool = True
    # Minimum level name (e.g. "INFO") — exact comparison semantics are
    # defined by the logging pipeline; confirm in observability.logging.
    min_level: str = "INFO"
    # Logger-name prefixes to include/exclude; camelCase aliases let YAML
    # use keys like "includePrefixes".
    include_prefixes: list[str] = Field(default_factory=list, alias="includePrefixes")
    exclude_prefixes: list[str] = Field(default_factory=list, alias="excludePrefixes")
    # Optional reference to a custom filter function — format (dotted path?)
    # is not established here; TODO confirm.
    filter_fn: Optional[str] = Field(None, alias="filterFn")

    # Accept both the field names and their camelCase aliases on input.
    model_config = ConfigDict(populate_by_name=True)
21
+
22
class ObservabilityConfig(BaseModel):
    """Top-level observability configuration (durability, store, logging)."""
    # Defaults favor local development: eventual durability + local store.
    durability: DurabilityMode = DurabilityMode.EVENTUAL
    store_type: StoreType = StoreType.LOCAL

    # Local settings: directory for file-based stores.
    local_store_path: str = Field("logs", alias="localStorePath")

    # GCP settings — only meaningful when store_type == StoreType.GCP;
    # presumably project/BigQuery-dataset/GCS-bucket targets (confirm in
    # observability.stores).
    gcp_project_id: Optional[str] = Field(None, alias="gcpProjectId")
    gcp_dataset_id: Optional[str] = Field(None, alias="gcpDatasetId")
    gcp_bucket_name: Optional[str] = Field(None, alias="gcpBucketName")

    # Logging settings (defaults to an enabled LoggingConfig).
    logging: LoggingConfig = Field(default_factory=LoggingConfig)

    # Accept both the field names and their camelCase aliases on input.
    model_config = ConfigDict(populate_by_name=True)
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+ from enum import Enum
3
+ from typing import Dict, Any, Optional, List
4
+ from pydantic import BaseModel, Field, ConfigDict
5
+ from .observability_config import ObservabilityConfig
6
+ from beamflow_clients.config import ClientSettings
7
+
8
+
9
class ClientsConfigPointer(BaseModel):
    """Pointer (parsed from clients.yaml) to a directory of client files."""
    # Directory holding per-client YAML files; may be relative to the
    # config directory containing clients.yaml.
    path: str
    # Glob pattern for client files within that directory.
    pattern: str = "*.yaml"
    # When True, the pattern is applied recursively to subdirectories.
    recursive: bool = False
13
+
14
class BackendType(str, Enum):
    """Task queue backend types."""
    ASYNC = "async_backend" # In-process async execution
    DRAMATIQ = "dramatiq"   # Dramatiq queue
    MANAGED = "managed"     # Managed platform (Cloud Tasks via API)
19
+
20
class DramatiqConfig(BaseModel):
    """Dramatiq backend configuration."""
    # Redis connection string; overrides the REDIS_URL env var if set.
    redis_url: Optional[str] = None

    model_config = ConfigDict(populate_by_name=True)
25
+
26
class BackendConfig(BaseModel):
    """Configuration for task backend."""
    # Dramatiq is the default backend when no backend.yaml section exists.
    type: BackendType = BackendType.DRAMATIQ
    # Backend-specific options; only used when type == BackendType.DRAMATIQ.
    dramatiq: Optional[DramatiqConfig] = None

    model_config = ConfigDict(populate_by_name=True)
32
+
33
class WebhooksConfig(BaseModel):
    """Configuration for webhook ingress."""
    # URL path prefix under which webhook routes are mounted.
    prefix: str = "/webhooks"


    model_config = ConfigDict(populate_by_name=True)
39
+
40
class RuntimeConfig(BaseModel):
    """Fully merged runtime configuration produced by load_config_dir()."""
    # None means observability is not configured for this environment.
    observability: Optional[ObservabilityConfig] = None
    backend: BackendConfig = Field(default_factory=BackendConfig)
    # Per-client settings keyed by client id.
    clients: Dict[str, ClientSettings] = Field(default_factory=dict)
    webhooks: WebhooksConfig = Field(default_factory=WebhooksConfig)


    model_config = ConfigDict(populate_by_name=True)
48
+
49
class RuntimeConfigRegistry:
    """
    Read-only lookup facade over a loaded :class:`RuntimeConfig`.

    Exposes the top-level sections as properties and the per-client settings
    through mapping-style access: ``registry[client_id]`` and
    ``client_id in registry``.
    """
    def __init__(self, config: RuntimeConfig):
        self._config = config
        # Cache the clients mapping for direct lookups.
        self._clients = config.clients

    @property
    def observability(self) -> Optional[ObservabilityConfig]:
        """The observability section, or None when not configured."""
        return self._config.observability

    @property
    def backend(self) -> BackendConfig:
        """The task-backend section."""
        return self._config.backend

    @property
    def clients(self) -> Dict[str, ClientSettings]:
        """All known client settings, keyed by client id."""
        return self._clients

    def get_client(self, client_id: str) -> Optional[ClientSettings]:
        """Return the settings for *client_id*, or None when unknown."""
        return self._clients.get(client_id)

    def __getitem__(self, key: str) -> ClientSettings:
        try:
            return self._clients[key]
        except KeyError:
            raise KeyError(f"Client '{key}' not found.") from None

    def __contains__(self, key: str) -> bool:
        return key in self._clients
@@ -0,0 +1,141 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Any, Mapping, Optional
3
+ import contextvars
4
+ import uuid
5
+
6
@dataclass(frozen=True)
class IntegrationContext:
    """Immutable per-run context identifying the executing integration,
    pipeline and run, plus W3C trace-context fields for correlation.

    Instances are installed via the module's contextvar helpers and are
    never mutated — derived contexts are built by copying fields.
    """
    integration: str
    integration_pipeline: str
    run_id: str
    # W3C "traceparent" header value ("version-traceid-spanid-flags").
    traceparent: Optional[str] = None
    tracestate: Optional[str] = None
    baggage: Mapping[str, str] = field(default_factory=dict)
    tags: Mapping[str, Any] = field(default_factory=dict)
    tenant_id: str = "default"
    current_record_key: Optional[str] = None

    def _traceparent_part(self, index: int) -> Optional[str]:
        """Return dash-separated field *index* of traceparent, or None."""
        if not self.traceparent:
            return None
        parts = self.traceparent.split("-")
        return parts[index] if len(parts) > index else None

    @property
    def trace_id(self) -> Optional[str]:
        """Trace id (field 1 of the traceparent header), or None."""
        return self._traceparent_part(1)

    @property
    def span_id(self) -> Optional[str]:
        """Parent span id (field 2 of the traceparent header), or None."""
        return self._traceparent_part(2)

    @property
    def corelation(self) -> Any:
        """Build an observability Correlation record for this context.

        (Name misspelling kept for backward compatibility; prefer the
        ``correlation`` alias below.)
        """
        # Import inside to avoid circular dependencies
        from .observability.model import Correlation

        # NOTE: a previous version parsed traceparent into local variables
        # here and then discarded them; parsing now lives only in the
        # trace_id/span_id properties.
        return Correlation(
            integration=self.integration,
            integration_pipeline=self.integration_pipeline,
            run_id=self.run_id,
            trace_id=self.trace_id,
            span_id=self.span_id,
            tags={k: str(v) for k, v in self.tags.items()}
        )

    # Correctly spelled, backward-compatible alias for `corelation`.
    correlation = corelation
57
+
58
+
59
+
60
# Holder for the active IntegrationContext. A ContextVar keeps the value
# isolated per thread and per asyncio task.
_current_ctx: contextvars.ContextVar[Optional[IntegrationContext]] = contextvars.ContextVar(
    "framework.integration_context", default=None
)

def current_context() -> Optional[IntegrationContext]:
    """Return the current integration context, or None when none is active."""
    return _current_ctx.get()

# Public alias kept for API compatibility (`from beamflow_lib import get_context`).
get_context = current_context

def set_context(ctx: IntegrationContext) -> Any:
    """Set the current integration context. Returns a token for resetting."""
    return _current_ctx.set(ctx)

def reset_context(token: Any):
    """Reset the integration context to its previous state.

    *token* must be the value returned by the matching set_context() call.
    """
    _current_ctx.reset(token)
77
+
78
class integration_context:
    """
    Context manager for setting/creating an IntegrationContext.

    If a context is already active, unset fields are inherited from it and
    ``baggage``/``tags`` are merged (override values win). If no context
    exists, a new root context is created with a generated run_id.
    """
    def __init__(
        self,
        integration: Optional[str] = None,
        integration_pipeline: Optional[str] = None,
        run_id: Optional[str] = None,
        tenant_id: Optional[str] = None,
        current_record_key: Optional[str] = None,
        **kwargs
    ):
        # Collect explicit arguments plus extra overrides (e.g. "baggage",
        # "tags") into one dict; None values mean "inherit / use default".
        self.overrides = {
            "integration": integration,
            "integration_pipeline": integration_pipeline,
            "run_id": run_id,
            "tenant_id": tenant_id,
            # BUG FIX: current_record_key was accepted by __init__ but never
            # stored, so __enter__'s lookup of it could never succeed.
            "current_record_key": current_record_key,
            **kwargs
        }

        self.token = None

    def __enter__(self) -> IntegrationContext:
        parent = current_context()
        if parent:
            # Reuse parent values if not overridden; merge baggage/tags with
            # override entries winning on key collisions.
            merged = {
                "integration": self.overrides.get("integration") or parent.integration,
                "integration_pipeline": self.overrides.get("integration_pipeline") or parent.integration_pipeline,
                "run_id": self.overrides.get("run_id") or parent.run_id,
                "tenant_id": self.overrides.get("tenant_id") or parent.tenant_id,
                "traceparent": parent.traceparent,
                "tracestate": parent.tracestate,
                "baggage": {**parent.baggage, **(self.overrides.get("baggage") or {})},
                "tags": {**parent.tags, **(self.overrides.get("tags") or {})},
                "current_record_key": self.overrides.get("current_record_key") or parent.current_record_key
            }
        else:
            # New root context with a fresh run_id unless one was provided.
            merged = {
                "integration": self.overrides.get("integration") or "unknown",
                "integration_pipeline": self.overrides.get("integration_pipeline") or "unknown",
                "run_id": self.overrides.get("run_id") or str(uuid.uuid4()),
                "tenant_id": self.overrides.get("tenant_id") or "default",
                "baggage": self.overrides.get("baggage") or {},
                "tags": self.overrides.get("tags") or {},
                "current_record_key": self.overrides.get("current_record_key")
            }

        ctx = IntegrationContext(**merged)
        self.token = set_context(ctx)
        return ctx

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore whatever context was active before __enter__.
        if self.token:
            reset_context(self.token)