flowstash-lib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. flowstash_lib-0.1.0/PKG-INFO +17 -0
  2. flowstash_lib-0.1.0/flowstash_lib/__init__.py +27 -0
  3. flowstash_lib-0.1.0/flowstash_lib/config/__init__.py +0 -0
  4. flowstash_lib-0.1.0/flowstash_lib/config/env_loader.py +131 -0
  5. flowstash_lib-0.1.0/flowstash_lib/config/observability_config.py +37 -0
  6. flowstash_lib-0.1.0/flowstash_lib/config/runtime_config.py +142 -0
  7. flowstash_lib-0.1.0/flowstash_lib/context.py +141 -0
  8. flowstash_lib-0.1.0/flowstash_lib/decorators.py +350 -0
  9. flowstash_lib-0.1.0/flowstash_lib/observability/__init__.py +0 -0
  10. flowstash_lib-0.1.0/flowstash_lib/observability/ingestion.py +529 -0
  11. flowstash_lib-0.1.0/flowstash_lib/observability/logging.py +84 -0
  12. flowstash_lib-0.1.0/flowstash_lib/observability/model.py +161 -0
  13. flowstash_lib-0.1.0/flowstash_lib/observability/record_links.py +6 -0
  14. flowstash_lib-0.1.0/flowstash_lib/observability/registry.py +100 -0
  15. flowstash_lib-0.1.0/flowstash_lib/observability/schema.sql +122 -0
  16. flowstash_lib-0.1.0/flowstash_lib/observability/stores/__init__.py +0 -0
  17. flowstash_lib-0.1.0/flowstash_lib/observability/stores/bigquery_stores.py +163 -0
  18. flowstash_lib-0.1.0/flowstash_lib/observability/stores/file_stores.py +92 -0
  19. flowstash_lib-0.1.0/flowstash_lib/observability/stores/gcs_blob_store.py +57 -0
  20. flowstash_lib-0.1.0/flowstash_lib/observability/stores/protocols.py +53 -0
  21. flowstash_lib-0.1.0/flowstash_lib/observability/stores/pubsub_stores.py +73 -0
  22. flowstash_lib-0.1.0/flowstash_lib/pipelines/__init__.py +15 -0
  23. flowstash_lib-0.1.0/flowstash_lib/pipelines/backends/__init__.py +0 -0
  24. flowstash_lib-0.1.0/flowstash_lib/pipelines/backends/managed_feed.py +183 -0
  25. flowstash_lib-0.1.0/flowstash_lib/pipelines/consumer.py +322 -0
  26. flowstash_lib-0.1.0/flowstash_lib/pipelines/ingress.py +140 -0
  27. flowstash_lib-0.1.0/flowstash_lib/pipelines/records_feed.py +143 -0
  28. flowstash_lib-0.1.0/flowstash_lib/pipelines/records_model.py +17 -0
  29. flowstash_lib-0.1.0/flowstash_lib/pipelines/redis_client.py +52 -0
  30. flowstash_lib-0.1.0/flowstash_lib/queue/__init__.py +0 -0
  31. flowstash_lib-0.1.0/flowstash_lib/queue/asyncio_backend.py +122 -0
  32. flowstash_lib-0.1.0/flowstash_lib/queue/backend.py +102 -0
  33. flowstash_lib-0.1.0/flowstash_lib/queue/backends/__init__.py +0 -0
  34. flowstash_lib-0.1.0/flowstash_lib/queue/backends/managed_tasks.py +213 -0
  35. flowstash_lib-0.1.0/flowstash_lib/queue/consumer.py +17 -0
  36. flowstash_lib-0.1.0/flowstash_lib/state/__init__.py +13 -0
  37. flowstash_lib-0.1.0/flowstash_lib/state/entry.py +24 -0
  38. flowstash_lib-0.1.0/flowstash_lib/state/protocol.py +68 -0
  39. flowstash_lib-0.1.0/flowstash_lib/state/stores/__init__.py +1 -0
  40. flowstash_lib-0.1.0/flowstash_lib/state/stores/managed_store.py +141 -0
  41. flowstash_lib-0.1.0/flowstash_lib/state/stores/redis_store.py +201 -0
  42. flowstash_lib-0.1.0/flowstash_lib/state/stores/sqlite_store.py +177 -0
  43. flowstash_lib-0.1.0/pyproject.toml +26 -0
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.3
2
+ Name: flowstash-lib
3
+ Version: 0.1.0
4
+ Summary: Shared domain logic and utilities for the flowstash managed platform.
5
+ Author: juraj.bezdek@gmail.com
6
+ Author-email: juraj.bezdek@gmail.com
7
+ Requires-Python: >=3.11
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Requires-Dist: flowstash-clients (>=0.3.0,<0.4.0)
13
+ Requires-Dist: httpx (>=0.27.0)
14
+ Requires-Dist: opentelemetry-api (>=1.24.0)
15
+ Requires-Dist: pydantic (>=2.10.0)
16
+ Requires-Dist: pyyaml (>=6.0.1)
17
+ Requires-Dist: redis (>=5.0.3)
@@ -0,0 +1,27 @@
1
# Public API of flowstash_lib: re-export the most commonly used symbols so
# consumers can simply `from flowstash_lib import ...`.
from .observability.logging import logger
from .context import integration_context, get_context
from .decorators import integration_task, integration_step
from .pipelines.ingress import ingress
from .pipelines.records_feed import RecordsFeed
from .pipelines.records_model import RecordData
from .pipelines.consumer import feed_consumer, ConsumerRunner
from .state import StateEntry, StateStoreProtocol, SQLiteStateStore, RedisStateStore, ManagedStateStore

# Explicit public API declaration (also controls `from flowstash_lib import *`).
__all__ = [
    "logger",
    "integration_context",
    "get_context",
    "integration_task",
    "integration_step",
    "ingress",
    "RecordsFeed",
    "RecordData",
    "feed_consumer",
    "ConsumerRunner",
    "StateEntry",
    "StateStoreProtocol",
    "SQLiteStateStore",
    "RedisStateStore",
    "ManagedStateStore",
]
File without changes
@@ -0,0 +1,131 @@
1
+ import yaml
2
+ from typing import Any, Dict, Optional
3
+ from pathlib import Path
4
+ from .runtime_config import RuntimeConfig, ClientsConfigPointer, BackendConfig, WebhooksConfig, StateStoreConfig
5
+ from .observability_config import ObservabilityConfig
6
+ from flowstash_clients.config import ClientSettings
7
+
8
def load_env_yaml(path: str | Path) -> Dict[str, Any]:
    """Load an environment YAML file and return its top-level mapping.

    Args:
        path: Path to the YAML file.

    Returns:
        The parsed mapping, or an empty dict when the file is empty or its
        top-level value is not a mapping (e.g. a list or scalar).
    """
    # Pin the encoding: relying on the platform default breaks on Windows
    # (cp1252) when config files contain non-ASCII characters.
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    return data if isinstance(data, dict) else {}
13
+
14
def _load_data_from_dir(path: Path) -> tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]], Dict[str, ClientSettings]]:
    """Helper to load config components from a single directory.

    Reads up to three pieces of configuration from ``path``:
      * ``observability.yaml`` -> raw dict (or None if absent)
      * ``backend.yaml``       -> raw dict (or None if absent)
      * ``clients.yaml``       -> a pointer to a directory of per-client YAML
        files, each validated into a ``ClientSettings``

    Returns:
        ``(observability, backend, clients)`` where the first two are
        unvalidated dicts and ``clients`` maps client_id -> ClientSettings.

    Raises:
        ValueError: If the clients directory referenced by clients.yaml does
            not exist, or two client files declare the same client_id.
    """
    # 1. Observability
    obs_file = path / "observability.yaml"
    observability = None
    if obs_file.exists():
        observability = load_env_yaml(obs_file)

    # 2. Backend
    backend_file = path / "backend.yaml"
    backend = None
    if backend_file.exists():
        backend = load_env_yaml(backend_file)

    # 3. Clients
    clients_file = path / "clients.yaml"
    clients: Dict[str, ClientSettings] = {}

    if clients_file.exists():
        clients_pointer_data = load_env_yaml(clients_file)
        pointer = ClientsConfigPointer.model_validate(clients_pointer_data)

        # Relative pointer paths are resolved against the directory that
        # contains clients.yaml itself.
        clients_dir = Path(pointer.path)
        if not clients_dir.is_absolute():
            clients_dir = (path / clients_dir).resolve()

        if not clients_dir.is_dir():
            raise ValueError(f"Clients directory {clients_dir} (from clients.yaml) does not exist")

        pattern = pointer.pattern
        recursive = pointer.recursive

        # glob with "**/" prefix walks subdirectories when recursive is set.
        search_pattern = f"**/{pattern}" if recursive else pattern
        client_files = list(clients_dir.glob(search_pattern))

        for cf in client_files:
            if not cf.is_file():
                continue

            client_data = load_env_yaml(cf)
            # client_id resolution order: explicit snake_case key, camelCase
            # key, then the file name (without extension) as a fallback.
            client_id = client_data.get("client_id") or client_data.get("clientId") or cf.stem

            if client_id in clients:
                raise ValueError(f"Duplicate client_id '{client_id}' found in {cf}")

            if "client_id" not in client_data:
                client_data["client_id"] = client_id

            settings = ClientSettings.model_validate(client_data)

            # Known keys to exclude from extra
            settings_fields = set(ClientSettings.model_fields.keys())
            for field in ClientSettings.model_fields.values():
                if field.alias:
                    settings_fields.add(field.alias)

            # Everything not recognized by ClientSettings is preserved in the
            # model's `extra` mapping so integration-specific keys survive.
            known_keys = settings_fields | {"client_id", "clientId"}
            extra = {k: v for k, v in client_data.items() if k not in known_keys}
            settings.extra.update(extra)

            clients[client_id] = settings
    return observability, backend, clients
76
+
77
def load_config_dir(config_dir: str | Path, environment: str) -> RuntimeConfig:
    """
    Loads unified configuration from a directory.
    - {config_dir}/shared/ - shared base config
    - {config_dir}/{environment}/ - environment-specific overrides

    Args:
        config_dir: Root configuration directory.
        environment: Name of the environment subdirectory to overlay.

    Returns:
        A fully merged, validated RuntimeConfig.

    Raises:
        ValueError: If config_dir is not a directory, or the environment
            subdirectory is missing (unless environment == "shared").
    """
    print("Loading config for environment: ", environment)
    config_path = Path(config_dir)
    if not config_path.is_dir():
        raise ValueError(f"Config directory {config_dir} does not exist or is not a directory")

    shared_path = config_path / "shared"
    env_path = config_path / environment

    # Load shared config
    obs_shared, backend_shared, clients_shared = None, None, {}
    if shared_path.is_dir():
        obs_shared, backend_shared, clients_shared = _load_data_from_dir(shared_path)

    # Load env config
    obs_env, backend_env, clients_env = None, None, {}
    if env_path.is_dir():
        obs_env, backend_env, clients_env = _load_data_from_dir(env_path)
    elif environment != "shared":
        raise ValueError(f"Environment directory {env_path} does not exist")

    # Merge
    # Observability: Env wins (whole-file replacement, not a deep merge)
    observability_data = obs_env or obs_shared
    observability = ObservabilityConfig.model_validate(observability_data) if observability_data else None

    # Backend: Env wins (whole-file replacement)
    # NOTE: removed a dead `import os` here — nothing in this function uses os.
    backend_yaml_env = backend_env or backend_shared or {}

    backend_data = backend_yaml_env.get("backend")
    backend_config = BackendConfig.model_validate(backend_data) if backend_data else BackendConfig()

    webhooks_data = backend_yaml_env.get("webhooks")
    webhooks_config = WebhooksConfig.model_validate(webhooks_data) if webhooks_data else WebhooksConfig()

    state_store_data = backend_yaml_env.get("state_store")
    state_store_config = StateStoreConfig.model_validate(state_store_data) if state_store_data else StateStoreConfig()

    # Clients: Merge dicts, Env wins on collision
    clients = {**clients_shared, **clients_env}

    return RuntimeConfig(
        observability=observability,
        backend=backend_config,
        webhooks=webhooks_config,
        state_store=state_store_config,
        clients=clients
    )
@@ -0,0 +1,37 @@
1
+ from enum import Enum
2
+ from typing import Optional
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+
5
class DurabilityMode(str, Enum):
    """Durability modes for observability record persistence.

    Values are the literal strings accepted in observability.yaml.
    """
    IMMEDIATE = "immediate"
    EVENTUAL = "eventual"
8
+
9
class StoreType(str, Enum):
    """Observability storage backend selector (observability.yaml)."""
    GCP = "gcp"      # Google Cloud backend; uses the gcp_* settings
    LOCAL = "local"  # local filesystem backend; uses local_store_path
12
+
13
class LoggingConfig(BaseModel):
    """Filtering configuration for observability logging."""
    # Master switch for log capture.
    enabled: bool = True
    # Minimum level name (e.g. "INFO") — presumably matched against standard
    # logging level names; verify against the ingestion code.
    min_level: str = "INFO"
    # Only loggers whose names start with one of these prefixes are included
    # (empty list = include all) — TODO confirm against the filter implementation.
    include_prefixes: list[str] = Field(default_factory=list, alias="includePrefixes")
    # Loggers whose names start with one of these prefixes are excluded.
    exclude_prefixes: list[str] = Field(default_factory=list, alias="excludePrefixes")
    # Optional dotted path (?) to a custom filter callable — TODO confirm format.
    filter_fn: Optional[str] = Field(None, alias="filterFn")

    # Accept both snake_case field names and their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)
21
+
22
class ObservabilityConfig(BaseModel):
    """Top-level observability configuration (parsed from observability.yaml)."""
    # How promptly records must be persisted.
    durability: DurabilityMode = DurabilityMode.EVENTUAL
    # Which storage backend receives observability data.
    store_type: StoreType = StoreType.LOCAL

    # Local settings
    local_store_path: str = Field("logs", alias="localStorePath")

    # GCP settings
    gcp_project_id: Optional[str] = Field(None, alias="gcpProjectId")
    gcp_dataset_id: Optional[str] = Field(None, alias="gcpDatasetId")
    gcp_bucket_name: Optional[str] = Field(None, alias="gcpBucketName")

    # Logging settings
    logging: LoggingConfig = Field(default_factory=LoggingConfig)

    # Accept both snake_case field names and their camelCase aliases.
    model_config = ConfigDict(populate_by_name=True)
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+ from enum import Enum
3
+ from typing import Dict, Any, Optional, List
4
+ from pydantic import BaseModel, Field, ConfigDict
5
+ from .observability_config import ObservabilityConfig
6
+ from flowstash_clients.config import ClientSettings
7
+ import os
8
+
9
+
10
class ClientsConfigPointer(BaseModel):
    """Contents of clients.yaml: points at a directory of per-client YAML files."""
    # Directory containing client files; relative paths are resolved against
    # the directory holding clients.yaml (see env_loader._load_data_from_dir).
    path: str
    # Glob pattern matching client files within the directory.
    pattern: str = "*.yaml"
    # When True, the pattern is applied recursively ("**/" prefix).
    recursive: bool = False
14
+
15
class BackendType(str, Enum):
    """Task queue backend types."""
    ASYNC = "asyncio"      # In-process async execution
    DRAMATIQ = "dramatiq"  # Dramatiq queue
    MANAGED = "managed"    # Managed platform (Cloud Tasks via API)
20
+
21
class DramatiqConfig(BaseModel):
    """Dramatiq backend configuration."""
    # Redis connection string; overrides the REDIS_URL env var if set.
    redis_url: Optional[str] = None

    model_config = ConfigDict(populate_by_name=True)
26
+
27
class BackendConfig(BaseModel):
    """Configuration for task backend (backend.yaml: backend:)."""
    # Which queue backend to use; Dramatiq is the default.
    type: BackendType = BackendType.DRAMATIQ
    # Dramatiq-specific options; only consulted when type == DRAMATIQ.
    dramatiq: Optional[DramatiqConfig] = None

    model_config = ConfigDict(populate_by_name=True)
33
+
34
class WebhooksConfig(BaseModel):
    """Configuration for webhook ingress."""
    # URL path prefix under which webhook routes are mounted.
    prefix: str = "/webhooks"

    model_config = ConfigDict(populate_by_name=True)
39
+
40
+
41
class StateStoreType(str, Enum):
    """State store backend types (see StateStoreConfig.build_store)."""
    REDIS = "redis"
    SQLITE = "sqlite"
    MANAGED = "managed"
46
+
47
+
48
class SQLiteStoreConfig(BaseModel):
    """SQLite state store configuration."""
    # Path of the SQLite database file; relative to the process working dir.
    db_path: str = ".flowstash_state.db"

    model_config = ConfigDict(populate_by_name=True)
53
+
54
+
55
class RedisStoreConfig(BaseModel):
    """Redis state store configuration."""
    # If None, falls back to REDIS_URL env var (same as DramatiqConfig).
    redis_url: Optional[str] = None

    model_config = ConfigDict(populate_by_name=True)
61
+
62
+
63
class ManagedStoreConfig(BaseModel):
    """Managed (HTTP) state store configuration."""
    # Falls back to the flowstash_API_URL env var when None — the fallback is
    # presumably applied inside ManagedStateStore; confirm there.
    base_url: Optional[str] = None
    # Falls back to the flowstash_API_KEY env var when None (see above).
    api_key: Optional[str] = None

    model_config = ConfigDict(populate_by_name=True)
69
+
70
+
71
class StateStoreConfig(BaseModel):
    """Top-level state store configuration block (backend.yaml: state_store:)."""
    # Which backend to use; Redis is the default.
    type: StateStoreType = StateStoreType.REDIS
    # Per-backend option blocks; only the one matching `type` is consulted,
    # and a missing block falls back to that backend's defaults.
    sqlite: Optional[SQLiteStoreConfig] = None
    redis: Optional[RedisStoreConfig] = None
    managed: Optional[ManagedStoreConfig] = None

    model_config = ConfigDict(populate_by_name=True)

    def build_store(self):
        """Instantiate and return the configured StateStore implementation.

        Store classes are imported lazily so that a backend's dependencies are
        only required when that backend is actually selected.

        Raises:
            ValueError: If ``type`` is not a recognized StateStoreType.
        """
        if self.type == StateStoreType.SQLITE:
            from flowstash_lib.state.stores.sqlite_store import SQLiteStateStore
            cfg = self.sqlite or SQLiteStoreConfig()
            return SQLiteStateStore(db_path=cfg.db_path)

        if self.type == StateStoreType.REDIS:
            from flowstash_lib.state.stores.redis_store import RedisStateStore
            cfg = self.redis or RedisStoreConfig()
            # Explicit config wins; otherwise fall back to the REDIS_URL env var.
            redis_url = cfg.redis_url or os.environ.get("REDIS_URL")
            return RedisStateStore(redis_url=redis_url)

        if self.type == StateStoreType.MANAGED:
            from flowstash_lib.state.stores.managed_store import ManagedStateStore
            cfg = self.managed or ManagedStoreConfig()
            return ManagedStateStore(
                base_url=cfg.base_url,
                api_key=cfg.api_key,
            )

        raise ValueError(f"Unknown state store type: {self.type}")
102
+
103
+
104
class RuntimeConfig(BaseModel):
    """Fully merged runtime configuration (produced by load_config_dir)."""
    # Optional observability settings; None when no observability.yaml exists.
    observability: Optional[ObservabilityConfig] = None
    # Task queue backend settings.
    backend: BackendConfig = Field(default_factory=BackendConfig)
    # Per-client settings keyed by client_id.
    clients: Dict[str, ClientSettings] = Field(default_factory=dict)
    # Webhook ingress settings.
    webhooks: WebhooksConfig = Field(default_factory=WebhooksConfig)
    # State store settings.
    state_store: StateStoreConfig = Field(default_factory=StateStoreConfig)

    model_config = ConfigDict(populate_by_name=True)
112
+
113
class RuntimeConfigRegistry:
    """
    Read-only accessor around a RuntimeConfig.

    Exposes the backend/observability sections as properties and provides
    dict-like access (``registry["client_id"]``, ``in``) to client settings.
    """

    def __init__(self, config: RuntimeConfig):
        self._config = config
        # Cache the clients mapping for direct lookup.
        self._clients = config.clients

    @property
    def observability(self) -> Optional[ObservabilityConfig]:
        """Observability section of the wrapped config (may be None)."""
        return self._config.observability

    @property
    def backend(self) -> BackendConfig:
        """Task backend section of the wrapped config."""
        return self._config.backend

    @property
    def clients(self) -> Dict[str, ClientSettings]:
        """All client settings, keyed by client_id."""
        return self._clients

    def get_client(self, client_id: str) -> Optional[ClientSettings]:
        """Return the settings for *client_id*, or None when unknown."""
        return self._clients.get(client_id)

    def __getitem__(self, key: str) -> ClientSettings:
        """Strict lookup: raise KeyError for unknown client ids."""
        try:
            return self._clients[key]
        except KeyError:
            raise KeyError(f"Client '{key}' not found.") from None

    def __contains__(self, key: str) -> bool:
        return key in self._clients
@@ -0,0 +1,141 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Any, Mapping, Optional
3
+ import contextvars
4
+ import uuid
5
+
6
@dataclass(frozen=True)
class IntegrationContext:
    """Immutable context describing one integration pipeline run.

    Carries identity (integration, pipeline, run_id, tenant), optional W3C
    trace propagation headers, and free-form baggage/tags.
    """
    integration: str
    integration_pipeline: str
    run_id: str
    traceparent: Optional[str] = None
    tracestate: Optional[str] = None
    baggage: Mapping[str, str] = field(default_factory=dict)
    tags: Mapping[str, Any] = field(default_factory=dict)
    tenant_id: str = "default"
    current_record_key: Optional[str] = None

    @property
    def corelation(self) -> Any:
        """Build an observability Correlation from this context.

        NOTE: the name is misspelled but kept for backward compatibility;
        prefer the ``correlation`` alias below.
        """
        # Import inside to avoid circular dependencies
        from .observability.model import Correlation

        # Fix: the old body re-parsed traceparent into local trace_id/span_id
        # variables that were never used — the trace_id/span_id properties
        # below already do that parsing.
        return Correlation(
            integration=self.integration,
            integration_pipeline=self.integration_pipeline,
            run_id=self.run_id,
            trace_id=self.trace_id,
            span_id=self.span_id,
            tags={k: str(v) for k, v in self.tags.items()}
        )

    @property
    def correlation(self) -> Any:
        """Correctly spelled alias for ``corelation``."""
        return self.corelation

    @property
    def trace_id(self) -> Optional[str]:
        """Trace id: second dash-separated field of the traceparent header."""
        if self.traceparent:
            parts = self.traceparent.split("-")
            if len(parts) >= 2:
                return parts[1]
        return None

    @property
    def span_id(self) -> Optional[str]:
        """Span (parent) id: third dash-separated field of the traceparent header."""
        if self.traceparent:
            parts = self.traceparent.split("-")
            if len(parts) >= 3:
                return parts[2]
        return None
57
+
58
+
59
+
60
# Holds the active IntegrationContext for the current execution context
# (thread / asyncio task). Defaults to None when no context has been set.
_current_ctx: contextvars.ContextVar[Optional[IntegrationContext]] = contextvars.ContextVar(
    "framework.integration_context", default=None
)

def current_context() -> Optional[IntegrationContext]:
    """Return the current integration context."""
    return _current_ctx.get()

# Public alias, re-exported by the package __init__.
get_context = current_context

def set_context(ctx: IntegrationContext) -> Any:
    """Set the current integration context. Returns a token for resetting."""
    return _current_ctx.set(ctx)

def reset_context(token: Any):
    """Reset the integration context to its previous state."""
    _current_ctx.reset(token)
77
+
78
class integration_context:
    """
    Context manager for setting/creating an IntegrationContext.
    If no context exists, it creates one with a new run_id.

    On entry, either derives a child context from the current one (explicit
    overrides win; baggage/tags are merged) or creates a fresh root context.
    On exit, the previous context is restored.
    """
    def __init__(
        self,
        integration: Optional[str] = None,
        integration_pipeline: Optional[str] = None,
        run_id: Optional[str] = None,
        tenant_id: Optional[str] = None,
        current_record_key: Optional[str] = None,
        **kwargs
    ):
        self.overrides = {
            "integration": integration,
            "integration_pipeline": integration_pipeline,
            "run_id": run_id,
            "tenant_id": tenant_id,
            # Bug fix: this key was previously omitted, so an explicitly passed
            # current_record_key was silently dropped in __enter__.
            "current_record_key": current_record_key,
            **kwargs
        }
        # Token returned by set_context; used to restore the previous context.
        self.token = None

    def __enter__(self) -> IntegrationContext:
        parent = current_context()
        if parent:
            # Reuse parent values if not overridden
            # NOTE(review): traceparent/tracestate overrides passed via kwargs
            # are ignored here (parent values always win) — confirm intended.
            merged = {
                "integration": self.overrides.get("integration") or parent.integration,
                "integration_pipeline": self.overrides.get("integration_pipeline") or parent.integration_pipeline,
                "run_id": self.overrides.get("run_id") or parent.run_id,
                "tenant_id": self.overrides.get("tenant_id") or parent.tenant_id,
                "traceparent": parent.traceparent,
                "tracestate": parent.tracestate,
                "baggage": {**parent.baggage, **(self.overrides.get("baggage") or {})},
                "tags": {**parent.tags, **(self.overrides.get("tags") or {})},
                "current_record_key": self.overrides.get("current_record_key") or parent.current_record_key
            }
        else:
            # New root context: fill required fields with safe defaults and
            # mint a fresh run_id when none was supplied.
            merged = {
                "integration": self.overrides.get("integration") or "unknown",
                "integration_pipeline": self.overrides.get("integration_pipeline") or "unknown",
                "run_id": self.overrides.get("run_id") or str(uuid.uuid4()),
                "tenant_id": self.overrides.get("tenant_id") or "default",
                "baggage": self.overrides.get("baggage") or {},
                "tags": self.overrides.get("tags") or {},
                "current_record_key": self.overrides.get("current_record_key")
            }

        ctx = IntegrationContext(**merged)
        self.token = set_context(ctx)
        return ctx

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore whatever context was active before __enter__.
        if self.token:
            reset_context(self.token)