hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Batching utilities for the observer manager.
|
|
2
|
+
|
|
3
|
+
This module introduces an ``EventBatcher`` helper that buffers events, builds
|
|
4
|
+
envelopes, and decides when to flush batched payloads to downstream observers.
|
|
5
|
+
It keeps the batching concerns isolated from ``ObserverManager`` so the manager
|
|
6
|
+
can focus on registration and invocation logic.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import contextlib
|
|
13
|
+
import logging
|
|
14
|
+
import uuid
|
|
15
|
+
from collections import Counter
|
|
16
|
+
from collections.abc import Awaitable, Callable, Sequence
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import UTC, datetime
|
|
19
|
+
from enum import StrEnum
|
|
20
|
+
from typing import cast
|
|
21
|
+
|
|
22
|
+
from .events import Event, NodeFailed, PipelineCompleted
|
|
23
|
+
|
|
24
|
+
# Module-level logger; EventBatcher falls back to it when no logger is passed in.
LOGGER = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BatchFlushReason(StrEnum):
|
|
28
|
+
"""Reason codes for why a batch was flushed."""
|
|
29
|
+
|
|
30
|
+
SIZE = "size"
|
|
31
|
+
TIME = "time"
|
|
32
|
+
PRIORITY = "priority"
|
|
33
|
+
OVERLOAD = "overload"
|
|
34
|
+
MANUAL = "manual"
|
|
35
|
+
SHUTDOWN = "shutdown"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class OverloadPolicy(StrEnum):
|
|
39
|
+
"""Strategy to apply when the in-memory buffer exceeds its capacity."""
|
|
40
|
+
|
|
41
|
+
DROP_OLDEST = "drop-oldest"
|
|
42
|
+
DEGRADE_TO_UNBATCHED = "degrade-to-unbatched"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(slots=True)
class BatchingConfig:
    """Thresholds and policies that decide when buffered events are flushed."""

    # Flush as soon as the buffer holds at least this many events.
    max_batch_size: int = 256
    # Maximum age of an open batch, in milliseconds; a value <= 0 disables the timer.
    max_batch_window_ms: float = 50.0
    # Buffer length above which the overload policy is applied.
    max_buffer_events: int = 4096
    # Strategy used when the buffer is over capacity.
    overload_policy: OverloadPolicy = OverloadPolicy.DROP_OLDEST
    # Event classes whose arrival triggers an immediate flush of the buffer.
    priority_event_types: tuple[type[Event], ...] = (NodeFailed, PipelineCompleted)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(slots=True)
|
|
57
|
+
class BatchingMetrics:
|
|
58
|
+
"""Simple in-memory metrics tracked by the batcher."""
|
|
59
|
+
|
|
60
|
+
event_batches_total: int = 0
|
|
61
|
+
event_batch_flush_reason: Counter[str] = field(default_factory=Counter)
|
|
62
|
+
events_dropped_total: int = 0
|
|
63
|
+
events_unbatched_total: int = 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(slots=True)
|
|
67
|
+
class EventBatchEnvelope:
|
|
68
|
+
"""Metadata-rich container for a batch of events.
|
|
69
|
+
|
|
70
|
+
NOTE: Once the event taxonomy work lands, convert this envelope to the
|
|
71
|
+
canonical shape defined there. The marker keeps the linkage visible for a
|
|
72
|
+
future refactor.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
# NOTE(event-taxonomy): align fields with the canonical envelope schema.
|
|
76
|
+
batch_id: str
|
|
77
|
+
sequence_no: int
|
|
78
|
+
created_at: datetime
|
|
79
|
+
events: Sequence[Event]
|
|
80
|
+
flush_reason: BatchFlushReason
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Async callable that receives each envelope the batcher emits.
FlushCallback = Callable[[EventBatchEnvelope], Awaitable[None]]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class EventBatcher[EventT: Event]:
|
|
87
|
+
"""Collects events and flushes them according to configured rules."""
|
|
88
|
+
|
|
89
|
+
def __init__(
|
|
90
|
+
self,
|
|
91
|
+
flush: FlushCallback,
|
|
92
|
+
config: BatchingConfig | None = None,
|
|
93
|
+
*,
|
|
94
|
+
logger: logging.Logger | None = None,
|
|
95
|
+
) -> None:
|
|
96
|
+
self._flush_cb = flush
|
|
97
|
+
self._config = config or BatchingConfig()
|
|
98
|
+
self._logger = logger or LOGGER
|
|
99
|
+
|
|
100
|
+
self._metrics = BatchingMetrics()
|
|
101
|
+
self._buffer: list[EventT] = []
|
|
102
|
+
self._buffer_opened_at: datetime | None = None
|
|
103
|
+
self._sequence = 0
|
|
104
|
+
self._timer_task: asyncio.Task[None] | None = None
|
|
105
|
+
self._lock = asyncio.Lock()
|
|
106
|
+
self._closed = False
|
|
107
|
+
|
|
108
|
+
@property
|
|
109
|
+
def metrics(self) -> BatchingMetrics:
|
|
110
|
+
"""Expose current batching metrics."""
|
|
111
|
+
|
|
112
|
+
return self._metrics
|
|
113
|
+
|
|
114
|
+
@property
|
|
115
|
+
def has_pending_events(self) -> bool:
|
|
116
|
+
"""Whether events are waiting in the buffer."""
|
|
117
|
+
|
|
118
|
+
return bool(self._buffer)
|
|
119
|
+
|
|
120
|
+
async def add(self, event: EventT) -> None:
|
|
121
|
+
"""Enqueue an event and flush if any rule applies."""
|
|
122
|
+
|
|
123
|
+
envelopes: list[EventBatchEnvelope] = []
|
|
124
|
+
|
|
125
|
+
async with self._lock:
|
|
126
|
+
if self._closed:
|
|
127
|
+
self._logger.debug("Batcher closed; flushing event immediately")
|
|
128
|
+
envelopes.append(self._build_single_event_envelope(event, BatchFlushReason.MANUAL))
|
|
129
|
+
else:
|
|
130
|
+
is_priority = self._is_priority(event)
|
|
131
|
+
self._buffer.append(event)
|
|
132
|
+
self._ensure_buffer_start_timestamp()
|
|
133
|
+
|
|
134
|
+
if len(self._buffer) == 1:
|
|
135
|
+
self._schedule_timer()
|
|
136
|
+
|
|
137
|
+
if self._is_over_capacity():
|
|
138
|
+
envelopes.extend(await self._handle_overload(event))
|
|
139
|
+
elif len(self._buffer) >= self._config.max_batch_size:
|
|
140
|
+
envelope = await self._prepare_flush_locked(BatchFlushReason.SIZE)
|
|
141
|
+
if envelope:
|
|
142
|
+
envelopes.append(envelope)
|
|
143
|
+
elif is_priority:
|
|
144
|
+
envelope = await self._prepare_flush_locked(BatchFlushReason.PRIORITY)
|
|
145
|
+
if envelope:
|
|
146
|
+
envelopes.append(envelope)
|
|
147
|
+
|
|
148
|
+
await self._deliver_envelopes(envelopes)
|
|
149
|
+
|
|
150
|
+
async def flush(self, reason: BatchFlushReason = BatchFlushReason.MANUAL) -> None:
|
|
151
|
+
"""Flush pending events regardless of thresholds."""
|
|
152
|
+
|
|
153
|
+
async with self._lock:
|
|
154
|
+
envelope = await self._prepare_flush_locked(reason)
|
|
155
|
+
|
|
156
|
+
if envelope:
|
|
157
|
+
await self._flush_cb(envelope)
|
|
158
|
+
|
|
159
|
+
async def close(self) -> None:
|
|
160
|
+
"""Flush remaining events and mark the batcher as closed."""
|
|
161
|
+
|
|
162
|
+
async with self._lock:
|
|
163
|
+
self._closed = True
|
|
164
|
+
envelope = await self._prepare_flush_locked(BatchFlushReason.SHUTDOWN)
|
|
165
|
+
await self._cancel_timer()
|
|
166
|
+
|
|
167
|
+
if envelope:
|
|
168
|
+
await self._flush_cb(envelope)
|
|
169
|
+
|
|
170
|
+
# Internal helpers -------------------------------------------------
|
|
171
|
+
|
|
172
|
+
def _is_priority(self, event: EventT) -> bool:
|
|
173
|
+
return any(isinstance(event, priority) for priority in self._config.priority_event_types)
|
|
174
|
+
|
|
175
|
+
def _is_over_capacity(self) -> bool:
|
|
176
|
+
return len(self._buffer) > self._config.max_buffer_events
|
|
177
|
+
|
|
178
|
+
async def _handle_overload(self, triggering_event: EventT) -> list[EventBatchEnvelope]:
|
|
179
|
+
"""Apply overload policy when the buffer exceeds capacity."""
|
|
180
|
+
|
|
181
|
+
policy = self._config.overload_policy
|
|
182
|
+
envelopes: list[EventBatchEnvelope] = []
|
|
183
|
+
|
|
184
|
+
if policy is OverloadPolicy.DROP_OLDEST:
|
|
185
|
+
dropped, resolved = self._drop_oldest_until_capacity()
|
|
186
|
+
if dropped:
|
|
187
|
+
self._metrics.events_dropped_total += dropped
|
|
188
|
+
self._logger.warning(
|
|
189
|
+
"Dropped %s event(s) due to overload (policy=%s)", dropped, policy.value
|
|
190
|
+
)
|
|
191
|
+
if not resolved:
|
|
192
|
+
envelope = await self._prepare_flush_locked(BatchFlushReason.OVERLOAD)
|
|
193
|
+
if envelope:
|
|
194
|
+
envelopes.append(envelope)
|
|
195
|
+
return envelopes
|
|
196
|
+
|
|
197
|
+
if policy is OverloadPolicy.DEGRADE_TO_UNBATCHED:
|
|
198
|
+
latest = self._buffer.pop() if self._buffer else triggering_event
|
|
199
|
+
envelope = await self._prepare_flush_locked(BatchFlushReason.OVERLOAD)
|
|
200
|
+
if envelope:
|
|
201
|
+
envelopes.append(envelope)
|
|
202
|
+
envelopes.append(self._build_single_event_envelope(latest, BatchFlushReason.OVERLOAD))
|
|
203
|
+
self._metrics.events_unbatched_total += 1
|
|
204
|
+
return envelopes
|
|
205
|
+
|
|
206
|
+
raise RuntimeError(f"Unsupported overload policy: {policy!r}")
|
|
207
|
+
|
|
208
|
+
def _drop_oldest_until_capacity(self) -> tuple[int, bool]:
|
|
209
|
+
to_drop = max(len(self._buffer) - self._config.max_buffer_events, 0)
|
|
210
|
+
if to_drop <= 0:
|
|
211
|
+
return 0, True
|
|
212
|
+
|
|
213
|
+
drop_indices: list[int] = []
|
|
214
|
+
for idx, event in enumerate(self._buffer):
|
|
215
|
+
if self._is_priority(event):
|
|
216
|
+
continue
|
|
217
|
+
drop_indices.append(idx)
|
|
218
|
+
if len(drop_indices) == to_drop:
|
|
219
|
+
break
|
|
220
|
+
|
|
221
|
+
if len(drop_indices) < to_drop:
|
|
222
|
+
return 0, False
|
|
223
|
+
|
|
224
|
+
for idx in reversed(drop_indices):
|
|
225
|
+
self._buffer.pop(idx)
|
|
226
|
+
|
|
227
|
+
return to_drop, True
|
|
228
|
+
|
|
229
|
+
def _ensure_buffer_start_timestamp(self) -> None:
|
|
230
|
+
if self._buffer_opened_at is None:
|
|
231
|
+
self._buffer_opened_at = datetime.now(tz=UTC)
|
|
232
|
+
|
|
233
|
+
def _schedule_timer(self) -> None:
|
|
234
|
+
if self._timer_task is not None:
|
|
235
|
+
return
|
|
236
|
+
|
|
237
|
+
window_seconds = max(self._config.max_batch_window_ms / 1000.0, 0)
|
|
238
|
+
if window_seconds == 0:
|
|
239
|
+
return
|
|
240
|
+
|
|
241
|
+
self._timer_task = asyncio.create_task(self._timer_flush(window_seconds))
|
|
242
|
+
|
|
243
|
+
async def _timer_flush(self, delay: float) -> None:
|
|
244
|
+
envelope: EventBatchEnvelope | None = None
|
|
245
|
+
|
|
246
|
+
try:
|
|
247
|
+
await asyncio.sleep(delay)
|
|
248
|
+
async with self._lock:
|
|
249
|
+
if self._buffer:
|
|
250
|
+
envelope = await self._prepare_flush_locked(BatchFlushReason.TIME)
|
|
251
|
+
else:
|
|
252
|
+
envelope = None
|
|
253
|
+
finally:
|
|
254
|
+
self._timer_task = None
|
|
255
|
+
|
|
256
|
+
if envelope:
|
|
257
|
+
await self._flush_cb(envelope)
|
|
258
|
+
|
|
259
|
+
async def _prepare_flush_locked(self, reason: BatchFlushReason) -> EventBatchEnvelope | None:
|
|
260
|
+
if not self._buffer:
|
|
261
|
+
return None
|
|
262
|
+
|
|
263
|
+
await self._cancel_timer()
|
|
264
|
+
|
|
265
|
+
envelope = EventBatchEnvelope(
|
|
266
|
+
batch_id=uuid.uuid4().hex,
|
|
267
|
+
sequence_no=self._next_sequence(),
|
|
268
|
+
created_at=self._buffer_opened_at or datetime.now(tz=UTC),
|
|
269
|
+
events=cast("Sequence[Event]", tuple(self._buffer)),
|
|
270
|
+
flush_reason=reason,
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
self._buffer.clear()
|
|
274
|
+
self._buffer_opened_at = None
|
|
275
|
+
|
|
276
|
+
self._record_envelope(envelope)
|
|
277
|
+
return envelope
|
|
278
|
+
|
|
279
|
+
async def _cancel_timer(self) -> None:
|
|
280
|
+
if self._timer_task is None:
|
|
281
|
+
return
|
|
282
|
+
task = self._timer_task
|
|
283
|
+
self._timer_task = None
|
|
284
|
+
if task is asyncio.current_task():
|
|
285
|
+
return
|
|
286
|
+
task.cancel()
|
|
287
|
+
with contextlib.suppress(asyncio.CancelledError):
|
|
288
|
+
await task
|
|
289
|
+
|
|
290
|
+
def _build_single_event_envelope(
|
|
291
|
+
self, event: EventT, reason: BatchFlushReason
|
|
292
|
+
) -> EventBatchEnvelope:
|
|
293
|
+
envelope = EventBatchEnvelope(
|
|
294
|
+
batch_id=uuid.uuid4().hex,
|
|
295
|
+
sequence_no=self._next_sequence(),
|
|
296
|
+
created_at=datetime.now(tz=UTC),
|
|
297
|
+
events=cast("Sequence[Event]", (event,)),
|
|
298
|
+
flush_reason=reason,
|
|
299
|
+
)
|
|
300
|
+
self._record_envelope(envelope)
|
|
301
|
+
return envelope
|
|
302
|
+
|
|
303
|
+
async def _deliver_envelopes(self, envelopes: list[EventBatchEnvelope]) -> None:
|
|
304
|
+
for envelope in envelopes:
|
|
305
|
+
await self._flush_cb(envelope)
|
|
306
|
+
|
|
307
|
+
def _record_envelope(self, envelope: EventBatchEnvelope) -> None:
|
|
308
|
+
self._metrics.event_batches_total += 1
|
|
309
|
+
self._metrics.event_batch_flush_reason[envelope.flush_reason.value] += 1
|
|
310
|
+
self._logger.debug(
|
|
311
|
+
"Flushing batch %s (seq=%s, size=%s, reason=%s)",
|
|
312
|
+
envelope.batch_id,
|
|
313
|
+
envelope.sequence_no,
|
|
314
|
+
len(envelope.events),
|
|
315
|
+
envelope.flush_reason.value,
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
def _next_sequence(self) -> int:
|
|
319
|
+
self._sequence += 1
|
|
320
|
+
return self._sequence
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
__all__ = [
|
|
324
|
+
"BatchFlushReason",
|
|
325
|
+
"OverloadPolicy",
|
|
326
|
+
"BatchingConfig",
|
|
327
|
+
"BatchingMetrics",
|
|
328
|
+
"EventBatchEnvelope",
|
|
329
|
+
"EventBatcher",
|
|
330
|
+
]
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Function-level decorators for event system metadata.
|
|
2
|
+
|
|
3
|
+
These decorators attach metadata to functions without performing any
|
|
4
|
+
registration side effects. Managers read the metadata at runtime to
|
|
5
|
+
configure control handlers and observers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable, Iterable
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import Any, Literal, TypeVar
|
|
13
|
+
|
|
14
|
+
from .events import Event
|
|
15
|
+
|
|
16
|
+
# Key under which the decorators store their metadata in a function's __dict__.
EVENT_METADATA_ATTR = "__hexdag_event_metadata__"
|
|
17
|
+
|
|
18
|
+
ControlHandlerKind = Literal["control_handler"]
|
|
19
|
+
ObserverKind = Literal["observer"]
|
|
20
|
+
DecoratorKind = ControlHandlerKind | ObserverKind
|
|
21
|
+
|
|
22
|
+
TFunc = TypeVar("TFunc", bound=Callable[..., Any])
|
|
23
|
+
|
|
24
|
+
EventType = type[Event]
|
|
25
|
+
EventTypesInput = EventType | Iterable[EventType] | None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class EventDecoratorMetadata:
    """Immutable record describing a decorated control handler or observer."""

    # Which decorator produced this record: "control_handler" or "observer".
    kind: DecoratorKind
    # Set by ``control_handler``: handler name.
    name: str | None = None
    # Set by ``control_handler``: handler priority.
    priority: int | None = None
    # Set by both decorators: event classes of interest, or None if unspecified.
    event_types: set[EventType] | None = None
    # Set by ``control_handler``: free-form description.
    description: str | None = None
    # Set by ``observer``: optional identifier.
    id: str | None = None
    # Set by ``observer``: timeout value passed to the decorator.
    timeout: float | None = None
    # Set by ``observer``: concurrency limit passed to the decorator.
    max_concurrency: int | None = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def normalize_event_types(event_types: EventTypesInput) -> set[EventType] | None:
    """Coerce a single event class or an iterable of them into a validated set.

    Returns:
        ``None`` when ``event_types`` is ``None``; otherwise a set containing
        every supplied class.

    Raises:
        TypeError: If the argument is neither a class, an iterable, nor
            ``None``, or if any supplied item is not an ``Event`` subclass.
    """

    if event_types is None:
        return None

    # A bare class is handled before the iterable check.
    if isinstance(event_types, type):
        return {_ensure_event_subclass(event_types)}

    if not isinstance(event_types, Iterable):
        raise TypeError(
            f"event_types must be a type, iterable of types, or None; got {type(event_types).__name__}"
        )

    return {_ensure_event_subclass(candidate) for candidate in event_types}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _ensure_event_subclass(event_type: Any) -> EventType:
|
|
64
|
+
"""Validate and return an event subclass."""
|
|
65
|
+
|
|
66
|
+
if not isinstance(event_type, type):
|
|
67
|
+
raise TypeError(
|
|
68
|
+
"event_types must contain Event subclasses; "
|
|
69
|
+
f"got instance of {type(event_type).__name__}"
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
if not issubclass(event_type, Event):
|
|
73
|
+
raise TypeError(f"event_types must contain Event subclasses; got {event_type!r}")
|
|
74
|
+
|
|
75
|
+
return event_type
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def control_handler(
    name: str,
    *,
    priority: int = 100,
    event_types: EventTypesInput = None,
    description: str | None = None,
) -> Callable[[TFunc], TFunc]:
    """Build a decorator that tags a function as a control handler.

    The decorator only stores an ``EventDecoratorMetadata`` record in the
    function's ``__dict__`` under ``EVENT_METADATA_ATTR`` and returns the
    function itself; nothing is registered.

    Args:
        name: Handler name stored in the metadata.
        priority: Handler priority stored in the metadata.
        event_types: A single event class, an iterable of them, or ``None``.
        description: Optional description stored in the metadata.

    Raises:
        TypeError: If ``event_types`` fails validation (raised when the
            decorator is created, not when it is applied).
    """

    # Validate once, up front, so bad arguments fail at decoration-factory time.
    validated_types = normalize_event_types(event_types)

    def attach(func: TFunc) -> TFunc:
        func.__dict__[EVENT_METADATA_ATTR] = EventDecoratorMetadata(
            kind="control_handler",
            name=name,
            priority=priority,
            event_types=validated_types,
            description=description,
        )
        return func

    return attach
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def observer(
    *,
    event_types: EventTypesInput = None,
    timeout: float | None = 5.0,
    max_concurrency: int | None = None,
    id: str | None = None,
) -> Callable[[TFunc], TFunc]:
    """Build a decorator that tags a function as an observer.

    The decorator only stores an ``EventDecoratorMetadata`` record in the
    function's ``__dict__`` under ``EVENT_METADATA_ATTR`` and returns the
    function itself; nothing is registered.

    Args:
        event_types: A single event class, an iterable of them, or ``None``.
        timeout: Timeout value stored in the metadata.
        max_concurrency: Concurrency limit stored in the metadata; must be at
            least 1 when given.
        id: Optional identifier stored in the metadata.

    Raises:
        TypeError: If ``event_types`` fails validation.
        ValueError: If ``max_concurrency`` is provided and is less than 1.
    """

    # Event types are validated first, so a TypeError takes precedence over
    # the max_concurrency check below.
    validated_types = normalize_event_types(event_types)

    if not (max_concurrency is None or max_concurrency >= 1):
        raise ValueError("max_concurrency must be positive when provided")

    def attach(func: TFunc) -> TFunc:
        func.__dict__[EVENT_METADATA_ATTR] = EventDecoratorMetadata(
            kind="observer",
            event_types=validated_types,
            timeout=timeout,
            max_concurrency=max_concurrency,
            id=id,
        )
        return func

    return attach
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
__all__ = [
|
|
132
|
+
"EVENT_METADATA_ATTR",
|
|
133
|
+
"EventDecoratorMetadata",
|
|
134
|
+
"EventType",
|
|
135
|
+
"EventTypesInput",
|
|
136
|
+
"control_handler",
|
|
137
|
+
"observer",
|
|
138
|
+
"normalize_event_types",
|
|
139
|
+
]
|