kubiya-control-plane-api 0.1.0__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kubiya-control-plane-api might be problematic. Click here for more details.
- control_plane_api/README.md +266 -0
- control_plane_api/__init__.py +0 -0
- control_plane_api/__version__.py +1 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +98 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/1382bec74309_initial_migration_with_all_models.py +251 -0
- control_plane_api/alembic/versions/1f54bc2a37e3_add_analytics_tables.py +162 -0
- control_plane_api/alembic/versions/2e4cb136dc10_rename_toolset_ids_to_skill_ids_in_teams.py +30 -0
- control_plane_api/alembic/versions/31cd69a644ce_add_skill_templates_table.py +28 -0
- control_plane_api/alembic/versions/89e127caa47d_add_jobs_and_job_executions_tables.py +161 -0
- control_plane_api/alembic/versions/add_llm_models_table.py +51 -0
- control_plane_api/alembic/versions/b0e10697f212_add_runtime_column_to_teams_simple.py +42 -0
- control_plane_api/alembic/versions/ce43b24b63bf_add_execution_trigger_source_and_fix_.py +155 -0
- control_plane_api/alembic/versions/d4eaf16e3f8d_rename_toolsets_to_skills.py +84 -0
- control_plane_api/alembic/versions/efa2dc427da1_rename_metadata_to_custom_metadata.py +32 -0
- control_plane_api/alembic/versions/f973b431d1ce_add_workflow_executor_to_skill_types.py +44 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +379 -0
- control_plane_api/app/activities/team_activities.py +410 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +577 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +354 -0
- control_plane_api/app/config/model_pricing.py +318 -0
- control_plane_api/app/config.py +95 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/job_executor.py +312 -0
- control_plane_api/app/lib/kubiya_client.py +235 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/planning_tools/__init__.py +22 -0
- control_plane_api/app/lib/planning_tools/agents.py +155 -0
- control_plane_api/app/lib/planning_tools/base.py +189 -0
- control_plane_api/app/lib/planning_tools/environments.py +214 -0
- control_plane_api/app/lib/planning_tools/resources.py +240 -0
- control_plane_api/app/lib/planning_tools/teams.py +198 -0
- control_plane_api/app/lib/policy_enforcer_client.py +939 -0
- control_plane_api/app/lib/redis_client.py +436 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/temporal_client.py +138 -0
- control_plane_api/app/lib/validation/__init__.py +20 -0
- control_plane_api/app/lib/validation/runtime_validation.py +287 -0
- control_plane_api/app/main.py +128 -0
- control_plane_api/app/middleware/__init__.py +8 -0
- control_plane_api/app/middleware/auth.py +513 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +27 -0
- control_plane_api/app/models/agent.py +79 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +81 -0
- control_plane_api/app/models/environment.py +63 -0
- control_plane_api/app/models/execution.py +93 -0
- control_plane_api/app/models/job.py +179 -0
- control_plane_api/app/models/llm_model.py +75 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +47 -0
- control_plane_api/app/models/session.py +38 -0
- control_plane_api/app/models/team.py +66 -0
- control_plane_api/app/models/workflow.py +55 -0
- control_plane_api/app/policies/README.md +121 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +364 -0
- control_plane_api/app/routers/agents_v2.py +1260 -0
- control_plane_api/app/routers/analytics.py +1014 -0
- control_plane_api/app/routers/context_manager.py +562 -0
- control_plane_api/app/routers/environment_context.py +270 -0
- control_plane_api/app/routers/environments.py +715 -0
- control_plane_api/app/routers/execution_environment.py +517 -0
- control_plane_api/app/routers/executions.py +1911 -0
- control_plane_api/app/routers/health.py +92 -0
- control_plane_api/app/routers/health_v2.py +326 -0
- control_plane_api/app/routers/integrations.py +274 -0
- control_plane_api/app/routers/jobs.py +1344 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +361 -0
- control_plane_api/app/routers/policies.py +639 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +902 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +155 -0
- control_plane_api/app/routers/skills.py +1001 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/task_planning.py +1256 -0
- control_plane_api/app/routers/task_queues.py +654 -0
- control_plane_api/app/routers/team_context.py +270 -0
- control_plane_api/app/routers/teams.py +1400 -0
- control_plane_api/app/routers/worker_queues.py +1545 -0
- control_plane_api/app/routers/workers.py +935 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/job_schemas.py +295 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_service.py +619 -0
- control_plane_api/app/services/litellm_service.py +190 -0
- control_plane_api/app/services/policy_service.py +525 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/skills/__init__.py +44 -0
- control_plane_api/app/skills/base.py +229 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/data_visualization.py +154 -0
- control_plane_api/app/skills/docker.py +104 -0
- control_plane_api/app/skills/file_generation.py +94 -0
- control_plane_api/app/skills/file_system.py +110 -0
- control_plane_api/app/skills/python.py +92 -0
- control_plane_api/app/skills/registry.py +65 -0
- control_plane_api/app/skills/shell.py +102 -0
- control_plane_api/app/skills/workflow_executor.py +469 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +507 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +222 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/team_execution.py +399 -0
- control_plane_api/scripts/seed_models.py +239 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1241 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/runtime_activities.py +388 -0
- control_plane_api/worker/activities/skill_activities.py +267 -0
- control_plane_api/worker/activities/team_activities.py +1217 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +275 -0
- control_plane_api/worker/control_plane_client.py +529 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +31 -0
- control_plane_api/worker/runtimes/base.py +789 -0
- control_plane_api/worker/runtimes/claude_code_runtime.py +1443 -0
- control_plane_api/worker/runtimes/default_runtime.py +617 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_executor.py +422 -0
- control_plane_api/worker/services/agent_executor_v2.py +383 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/data_visualization.py +827 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +194 -0
- control_plane_api/worker/services/skill_factory.py +175 -0
- control_plane_api/worker/services/team_executor.py +574 -0
- control_plane_api/worker/services/team_executor_v2.py +465 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1418 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +305 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +373 -0
- control_plane_api/worker/worker.py +753 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +589 -0
- control_plane_api/worker/workflows/team_execution.py +429 -0
- kubiya_control_plane_api-0.3.4.dist-info/METADATA +229 -0
- kubiya_control_plane_api-0.3.4.dist-info/RECORD +182 -0
- kubiya_control_plane_api-0.3.4.dist-info/entry_points.txt +2 -0
- kubiya_control_plane_api-0.3.4.dist-info/top_level.txt +1 -0
- kubiya_control_plane_api-0.1.0.dist-info/METADATA +0 -66
- kubiya_control_plane_api-0.1.0.dist-info/RECORD +0 -5
- kubiya_control_plane_api-0.1.0.dist-info/top_level.txt +0 -1
- {kubiya_control_plane_api-0.1.0.dist-info/licenses → control_plane_api}/LICENSE +0 -0
- {kubiya_control_plane_api-0.1.0.dist-info → kubiya_control_plane_api-0.3.4.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Smart chunk batching for streaming to reduce HTTP requests.
|
|
3
|
+
|
|
4
|
+
Instead of sending one POST per chunk (50-70 requests), batch chunks
|
|
5
|
+
with configurable time/size windows (5-10 requests).
|
|
6
|
+
|
|
7
|
+
Batching Strategy:
|
|
8
|
+
- Time window: Flush after X ms (default: 100ms)
|
|
9
|
+
- Size window: Flush when batch reaches Y bytes (default: 100 bytes)
|
|
10
|
+
- Immediate flush: On tool events, errors, or completion
|
|
11
|
+
|
|
12
|
+
This provides:
|
|
13
|
+
- 90%+ reduction in HTTP requests
|
|
14
|
+
- Still feels real-time (100ms is imperceptible)
|
|
15
|
+
- Lower latency (fewer round trips)
|
|
16
|
+
- Better serverless performance (fewer cold starts)
|
|
17
|
+
- Lower costs (fewer invocations)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import asyncio
|
|
21
|
+
import time
|
|
22
|
+
from typing import Dict, Any, Optional, Callable
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
import structlog
|
|
25
|
+
|
|
26
|
+
logger = structlog.get_logger()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class BatchConfig:
    """Settings that control how streamed chunks are grouped before publishing."""

    # Time-based trigger: how long (in milliseconds) a batch may wait before
    # it is flushed.
    time_window_ms: int = 100

    # Size-based trigger: flush once the accumulated content reaches this
    # many bytes.
    size_window_bytes: int = 100

    # Safety ceiling: a batch that reaches this many bytes is flushed at once.
    max_batch_size_bytes: int = 1000

    # Master switch for batching (handy for testing/debugging).
    enabled: bool = True

    @classmethod
    def from_env(cls) -> "BatchConfig":
        """
        Build a configuration from process environment variables.

        Recognised variables (each falls back to the listed default):
            CHUNK_BATCHING_ENABLED: "true" (case-insensitive) enables
                batching; any other value disables it (default: true)
            CHUNK_BATCHING_TIME_WINDOW_MS: Time window in ms (default: 100)
            CHUNK_BATCHING_SIZE_WINDOW_BYTES: Size window in bytes (default: 100)
            CHUNK_BATCHING_MAX_SIZE_BYTES: Max batch size in bytes (default: 1000)

        Returns:
            BatchConfig populated from the environment

        Raises:
            ValueError: If one of the numeric variables is not a valid integer
        """
        import os

        def _read_int(variable: str, fallback: str) -> int:
            return int(os.getenv(variable, fallback))

        # Values are read in a fixed order so that, when several variables
        # are malformed, the same one is reported first every time.
        batching_on = os.getenv("CHUNK_BATCHING_ENABLED", "true").lower() == "true"
        window_ms = _read_int("CHUNK_BATCHING_TIME_WINDOW_MS", "100")
        window_bytes = _read_int("CHUNK_BATCHING_SIZE_WINDOW_BYTES", "100")
        ceiling_bytes = _read_int("CHUNK_BATCHING_MAX_SIZE_BYTES", "1000")

        return cls(
            time_window_ms=window_ms,
            size_window_bytes=window_bytes,
            max_batch_size_bytes=ceiling_bytes,
            enabled=batching_on,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class ContentBatch:
    """Accumulated content chunks waiting to be flushed."""

    # Chunks in arrival order.
    chunks: list[str] = field(default_factory=list)
    # Sum of the UTF-8 encoded sizes of all buffered chunks, in bytes.
    total_size: int = 0
    # time.monotonic() reading taken when the first chunk of the current
    # batch arrived; None while the batch is empty. It is a monotonic
    # reading, not a wall-clock timestamp, so it is only meaningful for
    # measuring elapsed time.
    first_chunk_time: Optional[float] = None

    def add(self, content: str) -> None:
        """
        Add content to the batch.

        Raises:
            UnicodeEncodeError: If content cannot be encoded as UTF-8
                (e.g. it contains a lone surrogate). The batch is left
                unchanged in that case.
        """
        # Measure first: if encoding fails nothing has been appended yet,
        # so chunks and total_size can never disagree.
        encoded_size = len(content.encode('utf-8'))

        self.chunks.append(content)
        self.total_size += encoded_size

        if self.first_chunk_time is None:
            # Monotonic clock: elapsed-time measurements must not be
            # affected by system clock adjustments.
            self.first_chunk_time = time.monotonic()

    def get_combined_content(self) -> str:
        """Get all chunks combined into single string."""
        return ''.join(self.chunks)

    def clear(self) -> None:
        """Clear the batch after flushing."""
        self.chunks.clear()
        self.total_size = 0
        self.first_chunk_time = None

    def is_empty(self) -> bool:
        """Check if batch is empty."""
        return not self.chunks

    def age_ms(self) -> float:
        """Get the time since the first buffered chunk, in milliseconds (0.0 if empty)."""
        if self.first_chunk_time is None:
            return 0.0
        return (time.monotonic() - self.first_chunk_time) * 1000
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class ChunkBatcher:
|
|
107
|
+
"""
|
|
108
|
+
Smart batching for streaming chunks to reduce HTTP requests.
|
|
109
|
+
|
|
110
|
+
Usage:
|
|
111
|
+
batcher = ChunkBatcher(
|
|
112
|
+
publish_func=control_plane.publish_event,
|
|
113
|
+
execution_id=execution_id,
|
|
114
|
+
message_id=message_id,
|
|
115
|
+
config=BatchConfig(time_window_ms=100, size_window_bytes=100)
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Add chunks as they arrive
|
|
119
|
+
await batcher.add_chunk("Hello")
|
|
120
|
+
await batcher.add_chunk(" world")
|
|
121
|
+
|
|
122
|
+
# Flush remaining chunks when done
|
|
123
|
+
await batcher.flush()
|
|
124
|
+
"""
|
|
125
|
+
|
|
126
|
+
def __init__(
|
|
127
|
+
self,
|
|
128
|
+
publish_func: Callable,
|
|
129
|
+
execution_id: str,
|
|
130
|
+
message_id: str,
|
|
131
|
+
config: Optional[BatchConfig] = None
|
|
132
|
+
):
|
|
133
|
+
self.publish_func = publish_func
|
|
134
|
+
self.execution_id = execution_id
|
|
135
|
+
self.message_id = message_id
|
|
136
|
+
self.config = config or BatchConfig()
|
|
137
|
+
|
|
138
|
+
self.batch = ContentBatch()
|
|
139
|
+
self._flush_task: Optional[asyncio.Task] = None
|
|
140
|
+
self._stats = {
|
|
141
|
+
"chunks_received": 0,
|
|
142
|
+
"batches_sent": 0,
|
|
143
|
+
"bytes_sent": 0,
|
|
144
|
+
"flushes_by_time": 0,
|
|
145
|
+
"flushes_by_size": 0,
|
|
146
|
+
"flushes_manual": 0,
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
async def add_chunk(self, content: str) -> None:
|
|
150
|
+
"""
|
|
151
|
+
Add a chunk to the batch.
|
|
152
|
+
|
|
153
|
+
Automatically flushes if:
|
|
154
|
+
- Batch size exceeds size_window_bytes
|
|
155
|
+
- Batch age exceeds time_window_ms
|
|
156
|
+
- Max batch size is reached (safety)
|
|
157
|
+
"""
|
|
158
|
+
if not self.config.enabled:
|
|
159
|
+
# Batching disabled - send immediately
|
|
160
|
+
await self._publish_batch([content])
|
|
161
|
+
return
|
|
162
|
+
|
|
163
|
+
self._stats["chunks_received"] += 1
|
|
164
|
+
self.batch.add(content)
|
|
165
|
+
|
|
166
|
+
# Check if we should flush immediately due to size
|
|
167
|
+
should_flush_size = self.batch.total_size >= self.config.size_window_bytes
|
|
168
|
+
should_flush_max = self.batch.total_size >= self.config.max_batch_size_bytes
|
|
169
|
+
|
|
170
|
+
if should_flush_max:
|
|
171
|
+
# Safety: flush immediately if max size reached
|
|
172
|
+
logger.debug(
|
|
173
|
+
"Flushing batch (max size reached)",
|
|
174
|
+
execution_id=self.execution_id[:8],
|
|
175
|
+
batch_size=self.batch.total_size,
|
|
176
|
+
chunk_count=len(self.batch.chunks),
|
|
177
|
+
)
|
|
178
|
+
await self.flush(reason="max_size")
|
|
179
|
+
elif should_flush_size:
|
|
180
|
+
# Size threshold reached - flush now
|
|
181
|
+
await self.flush(reason="size")
|
|
182
|
+
else:
|
|
183
|
+
# Start/reset timer for time-based flush
|
|
184
|
+
await self._schedule_time_flush()
|
|
185
|
+
|
|
186
|
+
async def _schedule_time_flush(self) -> None:
|
|
187
|
+
"""Schedule a time-based flush if not already scheduled."""
|
|
188
|
+
if self._flush_task is not None and not self._flush_task.done():
|
|
189
|
+
# Timer already running
|
|
190
|
+
return
|
|
191
|
+
|
|
192
|
+
self._flush_task = asyncio.create_task(self._time_based_flush())
|
|
193
|
+
|
|
194
|
+
async def _time_based_flush(self) -> None:
|
|
195
|
+
"""Wait for time window, then flush."""
|
|
196
|
+
await asyncio.sleep(self.config.time_window_ms / 1000.0)
|
|
197
|
+
|
|
198
|
+
if not self.batch.is_empty():
|
|
199
|
+
await self.flush(reason="time")
|
|
200
|
+
|
|
201
|
+
async def flush(self, reason: str = "manual") -> None:
|
|
202
|
+
"""
|
|
203
|
+
Flush current batch immediately.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
reason: Why flush was triggered (for stats/debugging)
|
|
207
|
+
"""
|
|
208
|
+
if self.batch.is_empty():
|
|
209
|
+
return
|
|
210
|
+
|
|
211
|
+
# Cancel pending timer if any
|
|
212
|
+
if self._flush_task is not None and not self._flush_task.done():
|
|
213
|
+
self._flush_task.cancel()
|
|
214
|
+
try:
|
|
215
|
+
await self._flush_task
|
|
216
|
+
except asyncio.CancelledError:
|
|
217
|
+
pass
|
|
218
|
+
|
|
219
|
+
# Publish the batch
|
|
220
|
+
chunks = self.batch.chunks.copy()
|
|
221
|
+
await self._publish_batch(chunks)
|
|
222
|
+
|
|
223
|
+
# Update stats
|
|
224
|
+
if reason == "time":
|
|
225
|
+
self._stats["flushes_by_time"] += 1
|
|
226
|
+
elif reason == "size" or reason == "max_size":
|
|
227
|
+
self._stats["flushes_by_size"] += 1
|
|
228
|
+
else:
|
|
229
|
+
self._stats["flushes_manual"] += 1
|
|
230
|
+
|
|
231
|
+
# Clear batch
|
|
232
|
+
self.batch.clear()
|
|
233
|
+
|
|
234
|
+
async def _publish_batch(self, chunks: list[str]) -> None:
|
|
235
|
+
"""Publish a batch of chunks as single event."""
|
|
236
|
+
combined_content = ''.join(chunks)
|
|
237
|
+
|
|
238
|
+
try:
|
|
239
|
+
# Call publish_func (could be sync or async)
|
|
240
|
+
result = self.publish_func(
|
|
241
|
+
execution_id=self.execution_id,
|
|
242
|
+
event_type="message_chunk",
|
|
243
|
+
data={
|
|
244
|
+
"role": "assistant",
|
|
245
|
+
"content": combined_content,
|
|
246
|
+
"is_chunk": True,
|
|
247
|
+
"message_id": self.message_id,
|
|
248
|
+
# Metadata for debugging
|
|
249
|
+
"batch_info": {
|
|
250
|
+
"chunk_count": len(chunks),
|
|
251
|
+
"batch_size": len(combined_content.encode('utf-8')),
|
|
252
|
+
} if len(chunks) > 1 else None,
|
|
253
|
+
}
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
# Await if it's a coroutine
|
|
257
|
+
if asyncio.iscoroutine(result):
|
|
258
|
+
await result
|
|
259
|
+
|
|
260
|
+
self._stats["batches_sent"] += 1
|
|
261
|
+
self._stats["bytes_sent"] += len(combined_content.encode('utf-8'))
|
|
262
|
+
|
|
263
|
+
except Exception as e:
|
|
264
|
+
logger.warning(
|
|
265
|
+
"Failed to publish batch",
|
|
266
|
+
execution_id=self.execution_id[:8],
|
|
267
|
+
error=str(e),
|
|
268
|
+
chunk_count=len(chunks),
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
def get_stats(self) -> Dict[str, Any]:
|
|
272
|
+
"""
|
|
273
|
+
Get batching statistics.
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
Dict with stats about batching performance
|
|
277
|
+
"""
|
|
278
|
+
chunks_received = self._stats["chunks_received"]
|
|
279
|
+
batches_sent = self._stats["batches_sent"]
|
|
280
|
+
|
|
281
|
+
return {
|
|
282
|
+
**self._stats,
|
|
283
|
+
"reduction_percent": round(
|
|
284
|
+
(1 - batches_sent / max(chunks_received, 1)) * 100, 1
|
|
285
|
+
) if chunks_received > 0 else 0,
|
|
286
|
+
"avg_batch_size": round(
|
|
287
|
+
chunks_received / max(batches_sent, 1), 1
|
|
288
|
+
) if batches_sent > 0 else 0,
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
async def close(self) -> None:
|
|
292
|
+
"""
|
|
293
|
+
Close the batcher and flush remaining chunks.
|
|
294
|
+
|
|
295
|
+
Call this when streaming is complete.
|
|
296
|
+
"""
|
|
297
|
+
await self.flush(reason="close")
|
|
298
|
+
|
|
299
|
+
# Log stats
|
|
300
|
+
stats = self.get_stats()
|
|
301
|
+
logger.info(
|
|
302
|
+
"Chunk batching stats",
|
|
303
|
+
execution_id=self.execution_id[:8],
|
|
304
|
+
**stats
|
|
305
|
+
)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Retry utilities with exponential backoff"""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import httpx
|
|
5
|
+
from typing import Callable, TypeVar, Optional
|
|
6
|
+
from functools import wraps
|
|
7
|
+
import structlog
|
|
8
|
+
|
|
9
|
+
logger = structlog.get_logger()
|
|
10
|
+
|
|
11
|
+
T = TypeVar('T')
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def retry_with_backoff(
    max_retries: int = 3,
    initial_delay: float = 1.0,
    backoff_factor: float = 2.0,
    exceptions: tuple = (httpx.TimeoutException, httpx.ConnectError)
):
    """
    Decorator for retrying synchronous functions with exponential backoff

    The wrapped function is called up to max_retries times in total. After
    a failed attempt that is not the last one, the wrapper sleeps (blocking,
    via time.sleep) for initial_delay * backoff_factor ** n seconds, where n
    is the number of failed attempts so far minus one. Once the attempts are
    exhausted the last caught exception is re-raised.

    Args:
        max_retries: Total number of attempts, including the first call.
            Values below 1 are treated as 1 so the function always runs.
        initial_delay: Delay before the second attempt, in seconds
        backoff_factor: Multiplier applied to the delay after each failure
        exceptions: Tuple of exception types to catch and retry; any other
            exception propagates immediately

    Returns:
        A decorator that wraps a callable with the retry behavior
    """
    # Guarantee at least one attempt; with zero iterations the function
    # would never be called and there would be no exception to re-raise.
    total_attempts = max(1, max_retries)

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        def wrapper(*args, **kwargs) -> T:
            for attempt in range(1, total_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    if attempt == total_attempts:
                        logger.error(
                            f"{func.__name__} failed after {total_attempts} attempts",
                            error=str(e),
                        )
                        # Bare raise keeps the original traceback intact.
                        raise

                    delay = initial_delay * (backoff_factor ** (attempt - 1))
                    # structlog takes context as keyword arguments.
                    logger.warning(
                        f"{func.__name__} failed, retrying...",
                        attempt=attempt,
                        max_retries=max_retries,
                        delay=delay,
                        error=str(e),
                    )
                    time.sleep(delay)

        return wrapper
    return decorator
|