aline-ai 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
realign/worker_core.py CHANGED
@@ -14,13 +14,16 @@ import logging
 import math
 import os
 import sys
+import time
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, Optional
 
 from .db.sqlite_db import SQLiteDatabase
 from .db.locks import make_lock_owner
+from .logging_config import setup_logger
 
-logger = logging.getLogger(__name__)
+logger = setup_logger("realign.worker_core", "worker_core.log")
 
 
 class PermanentJobError(RuntimeError):
@@ -53,11 +56,10 @@ class AlineWorker:
         self.worker_id = make_lock_owner("worker")
         self.running = False
 
-        # Reuse the existing commit pipeline (LLM turn creation) implemented on DialogueWatcher.
-        # We instantiate it without starting its polling loop.
-        from .watcher_core import DialogueWatcher
+        # Commit/turn processing pipeline (no watcher loop / polling).
+        from .commit_pipeline import CommitPipeline
 
-        self._watcher = DialogueWatcher()
+        self._pipeline = CommitPipeline(lock_owner_prefix="worker")
 
     async def start(self) -> None:
         self.running = True
@@ -67,7 +69,7 @@ class AlineWorker:
             try:
                 job = self.db.claim_next_job(
                     worker_id=self.worker_id,
-                    kinds=["turn_summary", "session_summary", "agent_description"],
+                    kinds=["session_process", "turn_summary", "session_summary", "agent_description"],
                 )
                 if not job:
                     await asyncio.sleep(self.poll_interval_seconds)
@@ -91,6 +93,17 @@ class AlineWorker:
         payload = job.get("payload") or {}
 
         try:
+            if kind == "session_process":
+                await self._process_session_process_job(payload)
+                try:
+                    session_id = str(payload.get("session_id") or "")
+                    if session_id:
+                        self.db.enqueue_session_summary_job(session_id=session_id)
+                except Exception as e:
+                    logger.warning(f"Failed to enqueue session summary after session_process: {e}")
+                self.db.finish_job(job_id=job_id, worker_id=self.worker_id, success=True)
+                return
+
             if kind == "turn_summary":
                 await self._process_turn_summary_job(payload)
                 # Always enqueue a session_summary job after a successful turn job.
@@ -183,6 +196,188 @@ class AlineWorker:
                 retry_after_seconds=delay,
             )
 
+    async def _process_session_process_job(self, payload: Dict[str, Any]) -> None:
+        t0 = time.monotonic()
+        session_id = str(payload.get("session_id") or "").strip()
+        session_file_path = Path(str(payload.get("session_file_path") or ""))
+        session_type_raw = str(payload.get("session_type") or "").strip().lower()
+        source_event = str(payload.get("source_event") or "").strip().lower()
+        workspace_path_raw = payload.get("workspace_path")
+        no_track = bool(payload.get("no_track") or False)
+        agent_id = str(payload.get("agent_id") or "").strip()
+        terminal_id = str(payload.get("terminal_id") or "").strip()
+
+        if not session_file_path:
+            raise ValueError(f"Invalid session_process payload: {payload}")
+        if not session_file_path.exists():
+            raise FileNotFoundError(f"Session file not found: {session_file_path}")
+        if not session_id:
+            session_id = session_file_path.stem
+
+        logger.info(
+            "session_process start: session_id=%s source_event=%s session_type=%s file=%s",
+            session_id,
+            source_event or "",
+            session_type_raw or "",
+            str(session_file_path),
+        )
+
+        session_type = session_type_raw or str(self._pipeline._detect_session_type(session_file_path))
+
+        if session_type == "codex" and (not agent_id or not terminal_id):
+            # Best-effort: infer linkage from Aline-managed CODEX_HOME layouts even if the notify
+            # runner didn't propagate env vars into the job payload.
+            try:
+                from .codex_home import agent_id_from_codex_session_file, terminal_id_from_codex_session_file
+
+                if not agent_id:
+                    agent_id = str(agent_id_from_codex_session_file(session_file_path) or "").strip()
+                if not terminal_id:
+                    terminal_id = str(terminal_id_from_codex_session_file(session_file_path) or "").strip()
+            except Exception:
+                pass
+
+        # We intentionally do NOT validate that workspace_path exists; it is used mostly
+        # for stable grouping/metadata, and the commit pipeline can operate without a real repo.
+        project_path: Path
+        if isinstance(workspace_path_raw, str) and workspace_path_raw.strip():
+            project_path = Path(workspace_path_raw.strip())
+        else:
+            # Best-effort: use session metadata for Codex; otherwise fall back to existing path.
+            project_path = session_file_path.parent
+            if session_type == "codex":
+                try:
+                    from .codex_terminal_linker import read_codex_session_meta
+
+                    meta = read_codex_session_meta(session_file_path)
+                    if meta and (meta.cwd or "").strip():
+                        project_path = Path(str(meta.cwd).strip())
+                except Exception:
+                    pass
+
+        # Ensure the session record exists so agent associations are not lost on "no-op" jobs
+        # (e.g. when all turns are already committed and session_process returns early).
+        try:
+            started_at = datetime.fromtimestamp(session_file_path.stat().st_mtime)
+            if session_type == "codex":
+                try:
+                    from .codex_terminal_linker import read_codex_session_meta
+
+                    meta = read_codex_session_meta(session_file_path)
+                    if meta and meta.started_at is not None:
+                        dt = meta.started_at
+                        if dt.tzinfo is not None:
+                            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
+                        started_at = dt
+                except Exception:
+                    pass
+
+            self.db.get_or_create_session(
+                session_id=session_id,
+                session_file_path=session_file_path,
+                session_type=session_type,
+                started_at=started_at,
+                workspace_path=str(project_path) if str(project_path).strip() else None,
+                metadata={"source_event": source_event or "", "source": "worker"},
+                agent_id=agent_id or None,
+            )
+        except Exception:
+            pass
+
+        # Best-effort: link session to agent/terminal metadata even when no turns need processing.
+        if agent_id and session_id:
+            try:
+                self.db.update_session_agent_id(session_id, agent_id)
+            except Exception:
+                pass
+        if terminal_id and session_id:
+            try:
+                self.db.insert_window_link(
+                    terminal_id=terminal_id,
+                    agent_id=agent_id or None,
+                    session_id=session_id,
+                    provider=session_type or session_type_raw or "",
+                    source=f"{session_type or session_type_raw or 'unknown'}:worker",
+                    ts=time.time(),
+                )
+            except Exception:
+                pass
+
+        # Determine the safe "max turn" boundary.
+        if source_event == "stop":
+            max_turn = int(self._pipeline._get_total_turn_count(session_file_path))
+        else:
+            # Polling/idle paths: respect trigger semantics (Claude excludes last turn).
+            max_turn = int(self._pipeline._count_complete_turns(session_file_path))
+
+        if max_turn <= 0:
+            logger.info(
+                "session_process noop: session_id=%s max_turn=%s duration_s=%.3f",
+                session_id,
+                max_turn,
+                time.monotonic() - t0,
+            )
+            return
+
+        committed = set()
+        try:
+            committed = self.db.get_committed_turn_numbers(session_id)
+        except Exception:
+            committed = set()
+
+        processed = 0
+        skipped = 0
+
+        missing_turns = [t for t in range(1, max_turn + 1) if t not in committed]
+        if not missing_turns:
+            logger.info(
+                "session_process noop: session_id=%s max_turn=%s duration_s=%.3f",
+                session_id,
+                max_turn,
+                time.monotonic() - t0,
+            )
+            return
+
+        # Batch turns under one project lease lock to reduce overhead.
+        if len(missing_turns) > 1 and hasattr(self._pipeline, "_run_realign_commit_batch"):
+            results = self._pipeline._run_realign_commit_batch(
+                project_path,
+                session_file=session_file_path,
+                target_turns=missing_turns,
+                quiet=True,
+                skip_session_summary=True,
+                no_track=no_track,
+            )
+            for t in missing_turns:
+                if results.get(int(t)):
+                    committed.add(int(t))
+                    processed += 1
+                await asyncio.sleep(0)
+        else:
+            # Fallback: per-turn commit (single missing turn or old pipeline).
+            for turn_number in missing_turns:
+                created = self._pipeline._run_realign_commit(
+                    project_path,
+                    session_file=session_file_path,
+                    target_turn=int(turn_number),
+                    quiet=True,
+                    skip_session_summary=True,
+                    no_track=no_track,
+                )
+                if created:
+                    committed.add(int(turn_number))
+                    processed += 1
+                await asyncio.sleep(0)
+
+        logger.info(
+            "session_process done: session_id=%s max_turn=%s processed=%s skipped=%s duration_s=%.3f",
+            session_id,
+            max_turn,
+            processed,
+            skipped,
+            time.monotonic() - t0,
+        )
+
     async def _process_turn_summary_job(self, payload: Dict[str, Any]) -> None:
         session_id = str(payload.get("session_id") or "")
         turn_number = int(payload.get("turn_number") or 0)
@@ -201,19 +396,15 @@ class AlineWorker:
         if not session_file_path.exists():
             raise FileNotFoundError(f"Session file not found: {session_file_path}")
 
-        project_path: Optional[Path] = None
+        # Intentionally avoid expensive/fragile project path extraction. If no explicit path is
+        # provided, fall back to the session file's parent directory.
         if isinstance(workspace_path_raw, str) and workspace_path_raw.strip():
             project_path = Path(workspace_path_raw.strip())
-            if not project_path.exists():
-                project_path = None
-        if project_path is None:
-            project_path = self._watcher._extract_project_path(session_file_path)
-
-        if not project_path:
-            raise RuntimeError(f"Could not determine project path for session {session_id}")
+        else:
+            project_path = session_file_path.parent
 
         # Run the existing commit pipeline (writes turn record + content).
-        created = self._watcher._run_realign_commit(
+        created = self._pipeline._run_realign_commit(
             project_path,
             session_file=session_file_path,
             target_turn=turn_number,
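
For orientation: the new _process_session_process_job handler reads a fixed set of keys from the job payload via payload.get(...). A minimal sketch of such a payload follows; the variable name and all values are hypothetical, and of the keys shown only session_file_path must point to an existing file.

# Hypothetical "session_process" job payload; keys mirror what the handler above reads,
# values are made up for illustration.
example_payload = {
    "session_id": "sess-0001",                              # falls back to the file stem when empty
    "session_file_path": "/tmp/sessions/sess-0001.jsonl",   # must exist, or FileNotFoundError is raised
    "session_type": "codex",                                # optional; detected from the file when omitted
    "source_event": "stop",                                 # "stop" processes up to the total turn count
    "workspace_path": "/tmp/projects/demo",                 # optional; handler falls back to the file's parent
    "no_track": False,
    "agent_id": "",                                         # optional; inferred for Codex sessions when empty
    "terminal_id": "",
}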