@aj-archipelago/cortex 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/config.js +25 -4
  2. package/helper-apps/cortex-autogen2/agents.py +19 -6
  3. package/helper-apps/cortex-autogen2/services/azure_ai_search.py +115 -0
  4. package/helper-apps/cortex-autogen2/services/run_analyzer.py +594 -0
  5. package/helper-apps/cortex-autogen2/task_processor.py +98 -2
  6. package/lib/crypto.js +1 -0
  7. package/lib/entityConstants.js +12 -35
  8. package/lib/keyValueStorageClient.js +53 -1
  9. package/lib/util.js +33 -6
  10. package/package.json +2 -1
  11. package/pathways/system/entity/memory/sys_memory_manager.js +1 -0
  12. package/pathways/system/entity/memory/sys_memory_process.js +4 -3
  13. package/pathways/system/entity/memory/sys_memory_update.js +4 -3
  14. package/pathways/system/entity/memory/sys_read_memory.js +12 -4
  15. package/pathways/system/entity/memory/sys_save_memory.js +16 -9
  16. package/pathways/system/entity/memory/sys_search_memory.js +5 -4
  17. package/pathways/system/entity/sys_entity_agent.js +2 -1
  18. package/pathways/system/entity/tools/sys_tool_bing_search.js +2 -2
  19. package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +1 -2
  20. package/pathways/system/entity/tools/sys_tool_callmodel.js +2 -1
  21. package/pathways/system/entity/tools/sys_tool_coding.js +1 -2
  22. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +1 -1
  23. package/pathways/system/entity/tools/sys_tool_image.js +2 -1
  24. package/pathways/system/entity/tools/sys_tool_image_gemini.js +3 -3
  25. package/pathways/system/entity/tools/sys_tool_mermaid.js +187 -38
  26. package/pathways/system/entity/tools/sys_tool_reasoning.js +2 -0
  27. package/pathways/system/entity/tools/sys_tool_verify.js +1 -1
  28. package/pathways/transcribe_gemini.js +3 -2
  29. package/server/graphql.js +1 -1
  30. package/server/pathwayResolver.js +8 -7
  31. package/server/plugins/veoVideoPlugin.js +29 -1
  32. package/testrun.log +35371 -0
  33. package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +1 -3
  34. package/tests/unit/core/crypto.test.js +65 -0
  35. package/tests/unit/core/doubleEncryptionStorageClient.test.js +262 -0
@@ -0,0 +1,594 @@
1
+ import re
2
+ import os
3
+ import json
4
+ from datetime import datetime
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ from autogen_core.models import UserMessage
8
+
9
+
10
+ MASK = "***"
11
+ DEFAULT_KEY_PHRASE = "Validate environment and paths early"
12
+
13
+
14
+ def _truncate(text: Optional[str], limit: int = 2000) -> str:
15
+ if not text:
16
+ return ""
17
+ t = str(text)
18
+ if len(t) <= limit:
19
+ return t
20
+ return t[:limit] + "\n... [truncated]"
21
+
22
+
23
+ def redact(text: Optional[str]) -> str:
24
+ """
25
+ Redact tokens, SAS query params, and bearer headers from the given text.
26
+ Keeps general readability while removing secrets.
27
+ """
28
+ if not text:
29
+ return ""
30
+ s = str(text)
31
+ # Mask Authorization Bearer tokens
32
+ s = re.sub(r"(?i)(authorization\s*:\s*bearer\s+)([^\s]+)", rf"\1{MASK}", s)
33
+ # Mask common SAS params 'sig' and 'se'
34
+ s = re.sub(r"(?i)([?&]sig=)([^&\s]+)", rf"\1{MASK}", s)
35
+ s = re.sub(r"(?i)([?&]se=)([^&\s]+)", rf"\1{MASK}", s)
36
+ # Collapse long base64-like runs
37
+ s = re.sub(r"([A-Za-z0-9+/]{64,}={0,2})", MASK, s)
38
+ return s
39
+
40
+
41
def collect_run_metrics(messages: List[Any]) -> Dict[str, Any]:
    """Aggregate coarse counters over a run's message stream.

    Returns turn count, tool-execution count, lines mentioning error-like
    keywords, and suspected tool-call schema mismatches. Individual malformed
    messages are skipped rather than failing the whole pass.
    """
    items = messages or []
    error_markers = ("error", "exception", "traceback", "task not completed")
    tool_calls = error_mentions = schema_mismatches = 0

    for message in items:
        try:
            body = getattr(message, "content", None)
            text = "" if body is None else str(body)
            if getattr(message, "type", None) == "ToolCallExecutionEvent":
                tool_calls += 1
            lowered = text.lower()
            if any(marker in lowered for marker in error_markers):
                error_mentions += 1
            # Heuristic for tool-call payloads leaking into MultiMessage content.
            if "tool_calls" in text and "MultiMessage" in text:
                schema_mismatches += 1
        except Exception:
            continue

    return {
        "turnCount": len(items),
        "toolCallCount": tool_calls,
        "errorMentions": error_mentions,
        "schemaErrorMentions": schema_mismatches,
    }
68
+
69
+
70
def extract_errors(messages: List[Any]) -> List[Dict[str, Any]]:
    """
    Extract concrete error signals from the stream with strict filtering to avoid advice-like lines.
    Includes:
    - Tool execution errors (ToolCallExecutionEvent items flagged is_error)
    - Python/stack traces and typical 'Error:'/'Exception:' lines
    - Explicit 'TASK NOT COMPLETED:' markers
    Excludes:
    - Advice like 'include error handling', 'no errors', etc.
    """
    advisory_phrases = ("error handling", "handle errors", "no errors",
                        "without errors", "few errors", "low error")
    # Lines beginning with 'error'/'exception' or an exception class name.
    prefix_pattern = r"^(\s*(error|exception)\b|[A-Za-z]+Error:|[A-Za-z]+Exception:)"

    def classify_error_line(line: str) -> Optional[str]:
        stripped = (line or "").strip()
        if not stripped:
            return None
        lowered = stripped.lower()
        # Advisory mentions of "error" are noise, not failures.
        if any(phrase in lowered for phrase in advisory_phrases):
            return None
        if "task not completed" in lowered:
            return "termination"
        if "traceback (most recent call last)" in lowered:
            return "traceback"
        if re.search(prefix_pattern, stripped, re.IGNORECASE):
            return "runtime"
        # Generic lines merely containing 'error' are ignored.
        return None

    results: List[Dict[str, Any]] = []
    seen: set = set()

    def record(kind: str, raw: str, source, position: int, created_at) -> None:
        # Redact + truncate, then dedupe on (prefix, source) before appending.
        message_text = _truncate(redact(raw), 512)
        key = (message_text.lower()[:120], source)
        if key in seen:
            return
        seen.add(key)
        results.append({
            "type": kind,
            "message": message_text,
            "source": source or "unknown",
            "firstSeenIndex": position,
            "createdAt": str(created_at) if created_at else None,
        })

    for position, message in enumerate(messages or []):
        try:
            source = getattr(message, "source", None)
            created_at = getattr(message, "created_at", None)
            content = getattr(message, "content", None)

            # 1) Structured tool results explicitly flagged as errors.
            if getattr(message, "type", None) == "ToolCallExecutionEvent" and isinstance(content, list):
                try:
                    for tool_result in content:
                        try:
                            if getattr(tool_result, "is_error", False):
                                record("tool_error",
                                       str(getattr(tool_result, "content", "") or ""),
                                       source, position, created_at)
                        except Exception:
                            continue
                except Exception:
                    pass

            # 2) Scan textual lines for strong error markers.
            text = str(content) if content is not None else ""
            for line in text.splitlines():
                kind = classify_error_line(line)
                if kind:
                    # Categories intentionally not assigned; rely on LLM to infer lessons.
                    record(kind, line.strip(), source, position, created_at)
        except Exception:
            continue

    return results
159
+
160
+
161
async def summarize_learnings(messages_text: str, errors_text: str, model_client) -> Tuple[str, str]:
    """
    Return (best_practices_text, antipatterns_text) using the LLM; avoid static heuristics.

    Any failure (model error, missing client, parse drift) degrades to ("", "")
    rather than raising, so the summary is strictly optional for callers.
    """
    try:
        prompt = f"""
You are a senior reliability engineer extracting high-value, reusable lessons from an agent transcript.

Task: Produce two sections with concise bullets (≤18 words each):
1) BEST PRACTICES (5–10 bullets): concrete, repeatable actions that prevent failures and speed future runs
2) ANTIPATTERNS (5 bullets): mistakes to avoid

Rules:
- No secrets or environment-specific values
- Prefer actionable checks (dependency preflight, schema validation), robust fallbacks, and proven fast paths
- Reflect image acquisition pitfalls (network blocks, non-image payloads, licensing) if present

TRANSCRIPT (redacted):
{_truncate(messages_text, 6000)}

ERROR EXCERPTS:
{_truncate(errors_text, 2000)}

Output format exactly:
BEST PRACTICES:
- ...
- ...
ANTIPATTERNS:
- ...
- ...
"""
        msgs = [UserMessage(content=prompt, source="run_analyzer_summarize")]
        resp = await model_client.create(messages=msgs)
        reply = (resp.content or "").strip()

        # Route bullet lines into whichever section header was seen last.
        buckets = {"best": [], "anti": []}
        current = None
        for raw_line in reply.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            header = line.upper()
            if header.startswith("BEST PRACTICES"):
                current = "best"
            elif header.startswith("ANTIPATTERNS"):
                current = "anti"
            elif line.startswith("-") and current:
                buckets[current].append(line)
        return ("\n".join(buckets["best"][:10]), "\n".join(buckets["anti"][:10]))
    except Exception:
        return ("", "")
217
+
218
+
219
async def generate_improvement_playbook(
    messages_text: str,
    errors: List[Dict[str, Any]],
    metrics: Dict[str, Any],
    external_sources: Optional[List[str]],
    model_client,
) -> Dict[str, Any]:
    """
    Ask the LLM to produce a compact, high-signal "Improvements Playbook" for future runs.
    Returns dict: { text: str, actionables: int, improvement_score: int, has_failures: bool, hints: List[str] }

    Fix: every return path now includes "hints" (possibly empty). Previously the
    no-client and error fallbacks omitted it, so callers indexing result["hints"]
    would KeyError on exactly the degraded paths.
    """
    try:
        if not model_client:
            # Static fallback playbook when no model is available.
            return {
                "text": (
                    "IMPROVEMENTS PLAYBOOK\n\n"
                    "Key Failures & Fixes:\n- None observed.\n\n"
                    "Proven Source Patterns:\n- Prefer authoritative sites; record source URLs explicitly.\n\n"
                    "Effective Patterns:\n- Validate required data structures before main logic.\n- Use absolute paths; print/log key steps.\n\n"
                    "Reliability:\n- Retry downloads 3x with backoff; cache downloaded assets.\n\n"
                    "Guardrails:\n- Terminate gracefully on missing dependencies and report clear remediation.\n\n"
                    "Next-Time Plan Outline:\n1) Verify dependencies\n2) Validate inputs\n3) Fetch assets with retries\n4) Assemble\n5) Upload & present\n"
                ),
                "actionables": 6,
                "improvement_score": 60,
                "has_failures": bool(errors),
                "hints": [],
            }

        # Compact the first 30 errors into one bullet block for the prompt.
        err_lines = []
        for e in (errors or [])[:30]:
            try:
                err_lines.append(f"- [{e.get('source','?')}] {e.get('message','')}")
            except Exception:
                continue
        err_block = "\n".join(err_lines)
        src_block = "\n".join([s for s in (external_sources or []) if isinstance(s, str)])
        prompt = f"""
You are optimizing a multi-agent system. Create a compact, high-signal Improvements Playbook strictly for future runs.

GOAL: Document only reusable improvements and concrete fixes that will materially improve similar tasks next time. Avoid generic advice.

INPUT METRICS (json):
{json.dumps(metrics, indent=2)}

CONCRETE FAILURES:
{_truncate(err_block, 1800)}

KNOWN EXTERNAL SOURCES (non-blob):
{_truncate(src_block, 1200)}

CONVERSATION EXCERPTS (redacted):
{_truncate(messages_text, 6000)}

OUTPUT FORMAT (exact headings, concise bullets ≤18 words each):
IMPROVEMENTS PLAYBOOK

Key Failures & Fixes:
- ...

Proven Source Patterns:
- ...

Effective Tool/Code Patterns:
- ...

Reliability (retries, rate-limit, caching):
- ...

Guardrails & Preconditions:
- ...

Next-Time Plan Outline:
1) ...
2) ...
3) ...

Image Acquisition Failure Taxonomy (when URLs valid but not usable):
- Network/HTTP: 403/404/429, timeouts, SSL/captcha blocks
- Format/Integrity: Content-Type mismatch, non-image payload, Pillow .verify() fails
- License/Robots: disallowed scraping or reuse; fallback and record reason
- Mitigations: HEAD check; user-agent; backoff; alternate domain; sprite/fallback pack; manifest notes

IMPROVEMENT SCORE: <0-100>
ACTIONABLES: <integer count of distinct concrete actions>
"""
        msgs = [UserMessage(content=prompt, source="run_analyzer_improvements")]
        resp = await model_client.create(messages=msgs)
        text = (resp.content or "").strip()

        # Parse the trailing score/actionables markers the prompt requests.
        score = 0
        actionables = 0
        try:
            m = re.search(r"IMPROVEMENT\s*SCORE\s*:\s*(\d{1,3})", text, re.IGNORECASE)
            if m:
                score = max(0, min(100, int(m.group(1))))
        except Exception:
            pass
        try:
            m2 = re.search(r"ACTIONABLES\s*:\s*(\d+)", text, re.IGNORECASE)
            if m2:
                actionables = max(0, int(m2.group(1)))
        except Exception:
            # Fallback: count bullets
            actionables = _count_bullets(text)

        # Build structured quick-reference hints for planner reuse next time.
        hints: List[str] = []
        tlow = text.lower()
        if any(k in tlow for k in ["no module named", "importerror", "cannot import"]):
            hints.append("Preflight: import python-pptx, Pillow; pip install if import fails")
        if any(k in tlow for k in ["categorychartdata", "radarchart", "bar chart data"]):
            hints.append("Use python-pptx CategoryChartData; avoid RadarChartData (unsupported)")
        if any(k in tlow for k in ["antialias", "resampling.lanczos"]):
            hints.append("Pillow: use Image.Resampling.LANCZOS instead of ANTIALIAS")
        if any(k in tlow for k in ["cannot identify image file", "head request", "license"]):
            hints.append("Validate image URLs via HEAD; Pillow .verify(); ensure license before embed")

        return {
            "text": text,
            "actionables": actionables if actionables > 0 else _count_bullets(text),
            "improvement_score": score,
            "has_failures": bool(errors),
            "hints": hints,
        }
    except Exception:
        return {
            "text": "IMPROVEMENTS PLAYBOOK\n\nKey Failures & Fixes:\n- None parsed due to summarizer error.",
            "actionables": 0,
            "improvement_score": 0,
            "has_failures": bool(errors),
            "hints": [],
        }
351
+
352
+
353
def build_run_document(
    task_id: str,
    task_text: str,
    owner: Optional[str],
    models: Optional[Dict[str, Any]],
    assets: Optional[Dict[str, Any]],
    metrics: Dict[str, Any],
    errors: List[Dict[str, Any]],
    improvement_text: str,
    final_snippet: str,
) -> Dict[str, Any]:
    """Assemble the search-index document for one completed run.

    Concatenates metrics, error excerpts, the improvements playbook, derived
    tags, a redacted final-output snippet, and non-blob source URLs into a
    single "content" field. `models` is accepted but currently unused —
    presumably reserved for provenance; confirm with callers before removing.

    Fix: datetime.utcnow() is naive and deprecated (Python 3.12+); use an
    aware UTC clock while preserving the exact "...Z" timestamp format.
    """
    from datetime import timezone  # local: module-level import only brings in `datetime`

    now_iso = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    # Build sectioned content text (single string field for index)
    parts: List[str] = []
    parts.append("Metrics:\n" + _truncate(json.dumps(metrics, indent=2), 1200))
    if errors:
        err_lines = []
        for e in errors[:20]:
            try:
                err_lines.append(f"- [{e.get('source','?')}] {e.get('message','')}")
            except Exception:
                continue
        parts.append("Errors:\n" + "\n".join(err_lines))
    if improvement_text:
        parts.append("Improvements Playbook:\n" + improvement_text)
        # Extract high-signal tags for future retrieval and reuse
        try:
            tags: List[str] = []
            tlow = improvement_text.lower()
            if any(k in tlow for k in ["no module named", "importerror", "cannot import"]):
                tags.append("dependency")
            if any(k in tlow for k in ["image", "png", "jpg", "jpeg", "webp", "cannot identify image file", "head request", "license"]):
                tags.append("image")
            if any(k in tlow for k in ["pptx", "categorychartdata", "radarchart", "antialias", "resampling.lanczos"]):
                tags.append("pptx_api")
            if tags:
                parts.append("Tags:\n" + ", ".join(sorted(set(tags))))
        except Exception:
            pass
    if final_snippet:
        parts.append("Final Output Snippet:\n" + _truncate(redact(final_snippet), 2000))
    # Include external source URLs (not SAS) for provenance; exclude Azure blob SAS links
    try:
        if assets and isinstance(assets, dict):
            raw_sources = []
            try:
                raw_sources.extend(list(assets.get("external_media_urls") or []))
            except Exception:
                pass

            def _is_azure_blob(url: str) -> bool:
                try:
                    return "blob.core.windows.net" in (url or "").lower()
                except Exception:
                    return False

            srcs = []
            seen = set()
            for u in raw_sources:
                # Keep order, skip non-strings, blob SAS links, and duplicates.
                if not isinstance(u, str) or _is_azure_blob(u) or u in seen:
                    continue
                seen.add(u)
                srcs.append(u)
            if srcs:
                parts.append("Sources:\n" + _truncate("\n".join(srcs[:24]), 2000))
    except Exception:
        pass

    content_blob = "\n\n".join(parts)

    doc = {
        "id": task_id,
        "date": now_iso,
        "task": _truncate(redact(task_text), 4000),
        "content": content_blob,
        "requestId": task_id,
    }
    # Include owner only if provided
    if owner:
        doc["owner"] = owner
    return doc
437
+
438
+
439
async def summarize_prior_learnings(similar_docs: List[Dict[str, Any]], model_client) -> str:
    """
    Build <=8 fast-path directives from prior docs aimed at minimizing steps next time.
    Preference order: Improvements Playbook sections; fallback to Best Practices/Antipatterns.
    Output: short bullets (≤18 words) that can be directly embedded into planning as constraints.
    """
    playbook_headings = (
        "Key Failures & Fixes:",
        "Effective Tool/Code Patterns:",
        "Reliability (retries, rate-limit, caching):",
        "Guardrails & Preconditions:",
        "Next-Time Plan Outline:",
    )

    # Harvest bullet lines from each prior document's content field.
    bullets: List[str] = []
    for doc in similar_docs or []:
        try:
            content = str(doc.get("content") or "")
            if not content:
                continue
            found_playbook = False
            for heading in playbook_headings:
                start = content.find(heading)
                if start < 0:
                    continue
                segment = content[start:].split("\n\n", 1)[0]
                for raw_line in segment.splitlines()[1:]:
                    line = raw_line.strip()
                    if not line:
                        continue
                    # Normalize numbered plan steps ("1)" / "1.") into bullets.
                    if line[0].isdigit() and line[1:2] in (")", "."):
                        line = "- " + line
                    if line.startswith("-") and len(line) > 2:
                        bullets.append(line)
                found_playbook = True
            if not found_playbook:
                # Fallback: Best Practices / Antipatterns sections.
                for heading in ("Best Practices:", "Antipatterns:"):
                    start = content.find(heading)
                    if start < 0:
                        continue
                    segment = content[start:].split("\n\n", 1)[0]
                    for raw_line in segment.splitlines()[1:]:
                        line = raw_line.strip()
                        if line.startswith("-") and len(line) > 2:
                            bullets.append(line)
        except Exception:
            continue

    # Last resort: take leading bullet lines anywhere in the content (cap 12).
    if not bullets:
        for doc in similar_docs or []:
            try:
                for raw_line in str(doc.get("content") or "").splitlines():
                    line = raw_line.strip()
                    if line.startswith("-") and len(line) > 2:
                        bullets.append(line)
                    if len(bullets) >= 12:
                        break
                if len(bullets) >= 12:
                    break
            except Exception:
                continue

    # Summarize into ≤8 concise, step-minimizing directives via the LLM.
    if model_client and bullets:
        try:
            prompt = f"""
Condense these prior lessons into 5-8 FAST-PATH DIRECTIVES (≤18 words each) to minimize steps next time.
Focus on dependency preflight, known API substitutions, asset/download validation, and deliverables verification.
Avoid secrets and environment-specific details.

LESSONS:
{chr(10).join(bullets[:40])}

Output bullets only (no headings):
- ...
- ...
"""
            msgs = [UserMessage(content=prompt, source="run_analyzer_prior")]
            resp = await model_client.create(messages=msgs)
            reply = (resp.content or "").strip()
            # Keep only bullet lines from the model's reply.
            directives = [ln for ln in reply.splitlines() if ln.strip().startswith("-")]
            if directives:
                return "\n".join(directives[:8])
        except Exception:
            pass

    # No model or failure: dedupe in order and return up to 8 bullets.
    deduped: List[str] = []
    seen: set = set()
    for bullet in bullets:
        if bullet in seen:
            continue
        deduped.append(bullet)
        seen.add(bullet)
        if len(deduped) >= 8:
            break
    if not deduped:
        deduped = [
            "- Validate environment and paths early",
            "- Log outputs and errors concisely",
            "- Use absolute paths and avoid placeholders",
            "- Avoid repeating failed steps",
            "- Upload deliverables once, then reference URLs",
        ]
    return "\n".join(deduped)
545
+
546
+
547
+ def _count_bullets(text: Optional[str]) -> int:
548
+ try:
549
+ return sum(1 for ln in (text or "").splitlines() if ln.strip().startswith("-"))
550
+ except Exception:
551
+ return 0
552
+
553
+
554
def should_index_run(metrics: Dict[str, Any], errors: List[Dict[str, Any]], best_practices_text: str, antipatterns_text: str, assets: Optional[Dict[str, Any]] = None) -> bool:
    """
    Decide whether to index the run based on signal heuristics:
    - Index if we observed any errors or schema issues
    - Index if tools/assets were used (useful operational trace)
    - Index if there is substantial learnings content (>=7 bullets) and conversation had enough depth
    - Otherwise skip to avoid noise
    """
    try:
        def metric(name: str) -> int:
            return int(metrics.get(name) or 0)

        # Any explicit errors or schema problems → always index.
        if errors:
            return True
        if metric("schemaErrorMentions") > 0:
            return True

        # Tool usage or produced assets are a valuable operational trace.
        if metric("toolCallCount") > 0:
            return True
        if assets:
            try:
                upload_count = len(assets.get("uploaded_file_urls") or {})
                media_count = len(assets.get("external_media_urls") or [])
                if upload_count + media_count > 0:
                    return True
            except Exception:
                pass

        # Content-only heuristic: many actionable bullets plus enough turns,
        # and the learnings must not be the generic fallback boilerplate.
        bullet_total = _count_bullets(best_practices_text) + _count_bullets(antipatterns_text)
        if bullet_total >= 7 and metric("turnCount") >= 12:
            combined = f"{best_practices_text}\n{antipatterns_text}".lower()
            return DEFAULT_KEY_PHRASE.lower() not in combined

        return False
    except Exception:
        # On analyzer failure, be conservative and skip
        return False
593
+
594
+