loki-mode 7.63.1 → 7.65.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/memory/engine.py CHANGED
@@ -332,60 +332,75 @@ class MemoryEngine:
332
332
  real topics immediately after a session ends.
333
333
  """
334
334
  try:
335
- index = self.storage.read_json("index.json") or {
336
- "version": "1.1.0",
337
- "topics": [],
338
- "total_memories": 0,
339
- }
340
- context = episode.get("context", {}) if isinstance(episode.get("context"), dict) else {}
341
- phase = (context.get("phase") or episode.get("phase") or "general").lower()
342
- goal = (context.get("goal") or episode.get("goal") or "")[:200]
343
- # Topic id = phase. Multiple episodes in the same phase share a topic.
344
- topic_id = phase or "general"
345
- now = datetime.now(timezone.utc).isoformat()
346
- episode_id = episode.get("id")
347
- cost = float(episode.get("cost_usd", 0) or 0)
348
- tokens = int(episode.get("tokens_used", 0) or 0)
349
- files = list(episode.get("files_modified", []) or [])
350
-
351
- found = None
352
- for topic in index.get("topics", []):
353
- if topic.get("id") == topic_id:
354
- found = topic
355
- break
356
- if found is None:
357
- index.setdefault("topics", []).append({
358
- "id": topic_id,
359
- "summary": goal or f"Activity in phase {topic_id}",
360
- "episode_ids": [episode_id] if episode_id else [],
361
- "episode_count": 1,
362
- "total_cost_usd": cost,
363
- "total_tokens": tokens,
364
- "files_touched": files[:20],
365
- "first_seen": now,
366
- "last_accessed": now,
367
- "relevance_score": 0.5,
368
- })
369
- index["total_memories"] = index.get("total_memories", 0) + 1
370
- else:
371
- # Only count a given episode once. On resume/checkpoint the same
372
- # trace id can be re-saved; without this guard episode_count,
373
- # total_cost_usd, and total_tokens would inflate on every re-save
374
- # even though episode_ids is already de-duplicated.
375
- if episode_id and episode_id not in found.get("episode_ids", []):
376
- found.setdefault("episode_ids", []).append(episode_id)
377
- found["episode_count"] = found.get("episode_count", 0) + 1
378
- found["total_cost_usd"] = float(found.get("total_cost_usd", 0) or 0) + cost
379
- found["total_tokens"] = int(found.get("total_tokens", 0) or 0) + tokens
380
- merged = set(found.get("files_touched", []) or []) | set(files[:20])
381
- found["files_touched"] = sorted(merged)[:50]
382
- found["last_accessed"] = now
383
-
384
- index["last_updated"] = now
385
- self.storage.write_json("index.json", index)
335
+ # H4 lost-update fix (wave-6): hold ONE exclusive lock spanning the
336
+ # full read-modify-write of index.json. _file_lock is reentrant per
337
+ # thread (storage._held_locks is threading.local) and cross-process
338
+ # safe (fcntl.flock), so the inner read_json/write_json calls -- which
339
+ # re-enter _file_lock on the SAME resolved path -- are no-ops and do
340
+ # not deadlock. The lock target is derived from storage._resolve_path
341
+ # so its string key is byte-identical to the one read_json/write_json
342
+ # compute internally (mismatched keys would self-deadlock).
343
+ index_lock = Path(self.storage._resolve_path("index.json"))
344
+ with self.storage._file_lock(index_lock, exclusive=True):
345
+ index = self.storage.read_json("index.json") or {
346
+ "version": "1.1.0",
347
+ "topics": [],
348
+ "total_memories": 0,
349
+ }
350
+ context = episode.get("context", {}) if isinstance(episode.get("context"), dict) else {}
351
+ phase = (context.get("phase") or episode.get("phase") or "general").lower()
352
+ goal = (context.get("goal") or episode.get("goal") or "")[:200]
353
+ # Topic id = phase. Multiple episodes in the same phase share a topic.
354
+ topic_id = phase or "general"
355
+ now = datetime.now(timezone.utc).isoformat()
356
+ episode_id = episode.get("id")
357
+ cost = float(episode.get("cost_usd", 0) or 0)
358
+ tokens = int(episode.get("tokens_used", 0) or 0)
359
+ files = list(episode.get("files_modified", []) or [])
360
+
361
+ found = None
362
+ for topic in index.get("topics", []):
363
+ if topic.get("id") == topic_id:
364
+ found = topic
365
+ break
366
+ if found is None:
367
+ index.setdefault("topics", []).append({
368
+ "id": topic_id,
369
+ "summary": goal or f"Activity in phase {topic_id}",
370
+ "episode_ids": [episode_id] if episode_id else [],
371
+ "episode_count": 1,
372
+ "total_cost_usd": cost,
373
+ "total_tokens": tokens,
374
+ "files_touched": files[:20],
375
+ "first_seen": now,
376
+ "last_accessed": now,
377
+ "relevance_score": 0.5,
378
+ })
379
+ index["total_memories"] = index.get("total_memories", 0) + 1
380
+ else:
381
+ # Only count a given episode once. On resume/checkpoint the same
382
+ # trace id can be re-saved; without this guard episode_count,
383
+ # total_cost_usd, and total_tokens would inflate on every re-save
384
+ # even though episode_ids is already de-duplicated.
385
+ if episode_id and episode_id not in found.get("episode_ids", []):
386
+ found.setdefault("episode_ids", []).append(episode_id)
387
+ found["episode_count"] = found.get("episode_count", 0) + 1
388
+ found["total_cost_usd"] = float(found.get("total_cost_usd", 0) or 0) + cost
389
+ found["total_tokens"] = int(found.get("total_tokens", 0) or 0) + tokens
390
+ merged = set(found.get("files_touched", []) or []) | set(files[:20])
391
+ found["files_touched"] = sorted(merged)[:50]
392
+ found["last_accessed"] = now
393
+
394
+ index["last_updated"] = now
395
+ self.storage.write_json("index.json", index)
386
396
  except Exception: # noqa: BLE001
387
- # Never let index update break episode storage.
388
- pass
397
+ # Never let index update break episode storage, but make the
398
+ # failure observable instead of swallowing it silently (L2).
399
+ logger.warning(
400
+ "Failed to update index.json with episode %s",
401
+ episode.get("id"),
402
+ exc_info=True,
403
+ )
389
404
 
390
405
  def get_episode(self, episode_id: str) -> Optional[EpisodeTrace]:
391
406
  """
@@ -522,8 +537,13 @@ class MemoryEngine:
522
537
  for pattern in patterns_data.get("patterns", []):
523
538
  if not isinstance(pattern, dict):
524
539
  continue
525
- # Filter by confidence
526
- if pattern.get("confidence", 0) < min_confidence:
540
+ # Filter by confidence. Guard against an explicit null confidence
541
+ # (corrupt/hand-edited record): None < float raises TypeError in
542
+ # Python 3, so treat a null as 0 (filtered out unless threshold 0).
543
+ pattern_confidence = pattern.get("confidence")
544
+ if pattern_confidence is None:
545
+ pattern_confidence = 0
546
+ if pattern_confidence < min_confidence:
527
547
  continue
528
548
 
529
549
  # Filter by category if specified
@@ -550,8 +570,10 @@ class MemoryEngine:
550
570
  if pattern_data is None:
551
571
  return
552
572
 
553
- # Update fields
554
- pattern_data["usage_count"] = pattern_data.get("usage_count", 0) + 1
573
+ # Update fields. `or 0` guards against an explicit null usage_count
574
+ # (corrupt/hand-edited record) crashing the increment with a TypeError;
575
+ # a null and 0 are equivalent here so `or` is safe.
576
+ pattern_data["usage_count"] = (pattern_data.get("usage_count") or 0) + 1
555
577
  pattern_data["last_used"] = datetime.now(timezone.utc).isoformat()
556
578
 
557
579
  # Write back via save_pattern which holds an exclusive lock during
@@ -577,9 +599,24 @@ class MemoryEngine:
577
599
  skill_id = skill_dict.get("id", f"skill-{self._generate_id()}")
578
600
  skill_dict["id"] = skill_id
579
601
 
580
- # Generate filename from skill name or ID
581
- skill_name = skill_dict.get("name", skill_id)
582
- filename = skill_name.lower().replace(" ", "-").replace("_", "-")
602
+ # Generate filename from skill name or ID.
603
+ # H3 path-traversal fix (wave-6): the previous filename derivation only
604
+ # replaced spaces and underscores, so a skill name like
605
+ # "../../../tmp/pwned" kept its "/" and ".." and escaped the memory root
606
+ # via the raw open(skill_path, "w") below (which bypasses _resolve_path).
607
+ # Sanitize to safe chars only, matching storage.save_skill's house style,
608
+ # and fall back to the skill id when sanitization collapses to empty.
609
+ skill_name = skill_dict.get("name") or skill_id
610
+ normalized = skill_name.lower().replace(" ", "-").replace("_", "-")
611
+ filename = "".join(
612
+ c if (c.isalnum() or c == "-") else "-"
613
+ for c in normalized
614
+ ).strip("-")
615
+ if not filename:
616
+ filename = "".join(
617
+ c if (c.isalnum() or c == "-") else "-"
618
+ for c in skill_id.lower()
619
+ ).strip("-") or "skill"
583
620
 
584
621
  # Store as markdown
585
622
  content = self._skill_to_markdown(skill_dict)
@@ -899,57 +936,65 @@ class MemoryEngine:
899
936
  context = episode.get("context", {})
900
937
  action_entry = {
901
938
  "timestamp": episode.get("timestamp", datetime.now(timezone.utc).isoformat()),
902
- "action": context.get("goal", "Task completed")[:100],
939
+ "action": (context.get("goal") or "Task completed")[:100],
903
940
  "outcome": episode.get("outcome", "unknown"),
904
- "topic_id": context.get("phase", "general"),
941
+ "topic_id": context.get("phase") or "general",
905
942
  }
906
943
 
907
944
  self.storage.update_timeline(action_entry)
908
945
 
909
946
  def _update_index_with_pattern(self, pattern: Dict[str, Any]) -> None:
910
947
  """Update index with pattern topic."""
911
- index = self.storage.read_json("index.json") or {
912
- "version": "1.0",
913
- "topics": [],
914
- "total_memories": 0,
915
- "total_tokens_available": 0,
916
- }
948
+ # H4 lost-update fix (wave-6): hold ONE exclusive lock spanning the full
949
+ # read-modify-write of index.json so concurrent store_pattern (and
950
+ # store_episode) calls cannot clobber each other. See the matching note
951
+ # in _update_index_with_episode for why the lock target is derived from
952
+ # storage._resolve_path and why the inner read_json/write_json calls do
953
+ # not deadlock (reentrant per-thread, cross-process safe via flock).
954
+ index_lock = Path(self.storage._resolve_path("index.json"))
955
+ with self.storage._file_lock(index_lock, exclusive=True):
956
+ index = self.storage.read_json("index.json") or {
957
+ "version": "1.0",
958
+ "topics": [],
959
+ "total_memories": 0,
960
+ "total_tokens_available": 0,
961
+ }
917
962
 
918
- category = pattern.get("category", "general")
919
-
920
- # An index.json that is valid JSON but missing the "topics" key (e.g.
921
- # written by an older/partial writer, or hand-edited) would crash here
922
- # on index["topics"] because the `or {...}` default only fires when the
923
- # whole file is falsy. setdefault matches the defensive pattern used in
924
- # the sibling _update_index_with_episode.
925
- topics = index.setdefault("topics", [])
926
-
927
- # Find or create topic
928
- topic_found = False
929
- for topic in topics:
930
- if topic.get("id") == category:
931
- topic["last_accessed"] = datetime.now(timezone.utc).isoformat()
932
- topic["relevance_score"] = max(
933
- topic.get("relevance_score", 0.5),
934
- pattern.get("confidence", 0.5),
935
- )
936
- topic_found = True
937
- break
963
+ category = pattern.get("category", "general")
964
+
965
+ # An index.json that is valid JSON but missing the "topics" key (e.g.
966
+ # written by an older/partial writer, or hand-edited) would crash here
967
+ # on index["topics"] because the `or {...}` default only fires when the
968
+ # whole file is falsy. setdefault matches the defensive pattern used in
969
+ # the sibling _update_index_with_episode.
970
+ topics = index.setdefault("topics", [])
971
+
972
+ # Find or create topic
973
+ topic_found = False
974
+ for topic in topics:
975
+ if topic.get("id") == category:
976
+ topic["last_accessed"] = datetime.now(timezone.utc).isoformat()
977
+ topic["relevance_score"] = max(
978
+ topic.get("relevance_score", 0.5),
979
+ pattern.get("confidence", 0.5),
980
+ )
981
+ topic_found = True
982
+ break
938
983
 
939
- if not topic_found:
940
- topics.append({
941
- "id": category,
942
- "summary": f"Patterns for {category}",
943
- "relevance_score": pattern.get("confidence", 0.5),
944
- "last_accessed": datetime.now(timezone.utc).isoformat(),
945
- "token_count": len(json.dumps(pattern)) // 4,
946
- })
984
+ if not topic_found:
985
+ topics.append({
986
+ "id": category,
987
+ "summary": f"Patterns for {category}",
988
+ "relevance_score": pattern.get("confidence", 0.5),
989
+ "last_accessed": datetime.now(timezone.utc).isoformat(),
990
+ "token_count": len(json.dumps(pattern)) // 4,
991
+ })
947
992
 
948
- index["last_updated"] = datetime.now(timezone.utc).isoformat()
949
- if not topic_found:
950
- index["total_memories"] = index.get("total_memories", 0) + 1
993
+ index["last_updated"] = datetime.now(timezone.utc).isoformat()
994
+ if not topic_found:
995
+ index["total_memories"] = index.get("total_memories", 0) + 1
951
996
 
952
- self.storage.write_json("index.json", index)
997
+ self.storage.write_json("index.json", index)
953
998
 
954
999
  def _search_episode(self, episode_id: str) -> Optional[EpisodeTrace]:
955
1000
  """Search for episode across all date directories."""
@@ -1190,9 +1235,13 @@ class MemoryEngine:
1190
1235
  Detect task type from context.
1191
1236
  Uses keyword matching based on goal, action, and phase.
1192
1237
  """
1193
- goal = context.get("goal", "").lower()
1194
- action = context.get("action_type", "").lower()
1195
- phase = context.get("phase", "").lower()
1238
+ # M3 None-guard (wave-6): an explicit null value (e.g. {"goal": None})
1239
+ # makes context.get("goal", "") return None, so None.lower() crashed.
1240
+ # The retrieval.py copy was fixed in v7.61.0; this engine.py copy was
1241
+ # the missed sibling. Coalesce to "" before calling string methods.
1242
+ goal = (context.get("goal") or "").lower()
1243
+ action = (context.get("action_type") or "").lower()
1244
+ phase = (context.get("phase") or "").lower()
1196
1245
 
1197
1246
  signals = {
1198
1247
  "exploration": {
@@ -1277,7 +1326,8 @@ class MemoryEngine:
1277
1326
  episodes = self.get_recent_episodes(limit=50)
1278
1327
  for ep in episodes:
1279
1328
  ep_dict = ep.to_dict() if hasattr(ep, "to_dict") else ep.__dict__.copy()
1280
- goal = ep_dict.get("context", {}).get("goal", "").lower()
1329
+ ep_context = ep_dict.get("context") or {}
1330
+ goal = (ep_context.get("goal") or "").lower()
1281
1331
  score = sum(1 for kw in keywords if kw in goal)
1282
1332
  if score > 0:
1283
1333
  ep_dict["_score"] = score
@@ -1288,7 +1338,7 @@ class MemoryEngine:
1288
1338
  patterns = self.find_patterns(min_confidence=0.3)
1289
1339
  for pattern in patterns:
1290
1340
  p_dict = pattern.to_dict() if hasattr(pattern, "to_dict") else pattern.__dict__.copy()
1291
- pattern_text = p_dict.get("pattern", "").lower()
1341
+ pattern_text = (p_dict.get("pattern") or "").lower()
1292
1342
  score = sum(1 for kw in keywords if kw in pattern_text)
1293
1343
  if score > 0:
1294
1344
  p_dict["_score"] = score
@@ -1299,8 +1349,8 @@ class MemoryEngine:
1299
1349
  skills = self.list_skills()
1300
1350
  for skill in skills:
1301
1351
  s_dict = skill.to_dict() if hasattr(skill, "to_dict") else skill.__dict__.copy()
1302
- name = s_dict.get("name", "").lower()
1303
- desc = s_dict.get("description", "").lower()
1352
+ name = (s_dict.get("name") or "").lower()
1353
+ desc = (s_dict.get("description") or "").lower()
1304
1354
  score = sum(1 for kw in keywords if kw in name or kw in desc)
1305
1355
  if score > 0:
1306
1356
  s_dict["_score"] = score
@@ -940,8 +940,11 @@ class MemoryRetrieval:
940
940
  Returns:
941
941
  Weighted score incorporating importance
942
942
  """
943
- source = result.get("_source", "")
944
- base_score = result.get("_score", 0.5)
943
+ source = result.get("_source") or ""
944
+ # _score is set internally so null is unlikely, but guard for
945
+ # uniformity since it feeds the arithmetic below.
946
+ base_score = result.get("_score")
947
+ base_score = 0.5 if base_score is None else base_score
945
948
 
946
949
  # Map source to weight key
947
950
  weight_key = source
@@ -950,11 +953,17 @@ class MemoryRetrieval:
950
953
 
951
954
  weight = weights.get(weight_key, 0.0)
952
955
 
953
- # Get importance score (default 0.5 if not set)
954
- importance = result.get("importance", 0.5)
956
+ # Get importance score (default 0.5 if not set). Defensive: a
957
+ # corrupt/hand-edited record may carry importance=null, which would
958
+ # raise TypeError in the arithmetic below. Use the default only when
959
+ # missing/null; a legitimate 0.0 is preserved.
960
+ importance = result.get("importance")
961
+ importance = 0.5 if importance is None else importance
955
962
 
956
- # Get confidence for semantic patterns
957
- confidence = result.get("confidence", 1.0)
963
+ # Get confidence for semantic patterns. Same null guard; default 1.0
964
+ # only when missing/null, a legitimate 0.0 is preserved.
965
+ confidence = result.get("confidence")
966
+ confidence = 1.0 if confidence is None else confidence
958
967
 
959
968
  # Combined score: relevance * task_weight * importance * confidence
960
969
  # Importance contributes 30% of the final score
@@ -1141,17 +1150,22 @@ class MemoryRetrieval:
1141
1150
  selected_memories.append(topic)
1142
1151
  budget_remaining -= layer1_tokens
1143
1152
 
1144
- # Layer 2: Expand summaries for top topics
1145
- layer2_budget = int(token_budget * 0.4) # Reserve 40% for summaries
1146
- if budget_remaining > layer2_budget * 0.5:
1153
+ # Layer 2: Expand summaries for top topics.
1154
+ # Gate on the remaining budget (not a fraction of the layer-2 reserve)
1155
+ # and trim the summary set to fit via optimize_context, mirroring
1156
+ # Layer 3 below. Previously this admitted summaries all-or-nothing: a
1157
+ # set that exceeded budget_remaining was dropped entirely, and the gate
1158
+ # compared against layer2_budget*0.5 (a fraction of the reserve) rather
1159
+ # than the budget actually left.
1160
+ if budget_remaining > 100:
1147
1161
  summaries = self._get_topic_summaries(relevant_topics[:5], query, weights)
1148
- layer2_tokens = sum(estimate_memory_tokens(s) for s in summaries)
1162
+ for summary in summaries:
1163
+ summary["_layer"] = 2
1149
1164
 
1150
- if layer2_tokens <= budget_remaining:
1151
- for summary in summaries:
1152
- summary["_layer"] = 2
1153
- selected_memories.append(summary)
1154
- budget_remaining -= layer2_tokens
1165
+ # Optimize to fit remaining budget (trimmed set, not all-or-nothing)
1166
+ optimized = optimize_context(summaries, budget_remaining)
1167
+ selected_memories.extend(optimized)
1168
+ budget_remaining -= sum(estimate_memory_tokens(s) for s in optimized)
1155
1169
 
1156
1170
  # Layer 3: Full details for highest priority items
1157
1171
  if budget_remaining > 100: # At least 100 tokens remaining
@@ -1189,14 +1203,36 @@ class MemoryRetrieval:
1189
1203
 
1190
1204
  scored_topics = []
1191
1205
  for topic in topics:
1192
- topic_name = topic.get("topic", "").lower()
1193
- memory_type = topic.get("type", "").lower()
1206
+ if not isinstance(topic, dict):
1207
+ continue
1208
+ # The index.json writer (engine.py _update_index_with_episode at ~367
1209
+ # and _update_index_with_pattern at ~985) emits topics keyed by "id" (a phase or
1210
+ # category slug, e.g. "implementation", "auth") and "summary"
1211
+ # (prose: the goal text or "Patterns for <category>"). It does NOT
1212
+ # emit "topic", "type", or "last_updated". Previously this scorer
1213
+ # read only "topic"/"type"/"last_updated", so word overlap, type
1214
+ # weighting, and the recency boost were all silent no-ops on real
1215
+ # data. Score against the real keys (id + summary for word overlap,
1216
+ # id as the type/category for the strategy weight, the real recency
1217
+ # keys), and keep the legacy "topic"/"type"/"last_updated" keys as
1218
+ # fallbacks so any older-shape index still ranks.
1219
+ topic_text = " ".join(
1220
+ str(v) for v in (
1221
+ topic.get("summary"),
1222
+ topic.get("id"),
1223
+ topic.get("topic"),
1224
+ ) if v
1225
+ ).lower()
1226
+ # The category/phase slug doubles as the memory-type weight key
1227
+ # (the writer uses the category name as the id). Fall back to the
1228
+ # legacy "type" key for older-shape indexes.
1229
+ memory_type = (topic.get("id") or topic.get("type") or "").lower()
1194
1230
 
1195
1231
  # Calculate relevance score
1196
1232
  score = 0.0
1197
1233
 
1198
1234
  # Word overlap
1199
- topic_words = set(topic_name.split())
1235
+ topic_words = set(topic_text.split())
1200
1236
  overlap = len(query_words & topic_words)
1201
1237
  score += overlap * 0.3
1202
1238
 
@@ -1204,8 +1240,11 @@ class MemoryRetrieval:
1204
1240
  type_weight = weights.get(memory_type, 0.1)
1205
1241
  score += type_weight
1206
1242
 
1207
- # Recency boost
1208
- if topic.get("last_updated"):
1243
+ # Recency boost. The writer stamps "last_accessed"/"first_seen";
1244
+ # "last_updated" is the legacy key.
1245
+ if (topic.get("last_accessed")
1246
+ or topic.get("first_seen")
1247
+ or topic.get("last_updated")):
1209
1248
  score += 0.1
1210
1249
 
1211
1250
  if score > 0:
@@ -1226,8 +1265,15 @@ class MemoryRetrieval:
1226
1265
  summaries = []
1227
1266
 
1228
1267
  for topic in topics:
1229
- topic_name = topic.get("topic", "")
1230
- memory_type = topic.get("type", "episodic")
1268
+ if not isinstance(topic, dict):
1269
+ continue
1270
+ # Mirror _filter_relevant_topics: the writer emits "id"/"summary",
1271
+ # not "topic". Fall back to the legacy "topic" key so both shapes
1272
+ # resolve a usable name. Default type stays "episodic".
1273
+ topic_name = (
1274
+ topic.get("id") or topic.get("topic") or topic.get("summary") or ""
1275
+ )
1276
+ memory_type = topic.get("type") or "episodic"
1231
1277
 
1232
1278
  # Try to load summary from appropriate collection
1233
1279
  if memory_type == "episodic":
@@ -1426,7 +1472,12 @@ class MemoryRetrieval:
1426
1472
  parts.append(f"action: {context['action_type']}")
1427
1473
 
1428
1474
  if context.get("files"):
1429
- parts.append(f"files: {', '.join(context['files'][:3])}")
1475
+ # Defensive: filter to str elements so a list carrying None or
1476
+ # non-str entries (corrupt/hand-edited record) does not raise
1477
+ # TypeError inside join. Mirrors the steps-join in skills search.
1478
+ files = [f for f in context["files"][:3] if isinstance(f, str)]
1479
+ if files:
1480
+ parts.append(f"files: {', '.join(files)}")
1430
1481
 
1431
1482
  return " ".join(parts) if parts else ""
1432
1483
 
@@ -1458,13 +1509,16 @@ class MemoryRetrieval:
1458
1509
  if not data:
1459
1510
  continue
1460
1511
 
1461
- # Score based on keyword matches in goal
1462
- context = data.get("context", {})
1463
- goal = context.get("goal", "").lower()
1512
+ # Score based on keyword matches in goal.
1513
+ # Defensive: a corrupt or hand-edited record may carry
1514
+ # context=null or null string fields; (x or "") avoids
1515
+ # AttributeError on None.
1516
+ context = data.get("context") or {}
1517
+ goal = (context.get("goal") or "").lower()
1464
1518
  score = sum(1 for kw in keywords if kw in goal)
1465
1519
 
1466
1520
  # Also check phase
1467
- phase = context.get("phase", "").lower()
1521
+ phase = (context.get("phase") or "").lower()
1468
1522
  score += sum(0.5 for kw in keywords if kw in phase)
1469
1523
 
1470
1524
  if score > 0:
@@ -1487,16 +1541,21 @@ class MemoryRetrieval:
1487
1541
  for pattern in patterns_data.get("patterns", []):
1488
1542
  if not isinstance(pattern, dict):
1489
1543
  continue
1490
- pattern_text = pattern.get("pattern", "").lower()
1491
- category = pattern.get("category", "").lower()
1492
- correct = pattern.get("correct_approach", "").lower()
1544
+ # Defensive: corrupt or hand-edited records may carry null
1545
+ # string fields; (x or "") avoids AttributeError on None.
1546
+ pattern_text = (pattern.get("pattern") or "").lower()
1547
+ category = (pattern.get("category") or "").lower()
1548
+ correct = (pattern.get("correct_approach") or "").lower()
1493
1549
 
1494
1550
  score = sum(1 for kw in keywords if kw in pattern_text)
1495
1551
  score += sum(0.5 for kw in keywords if kw in category)
1496
1552
  score += sum(0.3 for kw in keywords if kw in correct)
1497
1553
 
1498
- # Weight by confidence
1499
- confidence = pattern.get("confidence", 0.5)
1554
+ # Weight by confidence. Defensive: a null confidence would make
1555
+ # score *= None raise TypeError. Use 0.5 only when missing/null;
1556
+ # a legitimate 0.0 is preserved (it correctly zeroes the score).
1557
+ confidence = pattern.get("confidence")
1558
+ confidence = 0.5 if confidence is None else confidence
1500
1559
  score *= confidence
1501
1560
 
1502
1561
  if score > 0:
@@ -1521,8 +1580,8 @@ class MemoryRetrieval:
1521
1580
  if not data:
1522
1581
  continue
1523
1582
 
1524
- name = data.get("name", "").lower()
1525
- description = data.get("description", "").lower()
1583
+ name = (data.get("name") or "").lower()
1584
+ description = (data.get("description") or "").lower()
1526
1585
  steps_text = " ".join(
1527
1586
  s for s in (data.get("steps") or []) if isinstance(s, str)
1528
1587
  ).lower()
@@ -1549,9 +1608,14 @@ class MemoryRetrieval:
1549
1608
  anti_data = self.storage.read_json("semantic/anti-patterns.json") or {}
1550
1609
 
1551
1610
  for anti in anti_data.get("anti_patterns", []):
1552
- what_fails = anti.get("what_fails", "").lower()
1553
- why = anti.get("why", "").lower()
1554
- prevention = anti.get("prevention", "").lower()
1611
+ # Defensive: mirror the sibling loop below. A corrupt or
1612
+ # hand-edited record may be a non-dict or carry null fields;
1613
+ # the isinstance guard and (x or "") avoid AttributeError.
1614
+ if not isinstance(anti, dict):
1615
+ continue
1616
+ what_fails = (anti.get("what_fails") or "").lower()
1617
+ why = (anti.get("why") or "").lower()
1618
+ prevention = (anti.get("prevention") or "").lower()
1555
1619
 
1556
1620
  score = sum(2 for kw in keywords if kw in what_fails)
1557
1621
  score += sum(1 for kw in keywords if kw in why)
@@ -1576,10 +1640,10 @@ class MemoryRetrieval:
1576
1640
  continue
1577
1641
  if pat.get("category") != "anti-pattern":
1578
1642
  continue
1579
- what_fails = (pat.get("incorrect_approach", "")
1580
- or pat.get("pattern", "")).lower()
1581
- why = pat.get("description", "").lower()
1582
- prevention = pat.get("correct_approach", "").lower()
1643
+ what_fails = (pat.get("incorrect_approach")
1644
+ or pat.get("pattern") or "").lower()
1645
+ why = (pat.get("description") or "").lower()
1646
+ prevention = (pat.get("correct_approach") or "").lower()
1583
1647
 
1584
1648
  score = sum(2 for kw in keywords if kw in what_fails)
1585
1649
  score += sum(1 for kw in keywords if kw in why)