superlocalmemory 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +167 -1803
- package/README.md +212 -397
- package/bin/slm +179 -3
- package/bin/superlocalmemoryv2:learning +4 -0
- package/bin/superlocalmemoryv2:patterns +4 -0
- package/docs/ACCESSIBILITY.md +291 -0
- package/docs/ARCHITECTURE.md +12 -6
- package/docs/FRAMEWORK-INTEGRATIONS.md +300 -0
- package/docs/MCP-MANUAL-SETUP.md +14 -4
- package/install.sh +99 -3
- package/mcp_server.py +291 -1
- package/package.json +2 -1
- package/requirements-learning.txt +12 -0
- package/scripts/verify-v27.sh +233 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/learning/__init__.py +201 -0
- package/src/learning/adaptive_ranker.py +826 -0
- package/src/learning/cross_project_aggregator.py +866 -0
- package/src/learning/engagement_tracker.py +638 -0
- package/src/learning/feature_extractor.py +461 -0
- package/src/learning/feedback_collector.py +690 -0
- package/src/learning/learning_db.py +842 -0
- package/src/learning/project_context_manager.py +582 -0
- package/src/learning/source_quality_scorer.py +685 -0
- package/src/learning/synthetic_bootstrap.py +1047 -0
- package/src/learning/tests/__init__.py +0 -0
- package/src/learning/tests/test_adaptive_ranker.py +328 -0
- package/src/learning/tests/test_aggregator.py +309 -0
- package/src/learning/tests/test_feedback_collector.py +295 -0
- package/src/learning/tests/test_learning_db.py +606 -0
- package/src/learning/tests/test_project_context.py +296 -0
- package/src/learning/tests/test_source_quality.py +355 -0
- package/src/learning/tests/test_synthetic_bootstrap.py +433 -0
- package/src/learning/tests/test_workflow_miner.py +322 -0
- package/src/learning/workflow_pattern_miner.py +665 -0
- package/ui/index.html +346 -13
- package/ui/js/clusters.js +90 -1
- package/ui/js/graph-core.js +445 -0
- package/ui/js/graph-cytoscape-monolithic-backup.js +1168 -0
- package/ui/js/graph-cytoscape.js +1168 -0
- package/ui/js/graph-d3-backup.js +32 -0
- package/ui/js/graph-filters.js +220 -0
- package/ui/js/graph-interactions.js +354 -0
- package/ui/js/graph-ui.js +214 -0
- package/ui/js/memories.js +52 -0
- package/ui/js/modal.js +104 -1
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SuperLocalMemory V2 - Workflow Pattern Miner (v2.7)
|
|
4
|
+
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
+
Licensed under MIT License
|
|
6
|
+
|
|
7
|
+
Repository: https://github.com/varun369/SuperLocalMemoryV2
|
|
8
|
+
Author: Varun Pratap Bhardwaj (Solution Architect)
|
|
9
|
+
|
|
10
|
+
NOTICE: This software is protected by MIT License.
|
|
11
|
+
Attribution must be preserved in all copies or derivatives.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
WorkflowPatternMiner -- Layer 3: Sliding-window sequence and temporal pattern mining.
|
|
16
|
+
|
|
17
|
+
Detects repeating workflow sequences and time-of-day activity preferences
|
|
18
|
+
from memory creation timestamps and content. Uses a custom sliding-window
|
|
19
|
+
n-gram approach inspired by TSW-PrefixSpan (IEEE 2020) -- NO external
|
|
20
|
+
dependencies beyond stdlib.
|
|
21
|
+
|
|
22
|
+
How it works:
|
|
23
|
+
1. Fetches recent memories from memory.db (read-only).
|
|
24
|
+
2. Classifies each memory into one of 7 activity types via keyword scoring.
|
|
25
|
+
3. Extracts n-gram sequences (length 2-5) from the ordered activity stream.
|
|
26
|
+
4. Calculates support (frequency / total windows) for each n-gram.
|
|
27
|
+
5. Mines temporal patterns (dominant activity per time-of-day bucket).
|
|
28
|
+
6. Stores discovered patterns in learning.db via LearningDB.
|
|
29
|
+
|
|
30
|
+
Design decisions:
|
|
31
|
+
- Word-boundary matching prevents false positives (e.g. "document" != "docs").
|
|
32
|
+
- Consecutive identical activities in an n-gram are skipped as noise.
|
|
33
|
+
- Minimum evidence threshold (5 memories) prevents weak temporal claims.
|
|
34
|
+
- Patterns are cleared and re-mined each run (idempotent operation).
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
import json
|
|
38
|
+
import logging
|
|
39
|
+
import re
|
|
40
|
+
import sqlite3
|
|
41
|
+
from collections import Counter
|
|
42
|
+
from datetime import datetime
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
from typing import Dict, List, Optional, Any
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger("superlocalmemory.learning.workflow")

MEMORY_DIR = Path.home() / ".claude-memory"
MEMORY_DB_PATH = MEMORY_DIR / "memory.db"

# ---------------------------------------------------------------------------
# Activity type taxonomy (7 categories)
# ---------------------------------------------------------------------------
# Maps each activity type to the phrases that indicate it. Matching is done
# with word-boundary regexes so partial hits (e.g. "test" inside "latest")
# never count.

ACTIVITY_TYPES: Dict[str, List[str]] = {
    "docs": [
        "documentation", "readme", "wiki", "spec", "prd",
        "design doc", "changelog", "api doc",
    ],
    "architecture": [
        "architecture", "diagram", "system design", "schema",
        "api design", "data model", "erd",
    ],
    "code": [
        "implement", "function", "class", "module", "refactor",
        "code", "feature", "component",
    ],
    "test": [
        "test", "pytest", "jest", "coverage", "assertion",
        "mock", "spec", "unit test",
    ],
    "debug": [
        "bug", "fix", "error", "stack trace", "debug",
        "issue", "exception", "traceback",
    ],
    "deploy": [
        "deploy", "docker", "ci/cd", "pipeline", "release",
        "production", "staging", "build",
    ],
    "config": [
        "config", "env", "settings", "setup", "install",
        "dependency", "package", "requirements",
    ],
}

# (activity_type, compiled word-boundary regex) pairs, built once at import
# time for speed. re.escape guards phrases containing regex metacharacters
# (e.g. "ci/cd").
_KEYWORD_PATTERNS: List[tuple] = [
    (_act_type, re.compile(r"\b" + re.escape(_kw) + r"\b", re.IGNORECASE))
    for _act_type, _keywords in ACTIVITY_TYPES.items()
    for _kw in _keywords
]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class WorkflowPatternMiner:
|
|
100
|
+
"""
|
|
101
|
+
Mines workflow sequences and temporal patterns from memory history.
|
|
102
|
+
|
|
103
|
+
Reads from memory.db (never writes to it) and stores discovered
|
|
104
|
+
patterns in learning.db via the shared LearningDB instance.
|
|
105
|
+
|
|
106
|
+
Usage:
|
|
107
|
+
from learning.learning_db import LearningDB
|
|
108
|
+
miner = WorkflowPatternMiner(learning_db=LearningDB())
|
|
109
|
+
results = miner.mine_all()
|
|
110
|
+
print(results['sequences']) # Top workflow sequences
|
|
111
|
+
print(results['temporal']) # Time-of-day preferences
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
def __init__(
|
|
115
|
+
self,
|
|
116
|
+
memory_db_path: Optional[Path] = None,
|
|
117
|
+
learning_db: Optional[Any] = None,
|
|
118
|
+
):
|
|
119
|
+
"""
|
|
120
|
+
Args:
|
|
121
|
+
memory_db_path: Path to memory.db for reading memories.
|
|
122
|
+
Defaults to ~/.claude-memory/memory.db.
|
|
123
|
+
learning_db: LearningDB instance for storing patterns.
|
|
124
|
+
If None, patterns are returned but not persisted.
|
|
125
|
+
"""
|
|
126
|
+
self.memory_db_path = Path(memory_db_path) if memory_db_path else MEMORY_DB_PATH
|
|
127
|
+
self.learning_db = learning_db
|
|
128
|
+
|
|
129
|
+
# ======================================================================
|
|
130
|
+
# Public API
|
|
131
|
+
# ======================================================================
|
|
132
|
+
|
|
133
|
+
def mine_sequences(
|
|
134
|
+
self,
|
|
135
|
+
memories: Optional[List[dict]] = None,
|
|
136
|
+
min_support: float = 0.3,
|
|
137
|
+
) -> List[dict]:
|
|
138
|
+
"""
|
|
139
|
+
Mine repeating workflow sequences from memory content.
|
|
140
|
+
|
|
141
|
+
Algorithm:
|
|
142
|
+
1. Classify each memory into an activity type.
|
|
143
|
+
2. Build an ordered activity stream (chronological).
|
|
144
|
+
3. Extract n-grams of length 2-5 via sliding window.
|
|
145
|
+
4. Filter out n-grams with consecutive identical activities.
|
|
146
|
+
5. Compute support = count / total_windows_for_that_length.
|
|
147
|
+
6. Return top 20 patterns above *min_support*.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
memories: List of memory dicts with 'content' and 'created_at'.
|
|
151
|
+
If None, fetches the last 500 from memory.db.
|
|
152
|
+
min_support: Minimum support threshold (0.0 - 1.0). Default 0.3.
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
Sorted list of dicts:
|
|
156
|
+
[{'sequence': ['docs', 'code', 'test'],
|
|
157
|
+
'support': 0.45, 'count': 12, 'length': 3}, ...]
|
|
158
|
+
"""
|
|
159
|
+
if memories is None:
|
|
160
|
+
memories = self._fetch_memories(limit=500)
|
|
161
|
+
|
|
162
|
+
if not memories:
|
|
163
|
+
logger.info("No memories to mine sequences from")
|
|
164
|
+
return []
|
|
165
|
+
|
|
166
|
+
# Step 1 + 2: classify and build activity stream
|
|
167
|
+
activity_stream: List[str] = []
|
|
168
|
+
for mem in memories:
|
|
169
|
+
activity = self._classify_activity(mem.get("content", ""))
|
|
170
|
+
if activity != "unknown":
|
|
171
|
+
activity_stream.append(activity)
|
|
172
|
+
|
|
173
|
+
if len(activity_stream) < 2:
|
|
174
|
+
logger.info(
|
|
175
|
+
"Activity stream too short (%d) for sequence mining",
|
|
176
|
+
len(activity_stream),
|
|
177
|
+
)
|
|
178
|
+
return []
|
|
179
|
+
|
|
180
|
+
# Step 3-5: extract n-grams and compute support
|
|
181
|
+
all_patterns: List[dict] = []
|
|
182
|
+
|
|
183
|
+
for n in range(2, 6): # lengths 2, 3, 4, 5
|
|
184
|
+
if len(activity_stream) < n:
|
|
185
|
+
continue
|
|
186
|
+
|
|
187
|
+
ngram_counts: Counter = Counter()
|
|
188
|
+
total_windows = len(activity_stream) - n + 1
|
|
189
|
+
|
|
190
|
+
for i in range(total_windows):
|
|
191
|
+
ngram = tuple(activity_stream[i : i + n])
|
|
192
|
+
|
|
193
|
+
# Skip n-grams where any consecutive pair repeats (noise)
|
|
194
|
+
has_repeat = any(
|
|
195
|
+
ngram[j] == ngram[j + 1] for j in range(len(ngram) - 1)
|
|
196
|
+
)
|
|
197
|
+
if has_repeat:
|
|
198
|
+
continue
|
|
199
|
+
|
|
200
|
+
ngram_counts[ngram] += 1
|
|
201
|
+
|
|
202
|
+
# Convert to pattern dicts with support
|
|
203
|
+
for ngram, count in ngram_counts.items():
|
|
204
|
+
support = count / total_windows if total_windows > 0 else 0.0
|
|
205
|
+
if support >= min_support:
|
|
206
|
+
all_patterns.append({
|
|
207
|
+
"sequence": list(ngram),
|
|
208
|
+
"support": round(support, 4),
|
|
209
|
+
"count": count,
|
|
210
|
+
"length": n,
|
|
211
|
+
})
|
|
212
|
+
|
|
213
|
+
# Step 6: sort by support descending, limit to top 20
|
|
214
|
+
all_patterns.sort(key=lambda p: (-p["support"], -p["length"]))
|
|
215
|
+
top_patterns = all_patterns[:20]
|
|
216
|
+
|
|
217
|
+
logger.info(
|
|
218
|
+
"Mined %d sequence patterns from %d activities (top %d returned)",
|
|
219
|
+
len(all_patterns),
|
|
220
|
+
len(activity_stream),
|
|
221
|
+
len(top_patterns),
|
|
222
|
+
)
|
|
223
|
+
return top_patterns
|
|
224
|
+
|
|
225
|
+
def mine_temporal_patterns(
|
|
226
|
+
self,
|
|
227
|
+
memories: Optional[List[dict]] = None,
|
|
228
|
+
) -> Dict[str, dict]:
|
|
229
|
+
"""
|
|
230
|
+
Detect time-of-day activity preferences.
|
|
231
|
+
|
|
232
|
+
Buckets each memory by hour into morning/afternoon/evening/night,
|
|
233
|
+
counts activity types per bucket, and identifies the dominant
|
|
234
|
+
activity type for each time period.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
memories: List of memory dicts with 'content' and 'created_at'.
|
|
238
|
+
If None, fetches from memory.db.
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
Dict keyed by bucket name:
|
|
242
|
+
{'morning': {
|
|
243
|
+
'dominant_activity': 'code',
|
|
244
|
+
'confidence': 0.65,
|
|
245
|
+
'evidence_count': 23,
|
|
246
|
+
'distribution': {'code': 15, 'test': 5, 'debug': 3}
|
|
247
|
+
}, ...}
|
|
248
|
+
|
|
249
|
+
Buckets with fewer than 5 evidence memories are omitted.
|
|
250
|
+
"""
|
|
251
|
+
if memories is None:
|
|
252
|
+
memories = self._fetch_memories(limit=500)
|
|
253
|
+
|
|
254
|
+
if not memories:
|
|
255
|
+
logger.info("No memories to mine temporal patterns from")
|
|
256
|
+
return {}
|
|
257
|
+
|
|
258
|
+
# bucket_name -> Counter of activity types
|
|
259
|
+
bucket_activities: Dict[str, Counter] = {
|
|
260
|
+
"morning": Counter(),
|
|
261
|
+
"afternoon": Counter(),
|
|
262
|
+
"evening": Counter(),
|
|
263
|
+
"night": Counter(),
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
for mem in memories:
|
|
267
|
+
activity = self._classify_activity(mem.get("content", ""))
|
|
268
|
+
if activity == "unknown":
|
|
269
|
+
continue
|
|
270
|
+
|
|
271
|
+
hour = self._parse_hour(mem.get("created_at"))
|
|
272
|
+
if hour is None:
|
|
273
|
+
continue
|
|
274
|
+
|
|
275
|
+
bucket = self._hour_to_bucket(hour)
|
|
276
|
+
bucket_activities[bucket][activity] += 1
|
|
277
|
+
|
|
278
|
+
# Build result: only include buckets with minimum evidence
|
|
279
|
+
min_evidence = 5
|
|
280
|
+
result: Dict[str, dict] = {}
|
|
281
|
+
|
|
282
|
+
for bucket, counter in bucket_activities.items():
|
|
283
|
+
total = sum(counter.values())
|
|
284
|
+
if total < min_evidence:
|
|
285
|
+
continue
|
|
286
|
+
|
|
287
|
+
dominant_activity, dominant_count = counter.most_common(1)[0]
|
|
288
|
+
confidence = round(dominant_count / total, 4) if total > 0 else 0.0
|
|
289
|
+
|
|
290
|
+
result[bucket] = {
|
|
291
|
+
"dominant_activity": dominant_activity,
|
|
292
|
+
"confidence": confidence,
|
|
293
|
+
"evidence_count": total,
|
|
294
|
+
"distribution": dict(counter),
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
logger.info(
|
|
298
|
+
"Mined temporal patterns for %d/%d buckets",
|
|
299
|
+
len(result),
|
|
300
|
+
len(bucket_activities),
|
|
301
|
+
)
|
|
302
|
+
return result
|
|
303
|
+
|
|
304
|
+
def mine_all(
|
|
305
|
+
self,
|
|
306
|
+
memories: Optional[List[dict]] = None,
|
|
307
|
+
) -> dict:
|
|
308
|
+
"""
|
|
309
|
+
Run all mining methods and optionally persist results to learning.db.
|
|
310
|
+
|
|
311
|
+
Fetches memories once and passes to both miners. If a LearningDB
|
|
312
|
+
instance was provided at init, clears old patterns and stores new ones.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
memories: Pre-fetched memories, or None to auto-fetch.
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
{'sequences': [...], 'temporal': {...}}
|
|
319
|
+
"""
|
|
320
|
+
if memories is None:
|
|
321
|
+
memories = self._fetch_memories(limit=500)
|
|
322
|
+
|
|
323
|
+
sequences = self.mine_sequences(memories=memories)
|
|
324
|
+
temporal = self.mine_temporal_patterns(memories=memories)
|
|
325
|
+
|
|
326
|
+
# Persist to learning.db if available
|
|
327
|
+
if self.learning_db is not None:
|
|
328
|
+
self._persist_patterns(sequences, temporal)
|
|
329
|
+
|
|
330
|
+
return {
|
|
331
|
+
"sequences": sequences,
|
|
332
|
+
"temporal": temporal,
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
def get_workflow_insights(self) -> dict:
|
|
336
|
+
"""
|
|
337
|
+
Read stored patterns from learning.db and format for display.
|
|
338
|
+
|
|
339
|
+
Returns a user-friendly summary suitable for CLI output or
|
|
340
|
+
dashboard rendering. If no learning_db, returns empty structure.
|
|
341
|
+
"""
|
|
342
|
+
if self.learning_db is None:
|
|
343
|
+
return {
|
|
344
|
+
"sequences": [],
|
|
345
|
+
"temporal": {},
|
|
346
|
+
"summary": "No learning database connected",
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
try:
|
|
350
|
+
seq_patterns = self.learning_db.get_workflow_patterns(
|
|
351
|
+
pattern_type="sequence",
|
|
352
|
+
)
|
|
353
|
+
temp_patterns = self.learning_db.get_workflow_patterns(
|
|
354
|
+
pattern_type="temporal",
|
|
355
|
+
)
|
|
356
|
+
except Exception as e:
|
|
357
|
+
logger.error("Failed to read workflow patterns: %s", e)
|
|
358
|
+
return {
|
|
359
|
+
"sequences": [],
|
|
360
|
+
"temporal": {},
|
|
361
|
+
"summary": "Error reading patterns",
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
# Parse stored JSON values back into structured data
|
|
365
|
+
sequences = []
|
|
366
|
+
for p in seq_patterns:
|
|
367
|
+
try:
|
|
368
|
+
value = json.loads(p.get("pattern_value", "{}"))
|
|
369
|
+
sequences.append({
|
|
370
|
+
"sequence": value.get("sequence", []),
|
|
371
|
+
"support": p.get("confidence", 0.0),
|
|
372
|
+
"count": p.get("evidence_count", 0),
|
|
373
|
+
"length": len(value.get("sequence", [])),
|
|
374
|
+
})
|
|
375
|
+
except (json.JSONDecodeError, TypeError):
|
|
376
|
+
continue
|
|
377
|
+
|
|
378
|
+
temporal = {}
|
|
379
|
+
for p in temp_patterns:
|
|
380
|
+
try:
|
|
381
|
+
value = json.loads(p.get("pattern_value", "{}"))
|
|
382
|
+
bucket_name = p.get("pattern_key", "unknown")
|
|
383
|
+
temporal[bucket_name] = value
|
|
384
|
+
except (json.JSONDecodeError, TypeError):
|
|
385
|
+
continue
|
|
386
|
+
|
|
387
|
+
# Build a natural language summary
|
|
388
|
+
summary_parts = []
|
|
389
|
+
if sequences:
|
|
390
|
+
top = sequences[0]
|
|
391
|
+
seq_str = " -> ".join(top["sequence"])
|
|
392
|
+
summary_parts.append(
|
|
393
|
+
f"Most common workflow: {seq_str} "
|
|
394
|
+
f"(support={top['support']:.0%}, seen {top['count']}x)"
|
|
395
|
+
)
|
|
396
|
+
if temporal:
|
|
397
|
+
for bucket, info in sorted(temporal.items()):
|
|
398
|
+
dominant = info.get("dominant_activity", "?")
|
|
399
|
+
conf = info.get("confidence", 0)
|
|
400
|
+
summary_parts.append(
|
|
401
|
+
f" {bucket}: mostly {dominant} ({conf:.0%} confidence)"
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
return {
|
|
405
|
+
"sequences": sequences,
|
|
406
|
+
"temporal": temporal,
|
|
407
|
+
"summary": "\n".join(summary_parts) if summary_parts else "No patterns discovered yet",
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
# ======================================================================
|
|
411
|
+
# Internal helpers
|
|
412
|
+
# ======================================================================
|
|
413
|
+
|
|
414
|
+
def _classify_activity(self, content: str) -> str:
|
|
415
|
+
"""
|
|
416
|
+
Classify a memory's content into one of the 7 activity types.
|
|
417
|
+
|
|
418
|
+
Scores each type by counting word-boundary keyword matches in the
|
|
419
|
+
content. Returns the highest-scoring type, or 'unknown' if no
|
|
420
|
+
keywords matched.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
content: Raw memory content string.
|
|
424
|
+
|
|
425
|
+
Returns:
|
|
426
|
+
Activity type string (e.g. 'code', 'test') or 'unknown'.
|
|
427
|
+
"""
|
|
428
|
+
if not content:
|
|
429
|
+
return "unknown"
|
|
430
|
+
|
|
431
|
+
scores: Counter = Counter()
|
|
432
|
+
|
|
433
|
+
for act_type, pattern in _KEYWORD_PATTERNS:
|
|
434
|
+
if pattern.search(content):
|
|
435
|
+
scores[act_type] += 1
|
|
436
|
+
|
|
437
|
+
if not scores:
|
|
438
|
+
return "unknown"
|
|
439
|
+
|
|
440
|
+
# Return the type with the highest score
|
|
441
|
+
best_type, _best_count = scores.most_common(1)[0]
|
|
442
|
+
return best_type
|
|
443
|
+
|
|
444
|
+
def _hour_to_bucket(self, hour: int) -> str:
|
|
445
|
+
"""
|
|
446
|
+
Map an hour (0-23) to a time-of-day bucket.
|
|
447
|
+
|
|
448
|
+
Buckets:
|
|
449
|
+
morning = 6-11
|
|
450
|
+
afternoon = 12-17
|
|
451
|
+
evening = 18-23
|
|
452
|
+
night = 0-5
|
|
453
|
+
"""
|
|
454
|
+
if 6 <= hour <= 11:
|
|
455
|
+
return "morning"
|
|
456
|
+
elif 12 <= hour <= 17:
|
|
457
|
+
return "afternoon"
|
|
458
|
+
elif 18 <= hour <= 23:
|
|
459
|
+
return "evening"
|
|
460
|
+
else: # 0-5
|
|
461
|
+
return "night"
|
|
462
|
+
|
|
463
|
+
def _parse_hour(self, timestamp: Optional[str]) -> Optional[int]:
|
|
464
|
+
"""
|
|
465
|
+
Extract the hour from a timestamp string.
|
|
466
|
+
|
|
467
|
+
Handles multiple formats gracefully:
|
|
468
|
+
- ISO 8601: '2026-02-14T09:30:00'
|
|
469
|
+
- SQLite: '2026-02-14 09:30:00'
|
|
470
|
+
- Date only: '2026-02-14' (returns None -- no time info)
|
|
471
|
+
|
|
472
|
+
Returns:
|
|
473
|
+
Hour as int (0-23), or None if parsing fails.
|
|
474
|
+
"""
|
|
475
|
+
if not timestamp:
|
|
476
|
+
return None
|
|
477
|
+
|
|
478
|
+
# Try ISO format first (handles both 'T' and space separator)
|
|
479
|
+
for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S",
|
|
480
|
+
"%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%d %H:%M:%S.%f"):
|
|
481
|
+
try:
|
|
482
|
+
dt = datetime.strptime(timestamp, fmt)
|
|
483
|
+
return dt.hour
|
|
484
|
+
except (ValueError, TypeError):
|
|
485
|
+
continue
|
|
486
|
+
|
|
487
|
+
# Last resort: fromisoformat (Python 3.7+), handles wider range
|
|
488
|
+
try:
|
|
489
|
+
dt = datetime.fromisoformat(timestamp)
|
|
490
|
+
return dt.hour
|
|
491
|
+
except (ValueError, TypeError):
|
|
492
|
+
pass
|
|
493
|
+
|
|
494
|
+
logger.debug("Could not parse timestamp: %s", timestamp)
|
|
495
|
+
return None
|
|
496
|
+
|
|
497
|
+
def _fetch_memories(self, limit: int = 500) -> List[dict]:
|
|
498
|
+
"""
|
|
499
|
+
Read recent memories from memory.db (read-only).
|
|
500
|
+
|
|
501
|
+
Fetches id, content, created_at, and project_name ordered
|
|
502
|
+
chronologically (ASC) so the activity stream preserves the
|
|
503
|
+
user's actual workflow order.
|
|
504
|
+
|
|
505
|
+
Args:
|
|
506
|
+
limit: Maximum number of memories to fetch.
|
|
507
|
+
|
|
508
|
+
Returns:
|
|
509
|
+
List of dicts with keys: id, content, created_at, project_name.
|
|
510
|
+
Returns empty list on any error.
|
|
511
|
+
"""
|
|
512
|
+
if not self.memory_db_path.exists():
|
|
513
|
+
logger.warning("memory.db not found at %s", self.memory_db_path)
|
|
514
|
+
return []
|
|
515
|
+
|
|
516
|
+
try:
|
|
517
|
+
conn = sqlite3.connect(str(self.memory_db_path), timeout=5)
|
|
518
|
+
conn.row_factory = sqlite3.Row
|
|
519
|
+
# Read-only pragmas
|
|
520
|
+
conn.execute("PRAGMA query_only=ON")
|
|
521
|
+
cursor = conn.cursor()
|
|
522
|
+
|
|
523
|
+
cursor.execute(
|
|
524
|
+
"""
|
|
525
|
+
SELECT id, content, created_at, project_name
|
|
526
|
+
FROM memories
|
|
527
|
+
ORDER BY created_at ASC
|
|
528
|
+
LIMIT ?
|
|
529
|
+
""",
|
|
530
|
+
(limit,),
|
|
531
|
+
)
|
|
532
|
+
rows = cursor.fetchall()
|
|
533
|
+
return [dict(row) for row in rows]
|
|
534
|
+
except sqlite3.Error as e:
|
|
535
|
+
logger.error("Failed to fetch memories: %s", e)
|
|
536
|
+
return []
|
|
537
|
+
finally:
|
|
538
|
+
try:
|
|
539
|
+
conn.close()
|
|
540
|
+
except Exception:
|
|
541
|
+
pass
|
|
542
|
+
|
|
543
|
+
def _persist_patterns(
|
|
544
|
+
self,
|
|
545
|
+
sequences: List[dict],
|
|
546
|
+
temporal: Dict[str, dict],
|
|
547
|
+
) -> None:
|
|
548
|
+
"""
|
|
549
|
+
Clear old patterns and store newly mined ones in learning.db.
|
|
550
|
+
|
|
551
|
+
Uses LearningDB.clear_workflow_patterns() then store_workflow_pattern()
|
|
552
|
+
for each discovered pattern. This is idempotent -- safe to call
|
|
553
|
+
repeatedly.
|
|
554
|
+
"""
|
|
555
|
+
if self.learning_db is None:
|
|
556
|
+
return
|
|
557
|
+
|
|
558
|
+
try:
|
|
559
|
+
# Clear previous patterns (idempotent re-mine)
|
|
560
|
+
self.learning_db.clear_workflow_patterns(pattern_type="sequence")
|
|
561
|
+
self.learning_db.clear_workflow_patterns(pattern_type="temporal")
|
|
562
|
+
|
|
563
|
+
# Store sequence patterns
|
|
564
|
+
for pat in sequences:
|
|
565
|
+
pattern_key = " -> ".join(pat["sequence"])
|
|
566
|
+
pattern_value = json.dumps({
|
|
567
|
+
"sequence": pat["sequence"],
|
|
568
|
+
"count": pat["count"],
|
|
569
|
+
})
|
|
570
|
+
self.learning_db.store_workflow_pattern(
|
|
571
|
+
pattern_type="sequence",
|
|
572
|
+
pattern_key=pattern_key,
|
|
573
|
+
pattern_value=pattern_value,
|
|
574
|
+
confidence=pat["support"],
|
|
575
|
+
evidence_count=pat["count"],
|
|
576
|
+
metadata={"length": pat["length"]},
|
|
577
|
+
)
|
|
578
|
+
|
|
579
|
+
# Store temporal patterns
|
|
580
|
+
for bucket_name, info in temporal.items():
|
|
581
|
+
pattern_value = json.dumps(info)
|
|
582
|
+
self.learning_db.store_workflow_pattern(
|
|
583
|
+
pattern_type="temporal",
|
|
584
|
+
pattern_key=bucket_name,
|
|
585
|
+
pattern_value=pattern_value,
|
|
586
|
+
confidence=info.get("confidence", 0.0),
|
|
587
|
+
evidence_count=info.get("evidence_count", 0),
|
|
588
|
+
metadata={"dominant_activity": info.get("dominant_activity")},
|
|
589
|
+
)
|
|
590
|
+
|
|
591
|
+
total_stored = len(sequences) + len(temporal)
|
|
592
|
+
logger.info("Persisted %d workflow patterns to learning.db", total_stored)
|
|
593
|
+
|
|
594
|
+
# Update engagement metric
|
|
595
|
+
try:
|
|
596
|
+
self.learning_db.increment_engagement(
|
|
597
|
+
"patterns_updated",
|
|
598
|
+
count=total_stored,
|
|
599
|
+
)
|
|
600
|
+
except Exception:
|
|
601
|
+
pass # Engagement tracking is best-effort
|
|
602
|
+
|
|
603
|
+
except Exception as e:
|
|
604
|
+
logger.error("Failed to persist workflow patterns: %s", e)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
# ======================================================================
|
|
608
|
+
# Standalone execution (for CLI: python3 workflow_pattern_miner.py)
|
|
609
|
+
# ======================================================================
|
|
610
|
+
|
|
611
|
+
def main():
    """CLI entry point: mine workflow patterns and pretty-print them."""
    import sys

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    )

    # LearningDB is optional -- without it, mining runs but nothing persists.
    learning_db = None
    try:
        # When run from src/learning/ directory or installed path
        sys.path.insert(0, str(Path(__file__).parent))
        from learning_db import LearningDB
        learning_db = LearningDB()
    except ImportError:
        logger.warning("LearningDB not available -- patterns will not be persisted")

    results = WorkflowPatternMiner(learning_db=learning_db).mine_all()

    # Report workflow sequences.
    found_sequences = results.get("sequences", [])
    if not found_sequences:
        print("\n No workflow sequences found.")
    else:
        print(f"\n{'='*60}")
        print(f" Workflow Sequences ({len(found_sequences)} patterns)")
        print(f"{'='*60}")
        for rank, pat in enumerate(found_sequences, 1):
            seq_str = " -> ".join(pat["sequence"])
            print(f" {rank:2d}. {seq_str}")
            print(f"     support={pat['support']:.1%} count={pat['count']} length={pat['length']}")

    # Report temporal patterns in fixed bucket order.
    found_temporal = results.get("temporal", {})
    if not found_temporal:
        print("\n No temporal patterns found (need >= 5 memories per time bucket).")
    else:
        print(f"\n{'='*60}")
        print(f" Temporal Patterns")
        print(f"{'='*60}")
        for bucket in ("morning", "afternoon", "evening", "night"):
            if bucket in found_temporal:
                info = found_temporal[bucket]
                print(f" {bucket:>10s}: {info['dominant_activity']:<14s} "
                      f"confidence={info['confidence']:.0%} "
                      f"evidence={info['evidence_count']}")

    print()


if __name__ == "__main__":
    main()
|