conceptkernel 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cklib/prov.py ADDED
@@ -0,0 +1,631 @@
1
+ """PROV-O session and provenance for any Concept Kernel.
2
+
3
+ Generic provenance recording — no domain-specific code. Any kernel can
4
+ import ProvChain to track sessions, record actions with before/after state,
5
+ and run verified-action loops (execute -> verify -> record -> retry).
6
+
7
+ Storage: storage/sessions/{session_id}.json (DATA loop)
8
+ Retention: configurable via retention_hours (default 48)
9
+
10
+ Usage:
11
+ from cklib.prov import ProvChain, verified_action
12
+
13
+ prov = ProvChain(
14
+ storage_dir="concepts/MyKernel",
15
+ label="deploy-widgets",
16
+ kernel_urn="ckp://Kernel#MyKernel:v1.0",
17
+ )
18
+
19
+ prov.record_action(
20
+ action_type="widget.deploy",
21
+ outcome="verified",
22
+ kernel="MyKernel",
23
+ intent={"target": "prod"},
24
+ evidence={"deployed": True},
25
+ )
26
+
27
+ verified_action(
28
+ action_fn=lambda: do_deploy(),
29
+ verify_fn=lambda: check_deployed(),
30
+ prov=prov,
31
+ action_type="widget.deploy",
32
+ kernel="MyKernel",
33
+ )
34
+
35
+ prov.close()
36
+ """
37
+
38
+ import glob
39
+ import json
40
+ import os
41
+ import sys
42
+ import time as _time
43
+ import uuid
44
+ from dataclasses import dataclass, field, asdict
45
+ from datetime import datetime, timedelta, timezone
46
+ from typing import Callable
47
+
48
+ from cklib.urn import build_action_urn, build_kernel_urn
49
+
50
+ __all__ = [
51
+ "ProvChain",
52
+ "Session",
53
+ "ActionRecord",
54
+ "verified_action",
55
+ "list_sessions",
56
+ "get_session",
57
+ ]
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # JSON serialisation helper — handles datetime, sets, dataclass fallback
62
+ # ---------------------------------------------------------------------------
63
+
64
def _json_default(obj):
    """Serialise values the stock JSON encoder cannot handle.

    Intended to be passed as ``default=`` to ``json.dumps``.

    Args:
        obj: the value the encoder could not serialise on its own.

    Returns:
        For a ``datetime``: a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` string.
        For a ``set``: a sorted list of its members.

    Raises:
        TypeError: for any other type, mirroring the encoder's own error.
    """
    if isinstance(obj, datetime):
        # The trailing "Z" asserts UTC, so an aware value expressed in another
        # zone must be converted first. Naive values are formatted unchanged.
        if obj.utcoffset() is not None:
            obj = obj.astimezone(timezone.utc)
        return obj.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    if isinstance(obj, set):
        return sorted(obj)
    raise TypeError("Object of type %s is not JSON serializable" % type(obj).__name__)
71
+
72
+
73
def _to_dict(obj):
    """Turn a dataclass instance into a plain dict without None-valued keys.

    The instance is converted with ``dataclasses.asdict`` and the result is
    then pruned by ``_strip_none``.
    """
    return _strip_none(asdict(obj))
77
+
78
+
79
def _strip_none(d):
    """Return ``d`` with every None-valued dict entry removed, recursively.

    Dicts are rebuilt without the keys whose value is None; lists are rebuilt
    element by element (None list elements are kept); any other value is
    returned unchanged.
    """
    if not isinstance(d, (dict, list)):
        return d
    if isinstance(d, dict):
        pruned = {}
        for key, value in d.items():
            if value is None:
                continue
            pruned[key] = _strip_none(value)
        return pruned
    return list(map(_strip_none, d))
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # ActionRecord
90
+ # ---------------------------------------------------------------------------
91
+
92
+ @dataclass
93
+ class ActionRecord:
94
+ """A single recorded action within a provenance session.
95
+
96
+ Fields follow PROV-O: the action was generated by a kernel,
97
+ optionally via an edge predicate (COMPOSES / EXTENDS / REQUIRES).
98
+ intent captures what the action wanted; evidence captures what happened.
99
+ """
100
+ seq: int
101
+ action_type: str
102
+ outcome: str # "verified" | "failed" | "retry" | "skipped"
103
+ urn: str # CKP Action URN
104
+ kernel: str # CKP Kernel URN of who performed this
105
+ via_edge: str | None = None # "COMPOSES" | "EXTENDS" | "REQUIRES" | None
106
+ intent: dict | None = None # what the action wanted to achieve
107
+ evidence: dict | None = None # what actually happened
108
+ before_state: dict | None = None
109
+ after_state: dict | None = None
110
+ diff_summary: str | None = None
111
+ attempt: int = 1
112
+ generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
113
+ note: str = ""
114
+
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # Session
118
+ # ---------------------------------------------------------------------------
119
+
120
+ @dataclass
121
+ class Session:
122
+ """A provenance session — one per ProvChain lifecycle.
123
+
124
+ session_id is a short UUID prefix for human-friendly reference.
125
+ All timestamps are UTC.
126
+ """
127
+ session_id: str
128
+ urn: str
129
+ label: str
130
+ kernel_action: str # which action initiated this session
131
+ started_at: datetime
132
+ ended_at: datetime | None = None
133
+ was_associated_with: str = "" # kernel URN
134
+ duration_ms: int | None = None
135
+ outcome: str = "in_progress" # "complete" | "failed" | "partial" | "in_progress"
136
+ kernels_involved: list = field(default_factory=list)
137
+ actions: list = field(default_factory=list) # list[ActionRecord]
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # ProvChain — the session recorder
142
+ # ---------------------------------------------------------------------------
143
+
144
class ProvChain:
    """Generic PROV-O session recorder for any Concept Kernel.

    Creates a session file under ``<storage_dir>/storage/sessions/`` and
    rewrites it (temp file + ``os.replace``) on construction, after every
    recorded action, and on ``close()``. Session files older than
    ``retention_hours`` are deleted whenever a new chain is constructed.

    Usage:
        from cklib.prov import ProvChain

        prov = ProvChain(
            storage_dir="concepts/MyKernel",
            label="my-operation",
            kernel_urn="ckp://Kernel#MyKernel:v1.0",
        )
        prov.record_action(
            action_type="process.data",
            outcome="verified",
            kernel="MyKernel",
            intent={"target": "file.txt"},
            evidence={"processed": True},
        )
        prov.close()
    """

    def __init__(
        self,
        storage_dir: str,
        label: str,
        kernel_urn: str,
        kernel_action: str = "",
        retention_hours: int = 48,
    ):
        """Open a new session and write its initial file.

        Args:
            storage_dir: kernel root; sessions go to ``storage/sessions`` below it
            label: human-readable session label
            kernel_urn: URN of the kernel the session is associated with
            kernel_action: which action initiated this session
            retention_hours: age after which older session files are removed
        """
        self.storage_dir = storage_dir
        self.label = label
        self.kernel_urn = kernel_urn
        self.kernel_action = kernel_action
        self.retention_hours = retention_hours

        # Derive kernel short-name from URN for involved-set
        self._kernel_name = self._name_from_urn(kernel_urn)
        self._kernels_involved: set = {self._kernel_name}

        self.session_id = str(uuid.uuid4())[:8]
        self._started_at = datetime.now(timezone.utc)
        self._seq = 0
        self._actions: list[ActionRecord] = []

        # Session dir: storage/sessions/
        self._sessions_dir = os.path.join(storage_dir, "storage", "sessions")
        os.makedirs(self._sessions_dir, exist_ok=True)
        self._file = os.path.join(self._sessions_dir, "%s.json" % self.session_id)

        self._enforce_retention()

        # Initial flush — creates the session file immediately
        self._flush()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def record_action(
        self,
        action_type: str,
        outcome: str,
        kernel: str = "",
        via_edge: str | None = None,
        intent: dict | None = None,
        evidence: dict | None = None,
        before_state: dict | None = None,
        after_state: dict | None = None,
        attempt: int = 1,
        note: str = "",
    ) -> ActionRecord:
        """Record an action in this session.

        The record is appended, the session file is rewritten, and a live
        line is printed to stderr.

        Args:
            action_type: dotted action name (e.g. "task.create", "deploy.apply")
            outcome: "verified" | "failed" | "retry" | "skipped"
            kernel: short kernel name (e.g. "CK.Task") — URN built
                automatically; a value starting with "ckp://" is stored as
                the kernel URN unchanged; empty means the session's kernel
            via_edge: edge predicate if action crosses a kernel boundary
            intent: dict describing what the action wanted
            evidence: dict describing what actually happened
            before_state: optional state snapshot before the action
            after_state: optional state snapshot after the action
            attempt: attempt number (1-based, for retries)
            note: human-readable note

        Returns:
            The ActionRecord that was created.
        """
        self._seq += 1

        # Resolve kernel name and URN
        if not kernel:
            kernel = self._kernel_name
        # NOTE(review): the value is stored verbatim, so a caller passing a
        # full "ckp://" URN adds the URN (not a short name) to the involved
        # set and to build_action_urn — confirm that is intended.
        self._kernels_involved.add(kernel)

        ts_ms = int(_time.time() * 1000)
        action_urn = build_action_urn(kernel, action_type, ts_ms)

        # Build kernel URN if not already one
        if kernel.startswith("ckp://"):
            kernel_urn = kernel
        else:
            kernel_urn = build_kernel_urn(kernel)

        # Compute diff summary if both states provided. Compare against None
        # rather than truthiness: an empty dict is still a provided snapshot.
        diff_summary = None
        if before_state is not None and after_state is not None:
            diff_summary = _compute_state_diff(before_state, after_state)

        record = ActionRecord(
            seq=self._seq,
            action_type=action_type,
            outcome=outcome,
            urn=action_urn,
            kernel=kernel_urn,
            via_edge=via_edge,
            intent=intent,
            evidence=evidence,
            before_state=before_state,
            after_state=after_state,
            diff_summary=diff_summary,
            attempt=attempt,
            generated_at=datetime.now(timezone.utc),
            note=note,
        )
        self._actions.append(record)
        self._flush()
        self._emit_live(record)
        return record

    def close(self, outcome: str = "") -> Session:
        """Finalize the session and write it with an end time.

        If outcome is not specified, it is derived from action results:
          - no action with outcome "failed" (including no actions) -> "complete"
          - some "failed" and at least one "verified" -> "partial"
          - some "failed" and no "verified" -> "failed"

        Returns:
            The Session that was written to disk.
        """
        if not outcome:
            outcome = self._derive_outcome()
        return self._flush(
            ended_at=datetime.now(timezone.utc),
            outcome=outcome,
        )

    @property
    def file_path(self) -> str:
        """Path to the session JSON file."""
        return self._file

    @property
    def action_count(self) -> int:
        """Number of actions recorded so far."""
        return len(self._actions)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _flush(self, ended_at=None, outcome=None) -> Session:
        """Write the full session to disk via temp file + rename.

        Args:
            ended_at: end timestamp; when given, duration_ms is computed
            outcome: session outcome; falls back to "in_progress"

        Returns:
            The Session snapshot that was serialised.
        """
        duration_ms = None
        if ended_at:
            duration_ms = int((ended_at - self._started_at).total_seconds() * 1000)

        session = Session(
            session_id=self.session_id,
            urn=build_action_urn(
                self._kernel_name,
                "session",
                int(self._started_at.timestamp() * 1000),
            ),
            label=self.label,
            kernel_action=self.kernel_action,
            started_at=self._started_at,
            ended_at=ended_at,
            was_associated_with=self.kernel_urn,
            duration_ms=duration_ms,
            outcome=outcome or "in_progress",
            kernels_involved=sorted(self._kernels_involved),
            actions=self._actions,
        )

        data = _to_dict(session)
        content = json.dumps(data, indent=2, default=_json_default)

        # Write to a sibling temp file, then rename over the real file so a
        # reader never sees a half-written session.
        tmp = self._file + ".tmp"
        with open(tmp, "w", encoding="utf-8") as f:
            f.write(content)
        os.replace(tmp, self._file)

        return session

    def _derive_outcome(self) -> str:
        """Derive session outcome from recorded actions."""
        outcomes = {a.outcome for a in self._actions}
        if "failed" not in outcomes:
            return "complete"
        return "partial" if "verified" in outcomes else "failed"

    def _emit_live(self, action: ActionRecord):
        """Emit a live provenance line to stderr for observability."""
        ts = datetime.now(timezone.utc).strftime("%H:%M:%S.%f")[:12]

        # Show the short kernel name rather than the full URN.
        kernel_name = self._name_from_urn(action.kernel or self._kernel_name)

        sym = {
            "verified": "+",
            "failed": "x",
            "retry": "~",
            "skipped": "-",
        }.get(action.outcome, "?")

        detail = action.diff_summary or action.note or ""

        attempt_str = ""
        if action.attempt > 1:
            attempt_str = " (attempt %d)" % action.attempt

        print(
            "[%s] %-24s %-24s %s %-8s %s%s" % (
                ts, kernel_name, action.action_type,
                sym, action.outcome, detail, attempt_str,
            ),
            file=sys.stderr,
        )

    def _enforce_retention(self):
        """Delete session files older than retention_hours (by mtime)."""
        cutoff = datetime.now(timezone.utc) - timedelta(hours=self.retention_hours)
        for f in glob.glob(os.path.join(self._sessions_dir, "*.json")):
            try:
                mtime_ts = os.path.getmtime(f)
                mtime = datetime.fromtimestamp(mtime_ts, tz=timezone.utc)
                if mtime < cutoff:
                    os.remove(f)
            except OSError:
                # Best effort: a file that cannot be inspected or removed
                # must not prevent the new session from starting.
                pass

    @staticmethod
    def _name_from_urn(urn: str) -> str:
        """Extract kernel short name from a CKP Kernel URN.

        ``ckp://Kernel#MyKernel:v1.0`` -> ``MyKernel``. A string without
        ``Kernel#`` is returned unchanged.
        """
        if "Kernel#" not in urn:
            return urn
        return urn.split("Kernel#")[1].split(":")[0]
405
+
406
+
407
+ # ---------------------------------------------------------------------------
408
+ # Standalone helpers
409
+ # ---------------------------------------------------------------------------
410
+
411
def _compute_state_diff(before: dict, after: dict) -> str:
    """Compute a human-readable diff summary between two state dicts.

    Only top-level keys are compared. A changed key is reported as
    ``key: old -> new`` when either side is an int, float, str or bool, and
    as ``key: changed`` otherwise. A key missing on one side reads as None.

    Args:
        before: state snapshot taken before the action
        after: state snapshot taken after the action

    Returns:
        The per-key reports joined with " | ", or "no change".
    """
    keys = before.keys() | after.keys()
    try:
        ordered = sorted(keys)
    except TypeError:
        # Keys of mutually unorderable types (e.g. int and str) cannot be
        # sorted directly; fall back to a stable order by repr.
        ordered = sorted(keys, key=repr)

    scalar_types = (int, float, str, bool)
    parts = []
    for key in ordered:
        bval = before.get(key)
        aval = after.get(key)
        if bval == aval:
            continue
        if isinstance(bval, scalar_types) or isinstance(aval, scalar_types):
            parts.append("%s: %s -> %s" % (key, bval, aval))
        else:
            # Wrap in a 1-tuple so a tuple key is formatted as one value.
            parts.append("%s: changed" % (key,))
    return " | ".join(parts) if parts else "no change"
428
+
429
+
430
def list_sessions(storage_dir: str) -> list[dict]:
    """Load all sessions from storage/sessions/, newest first.

    Files that cannot be read or parsed, and files whose JSON payload is not
    an object, are skipped.

    Args:
        storage_dir: the CK root directory (e.g. concepts/MyKernel/)

    Returns:
        List of session dicts, sorted by started_at descending. Empty when
        the sessions directory does not exist.
    """
    sessions_dir = os.path.join(storage_dir, "storage", "sessions")
    if not os.path.isdir(sessions_dir):
        return []

    sessions = []
    # The "*.json" pattern cannot match in-flight "<id>.json.tmp" files, so
    # no extra filtering is needed for them.
    for fpath in glob.glob(os.path.join(sessions_dir, "*.json")):
        try:
            with open(fpath, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError):
            # Unreadable or malformed file: listing stays best-effort.
            continue
        # Anything but a JSON object is not a session and would break the
        # sort key below.
        if isinstance(data, dict):
            sessions.append(data)

    sessions.sort(
        key=lambda s: str(s.get("started_at") or ""),
        reverse=True,
    )
    return sessions
459
+
460
+
461
+ def get_session(storage_dir: str, session_id: str) -> dict | None:
462
+ """Load a session by ID or prefix match.
463
+
464
+ Args:
465
+ storage_dir: the CK root directory
466
+ session_id: full or prefix of session_id
467
+
468
+ Returns:
469
+ Session dict or None.
470
+ """
471
+ sessions_dir = os.path.join(storage_dir, "storage", "sessions")
472
+ if not os.path.isdir(sessions_dir):
473
+ return None
474
+
475
+ # Exact match
476
+ exact = os.path.join(sessions_dir, "%s.json" % session_id)
477
+ if os.path.exists(exact):
478
+ with open(exact) as f:
479
+ return json.load(f)
480
+
481
+ # Prefix match
482
+ for fpath in glob.glob(os.path.join(sessions_dir, "%s*.json" % session_id)):
483
+ if fpath.endswith(".tmp"):
484
+ continue
485
+ try:
486
+ with open(fpath) as f:
487
+ return json.load(f)
488
+ except Exception:
489
+ continue
490
+ return None
491
+
492
+
493
+ # ---------------------------------------------------------------------------
494
+ # Verified action helper
495
+ # ---------------------------------------------------------------------------
496
+
497
+ def verified_action(
498
+ action_fn: Callable,
499
+ verify_fn: Callable[[], bool],
500
+ prov: ProvChain,
501
+ action_type: str,
502
+ kernel: str,
503
+ via_edge: str | None = None,
504
+ intent: dict | None = None,
505
+ max_attempts: int = 3,
506
+ settle_delay: float = 0.5,
507
+ capture_state_fn: Callable | None = None,
508
+ ) -> bool:
509
+ """Execute an action, verify it worked, record provenance.
510
+
511
+ Pattern: execute -> wait -> verify -> record outcome -> retry if needed.
512
+
513
+ This is the GENERIC version of YB.CLI's verified_focus_space /
514
+ verified_move_window / verified_bar_binding pattern. The caller provides:
515
+
516
+ - action_fn: callable that performs the action (no return needed)
517
+ - verify_fn: callable returning True if the action succeeded
518
+ - capture_state_fn: optional callable returning a dict snapshot of state
519
+
520
+ On success, records outcome="verified". On failure after all attempts,
521
+ records outcome="failed". Returns True/False.
522
+
523
+ Args:
524
+ action_fn: the action to execute
525
+ verify_fn: returns True if the action succeeded
526
+ prov: ProvChain session to record into
527
+ action_type: dotted action name
528
+ kernel: kernel performing the action
529
+ via_edge: edge predicate if crossing kernel boundary
530
+ intent: dict describing what the action wants
531
+ max_attempts: max retry count (default 3)
532
+ settle_delay: seconds to wait after action before verify (default 0.5)
533
+ capture_state_fn: optional callable returning state dict
534
+
535
+ Returns:
536
+ True if verified within max_attempts, False otherwise.
537
+ """
538
+ import time
539
+
540
+ for attempt in range(1, max_attempts + 1):
541
+ # Capture before state
542
+ before_state = None
543
+ if capture_state_fn:
544
+ try:
545
+ before_state = capture_state_fn()
546
+ except Exception:
547
+ before_state = None
548
+
549
+ # Execute
550
+ try:
551
+ action_fn()
552
+ except Exception as e:
553
+ prov.record_action(
554
+ action_type=action_type,
555
+ outcome="failed",
556
+ kernel=kernel,
557
+ via_edge=via_edge,
558
+ intent=intent,
559
+ evidence={"error": str(e), "exception_type": type(e).__name__},
560
+ before_state=before_state,
561
+ attempt=attempt,
562
+ note="action raised: %s" % str(e),
563
+ )
564
+ if attempt < max_attempts:
565
+ time.sleep(settle_delay)
566
+ continue
567
+ return False
568
+
569
+ # Settle
570
+ if settle_delay > 0:
571
+ time.sleep(settle_delay)
572
+
573
+ # Capture after state
574
+ after_state = None
575
+ if capture_state_fn:
576
+ try:
577
+ after_state = capture_state_fn()
578
+ except Exception:
579
+ after_state = None
580
+
581
+ # Verify
582
+ try:
583
+ verified = verify_fn()
584
+ except Exception as e:
585
+ prov.record_action(
586
+ action_type=action_type,
587
+ outcome="failed",
588
+ kernel=kernel,
589
+ via_edge=via_edge,
590
+ intent=intent,
591
+ evidence={"verify_error": str(e)},
592
+ before_state=before_state,
593
+ after_state=after_state,
594
+ attempt=attempt,
595
+ note="verify raised: %s" % str(e),
596
+ )
597
+ if attempt < max_attempts:
598
+ time.sleep(settle_delay)
599
+ continue
600
+ return False
601
+
602
+ if verified:
603
+ prov.record_action(
604
+ action_type=action_type,
605
+ outcome="verified",
606
+ kernel=kernel,
607
+ via_edge=via_edge,
608
+ intent=intent,
609
+ evidence={"verified": True},
610
+ before_state=before_state,
611
+ after_state=after_state,
612
+ attempt=attempt,
613
+ )
614
+ return True
615
+ else:
616
+ prov.record_action(
617
+ action_type=action_type,
618
+ outcome="failed" if attempt == max_attempts else "retry",
619
+ kernel=kernel,
620
+ via_edge=via_edge,
621
+ intent=intent,
622
+ evidence={"verified": False},
623
+ before_state=before_state,
624
+ after_state=after_state,
625
+ attempt=attempt,
626
+ note="verification failed" if attempt == max_attempts else "retrying",
627
+ )
628
+ if attempt < max_attempts:
629
+ time.sleep(settle_delay)
630
+
631
+ return False