memuron 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. memuron/__init__.py +3 -0
  2. memuron/actions/__init__.py +12 -0
  3. memuron/actions/context.py +63 -0
  4. memuron/actions/helpers.py +88 -0
  5. memuron/actions/memory.py +340 -0
  6. memuron/actions/memory_write.py +290 -0
  7. memuron/actions/nodes.py +340 -0
  8. memuron/actions/registry.py +5 -0
  9. memuron/actions/runtime.py +37 -0
  10. memuron/actions/spaces_documents.py +720 -0
  11. memuron/actions/sync.py +155 -0
  12. memuron/application/__init__.py +1 -0
  13. memuron/application/api.py +206 -0
  14. memuron/application/app.py +103 -0
  15. memuron/application/capabilities.py +82 -0
  16. memuron/application/cli.py +35 -0
  17. memuron/application/config.py +176 -0
  18. memuron/application/mcp.py +44 -0
  19. memuron/application/mcp_oauth.py +290 -0
  20. memuron/application/registry.py +52 -0
  21. memuron/context.py +532 -0
  22. memuron/documents/__init__.py +1 -0
  23. memuron/documents/link_guardian.py +192 -0
  24. memuron/documents/linking.py +292 -0
  25. memuron/documents/parser.py +1152 -0
  26. memuron/documents/storage.py +151 -0
  27. memuron/documents/url_ingest.py +375 -0
  28. memuron/domain/__init__.py +1 -0
  29. memuron/domain/decoders.py +1 -0
  30. memuron/domain/encoders.py +185 -0
  31. memuron/domain/lifecycles.py +8 -0
  32. memuron/domain/limits.py +6 -0
  33. memuron/domain/representations.py +56 -0
  34. memuron/domain/schemas.py +581 -0
  35. memuron/domain/scope_filter.py +104 -0
  36. memuron/graphfs/__init__.py +1 -0
  37. memuron/graphfs/manual.py +635 -0
  38. memuron/graphfs/projection.py +578 -0
  39. memuron/graphfs/query.py +1782 -0
  40. memuron/graphfs/read_model.py +574 -0
  41. memuron/ingest/__init__.py +1 -0
  42. memuron/ingest/guardian.py +213 -0
  43. memuron/ingest/jobs.py +424 -0
  44. memuron/ingest/prompts.py +147 -0
  45. memuron/memory/__init__.py +1 -0
  46. memuron/memory/engine.py +35 -0
  47. memuron/memory/projections.py +452 -0
  48. memuron/memory/recipes.py +3247 -0
  49. memuron/persistence/__init__.py +1 -0
  50. memuron/persistence/db_pool.py +57 -0
  51. memuron/persistence/identity_store.py +918 -0
  52. memuron/persistence/store_helpers.py +16 -0
  53. memuron/search/__init__.py +1 -0
  54. memuron/search/fulltext.py +110 -0
  55. memuron/search/hybrid.py +284 -0
  56. memuron/search/pgvector.py +252 -0
  57. memuron/security/__init__.py +1 -0
  58. memuron/security/auth.py +143 -0
  59. memuron/security/auth_provider.py +119 -0
  60. memuron/security/authorization.py +53 -0
  61. memuron/security/clerk_scopes.py +94 -0
  62. memuron/security/clerk_webhooks.py +61 -0
  63. memuron/security/jwt_tokens.py +53 -0
  64. memuron/security/passwords.py +38 -0
  65. memuron/security/tenant.py +58 -0
  66. memuron/spaces/__init__.py +1 -0
  67. memuron/spaces/model.py +35 -0
  68. memuron/spaces/service.py +155 -0
  69. memuron/sync/__init__.py +25 -0
  70. memuron/sync/folder.py +828 -0
  71. memuron-0.1.1.dist-info/METADATA +242 -0
  72. memuron-0.1.1.dist-info/RECORD +74 -0
  73. memuron-0.1.1.dist-info/WHEEL +4 -0
  74. memuron-0.1.1.dist-info/entry_points.txt +4 -0
memuron/context.py ADDED
@@ -0,0 +1,532 @@
1
+ """Deterministic prompt context and profile assembly."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import Counter
6
+ from typing import Any
7
+
8
+ from artha_engine import ArthaEngine
9
+
10
+ from memuron.graphfs.read_model import (
11
+ containing_collection,
12
+ get_nodes,
13
+ list_entries,
14
+ node_edges,
15
+ space_edges,
16
+ )
17
+ from memuron.memory.recipes import collection_members, get_memory, search_memories
18
+ from memuron.security.tenant import org_scope_token
19
+
20
# Character budget used when the caller gives neither a char nor a token budget.
DEFAULT_CONTEXT_CHAR_BUDGET = 8_000
# Characters assumed per token when converting between token and char budgets.
TOKEN_TO_CHAR_RATIO = 4
# Upper bound on the semantic links collected for a single memory item.
MAX_LINKS_PER_ITEM = 6
# Upper bound on the number of parent collections walked for breadcrumbs.
MAX_BREADCRUMB_DEPTH = 12
24
+
25
+
26
+ def _compact(text: object) -> str:
27
+ return " ".join(str(text or "").split())
28
+
29
+
30
+ def _space_token(scope: list[str]) -> str:
31
+ return next((token for token in scope if token.startswith("space.")), "")
32
+
33
+
34
+ def _char_budget(*, token_budget: int | None, char_budget: int | None) -> int:
35
+ if char_budget is not None:
36
+ return char_budget
37
+ if token_budget is not None:
38
+ return token_budget * TOKEN_TO_CHAR_RATIO
39
+ return DEFAULT_CONTEXT_CHAR_BUDGET
40
+
41
+
42
+ def _truncate(text: str, limit: int) -> tuple[str, bool]:
43
+ if len(text) <= limit:
44
+ return text, False
45
+ if limit <= 16:
46
+ return text[:limit], True
47
+ return text[: limit - 15].rstrip() + "\n[truncated]", True
48
+
49
+
50
+ def _node_space_token(memory: dict[str, Any], preferred_space_token: str | None) -> str | None:
51
+ scope = [str(token) for token in memory.get("scope") or []]
52
+ if preferred_space_token and preferred_space_token in scope:
53
+ return preferred_space_token
54
+ return _space_token(scope) or preferred_space_token
55
+
56
+
57
+ def _collection_breadcrumbs(
58
+ engine: ArthaEngine,
59
+ *,
60
+ node_id: str,
61
+ org_token: str,
62
+ space_token: str | None,
63
+ ) -> list[dict[str, str]]:
64
+ if not space_token:
65
+ return []
66
+ breadcrumbs: list[dict[str, str]] = []
67
+ current = node_id
68
+ seen = {node_id}
69
+ for _depth in range(MAX_BREADCRUMB_DEPTH):
70
+ parent_id = containing_collection(
71
+ engine.store,
72
+ current,
73
+ org_token=org_token,
74
+ space_token=space_token,
75
+ )
76
+ if not parent_id or parent_id in seen:
77
+ break
78
+ seen.add(parent_id)
79
+ nodes = get_nodes(
80
+ engine.store,
81
+ [parent_id],
82
+ org_token=org_token,
83
+ space_token=space_token,
84
+ )
85
+ display = str(nodes[0].get("display") or parent_id) if nodes else parent_id
86
+ breadcrumbs.append(
87
+ {
88
+ "id": parent_id,
89
+ "display": display,
90
+ "path": f"/spaces/{space_token}/collections/{parent_id}",
91
+ }
92
+ )
93
+ current = parent_id
94
+ breadcrumbs.reverse()
95
+ return breadcrumbs
96
+
97
+
98
+ def _semantic_links(
99
+ engine: ArthaEngine,
100
+ *,
101
+ node_id: str,
102
+ org_token: str,
103
+ space_token: str | None,
104
+ ) -> list[dict[str, str]]:
105
+ if not space_token:
106
+ return []
107
+ output: list[dict[str, str]] = []
108
+ for edge in node_edges(
109
+ engine.store,
110
+ {node_id},
111
+ org_token=org_token,
112
+ space_token=space_token,
113
+ ):
114
+ if str(edge.get("edge_type")) == "placement":
115
+ continue
116
+ source_id = str(edge.get("source_id"))
117
+ target_id = str(edge.get("target_id"))
118
+ output.append(
119
+ {
120
+ "id": str(edge.get("edge_id")),
121
+ "source_id": source_id,
122
+ "target_id": target_id,
123
+ "direction": "outbound" if source_id == node_id else "inbound",
124
+ "other_id": target_id if source_id == node_id else source_id,
125
+ "description": str(edge.get("description") or ""),
126
+ }
127
+ )
128
+ if len(output) >= MAX_LINKS_PER_ITEM:
129
+ break
130
+ return output
131
+
132
+
133
+ def _memory_item(
134
+ engine: ArthaEngine,
135
+ *,
136
+ result: dict[str, Any],
137
+ citation_id: str,
138
+ org_token: str,
139
+ preferred_space_token: str | None,
140
+ include_breadcrumbs: bool,
141
+ include_links: bool,
142
+ ) -> dict[str, Any] | None:
143
+ memory_id = str(result.get("id") or "")
144
+ if not memory_id:
145
+ return None
146
+ try:
147
+ memory = get_memory(engine, memory_id)
148
+ except KeyError:
149
+ return None
150
+ space_token = _node_space_token(memory, preferred_space_token)
151
+ breadcrumbs = (
152
+ _collection_breadcrumbs(
153
+ engine,
154
+ node_id=memory_id,
155
+ org_token=org_token,
156
+ space_token=space_token,
157
+ )
158
+ if include_breadcrumbs
159
+ else []
160
+ )
161
+ links = (
162
+ _semantic_links(
163
+ engine,
164
+ node_id=memory_id,
165
+ org_token=org_token,
166
+ space_token=space_token,
167
+ )
168
+ if include_links
169
+ else []
170
+ )
171
+ return {
172
+ "kind": "memory",
173
+ "citation_id": citation_id,
174
+ "memory_id": memory_id,
175
+ "score": float(result.get("semantic_score") or 0.0),
176
+ "type": memory.get("type") or memory.get("node_type") or "text",
177
+ "content": str(memory.get("content") or ""),
178
+ "scope": list(memory.get("scope") or []),
179
+ "metadata": {
180
+ "node_type": memory.get("node_type") or "text",
181
+ "encoding": memory.get("encoding") or "memory",
182
+ "payload": memory.get("payload") or {},
183
+ "timestamp": memory.get("timestamp"),
184
+ },
185
+ "breadcrumbs": breadcrumbs,
186
+ "links": links,
187
+ }
188
+
189
+
190
+ def _edge_item(
191
+ *,
192
+ result: dict[str, Any],
193
+ citation_id: str,
194
+ ) -> dict[str, Any]:
195
+ source = result.get("source") if isinstance(result.get("source"), dict) else {}
196
+ target = result.get("target") if isinstance(result.get("target"), dict) else {}
197
+ return {
198
+ "kind": "relationship",
199
+ "citation_id": citation_id,
200
+ "link_id": str(result.get("id") or ""),
201
+ "score": float(result.get("semantic_score") or 0.0),
202
+ "description": str(result.get("description") or ""),
203
+ "source": {
204
+ "id": str(source.get("id") or ""),
205
+ "content": str(source.get("content") or ""),
206
+ "scope": list(source.get("scope") or []),
207
+ },
208
+ "target": {
209
+ "id": str(target.get("id") or ""),
210
+ "content": str(target.get("content") or ""),
211
+ "scope": list(target.get("scope") or []),
212
+ },
213
+ }
214
+
215
+
216
+ def _citation(item: dict[str, Any]) -> dict[str, Any]:
217
+ if item["kind"] == "relationship":
218
+ return {
219
+ "citation_id": item["citation_id"],
220
+ "kind": "relationship",
221
+ "link_id": item["link_id"],
222
+ "score": item["score"],
223
+ "source_id": item["source"]["id"],
224
+ "target_id": item["target"]["id"],
225
+ "description": item["description"],
226
+ }
227
+ return {
228
+ "citation_id": item["citation_id"],
229
+ "kind": "memory",
230
+ "memory_id": item["memory_id"],
231
+ "score": item["score"],
232
+ "scope": item["scope"],
233
+ "breadcrumbs": item["breadcrumbs"],
234
+ "links": item["links"],
235
+ "metadata": item["metadata"],
236
+ }
237
+
238
+
239
+ def _item_block(item: dict[str, Any]) -> str:
240
+ if item["kind"] == "relationship":
241
+ source_preview = _compact(item["source"]["content"])[:220]
242
+ target_preview = _compact(item["target"]["content"])[:220]
243
+ heading = (
244
+ f"[{item['citation_id']}] Relationship {item['link_id']} "
245
+ f"score={item['score']:.4f}"
246
+ )
247
+ return "\n".join(
248
+ [
249
+ heading,
250
+ f"Description: {_compact(item['description'])}",
251
+ f"Source {item['source']['id']}: {source_preview}",
252
+ f"Target {item['target']['id']}: {target_preview}",
253
+ ]
254
+ )
255
+
256
+ lines = [
257
+ f"[{item['citation_id']}] Memory {item['memory_id']} score={item['score']:.4f}",
258
+ f"Type: {item['type']}",
259
+ ]
260
+ if item["breadcrumbs"]:
261
+ path = " > ".join(str(part["display"]) for part in item["breadcrumbs"])
262
+ lines.append(f"Collection path: {path}")
263
+ if item["links"]:
264
+ for link in item["links"]:
265
+ description = _compact(link["description"])
266
+ lines.append(
267
+ f"Link {link['direction']} {link['other_id']}: {description}"
268
+ )
269
+ lines.append("Content:")
270
+ lines.append(str(item["content"]))
271
+ return "\n".join(lines)
272
+
273
+
274
def _bounded_prompt(
    *,
    query: str,
    items: list[dict[str, Any]],
    char_budget: int,
) -> tuple[str, dict[str, Any], list[dict[str, Any]]]:
    """Assemble the prompt text from ``items`` within ``char_budget`` characters.

    Returns ``(prompt_text, truncation_report, included_items)``. Items are
    appended in order until one no longer fits. That item is cut with
    ``_truncate``, flagged ``prompt_truncated=True``, and everything after it
    is counted as omitted. Included items are shallow copies of the inputs
    with a ``prompt_truncated`` flag added.
    """
    header = (
        "Memuron context block\n"
        f"Query: {query}\n"
        "Use bracketed citation IDs when citing this context.\n"
    )
    if not items:
        empty_text, was_cut = _truncate(
            header + "\nNo matching Memuron context found.",
            char_budget,
        )
        empty_report = {
            "is_truncated": was_cut,
            "omitted_items": 0,
            "truncated_items": [],
        }
        return empty_text, empty_report, []

    pieces = [header.rstrip()]
    used = len(pieces[0])
    total = len(items)
    kept: list[dict[str, Any]] = []
    cut_ids: list[str] = []
    skipped = 0
    for position, item in enumerate(items):
        block = "\n\n" + _item_block(item)
        room = char_budget - used
        if room <= 0:
            # Budget already exhausted: this item and all later ones are dropped.
            skipped = total - position
            break
        if len(block) <= room:
            kept.append({**item, "prompt_truncated": False})
            pieces.append(block)
            used += len(block)
            continue
        # Partial fit: keep what fits of this item, then stop.
        clipped, _ = _truncate(block, room)
        pieces.append(clipped)
        cut_ids.append(str(item["citation_id"]))
        kept.append({**item, "prompt_truncated": True})
        skipped = total - position - 1
        break

    report = {
        "is_truncated": bool(skipped or cut_ids),
        "omitted_items": skipped,
        "truncated_items": cut_ids,
    }
    # The final slice also bounds the case where the header alone exceeds the budget.
    return "".join(pieces)[:char_budget], report, kept
332
+
333
+
334
def assemble_context(
    engine: ArthaEngine,
    *,
    query: str,
    k: int,
    scope: list[str] | None,
    org_id: str,
    preferred_space_token: str | None = None,
    token_budget: int | None = None,
    char_budget: int | None = None,
    include_links: bool = True,
    include_breadcrumbs: bool = True,
) -> dict[str, Any]:
    """Search memories for ``query`` and build a budgeted, citable prompt block.

    Relationship hits get citation ids ``L1, L2, ...`` and memory hits get
    ``M1, M2, ...``. Memory hits that cannot be resolved are skipped without
    consuming an id. The result carries the prompt text, budget usage,
    citations, the included items and a truncation report.
    """
    hits, resolved_scope = search_memories(
        engine,
        query,
        k=k,
        scope=scope,
        include_links=include_links,
    )
    org_token = org_scope_token(org_id)

    items: list[dict[str, Any]] = []
    memory_count = 0
    link_count = 0
    for hit in hits:
        if hit.get("type") == "relationship_edge":
            link_count += 1
            items.append(_edge_item(result=hit, citation_id=f"L{link_count}"))
        else:
            entry = _memory_item(
                engine,
                result=hit,
                citation_id=f"M{memory_count + 1}",
                org_token=org_token,
                preferred_space_token=preferred_space_token,
                include_breadcrumbs=include_breadcrumbs,
                include_links=include_links,
            )
            if entry is not None:
                memory_count += 1
                items.append(entry)

    budget_chars = _char_budget(token_budget=token_budget, char_budget=char_budget)
    prompt_text, truncation, included_items = _bounded_prompt(
        query=query,
        items=items,
        char_budget=budget_chars,
    )

    used_chars = len(prompt_text)
    # Ceiling division, reported as at least one token.
    estimated_tokens = max(
        1,
        (used_chars + TOKEN_TO_CHAR_RATIO - 1) // TOKEN_TO_CHAR_RATIO,
    )
    budget_report = {
        "char_budget": budget_chars,
        "token_budget": token_budget,
        "used_chars": used_chars,
        "token_estimate": estimated_tokens,
    }
    return {
        "query": query,
        "count": len(included_items),
        "scope": resolved_scope,
        "budget": budget_report,
        "prompt_text": prompt_text,
        "citations": [_citation(entry) for entry in included_items],
        "items": included_items,
        "truncated": truncation,
    }
401
+
402
+
403
def space_profile(
    engine: ArthaEngine,
    *,
    space: dict[str, Any],
    org_id: str,
    limit: int = 1000,
) -> dict[str, Any]:
    """Summarise a space as a structured profile plus a prompt-ready text.

    ``space`` must contain ``"token"``; ``name``, ``description`` and
    ``guardian_prompt`` are optional. ``limit`` is forwarded to
    ``list_entries``, so node counts cover the entries that call returns.
    At most ten collections and ten non-collection previews are listed.
    """
    token = str(space["token"])
    org_token = org_scope_token(org_id)
    entries = list_entries(
        engine.store,
        org_token=org_token,
        space_token=token,
        limit=limit,
    )
    type_counts = Counter(
        str(entry.get("type") or entry.get("node_type") or "text")
        for entry in entries
    )

    # Split entries into collections and everything else in one pass.
    all_collections: list[dict[str, Any]] = []
    all_previews: list[dict[str, Any]] = []
    for entry in entries:
        if entry.get("node_type") == "collection":
            all_collections.append(
                {
                    "id": entry["id"],
                    "display": entry.get("display"),
                    "preview": entry.get("preview"),
                    "path": f"/spaces/{token}/collections/{entry['id']}",
                }
            )
        else:
            all_previews.append(
                {
                    "id": entry["id"],
                    "type": entry.get("node_type"),
                    "display": entry.get("display"),
                    "preview": entry.get("preview"),
                }
            )
    collections = all_collections[:10]
    previews = all_previews[:10]

    edges = space_edges(
        engine.store,
        org_token=org_token,
        space_token=token,
        include_placements=False,
    )
    node_total = len(entries)
    link_total = len(edges)

    profile = {
        "space_token": token,
        "name": space.get("name"),
        "description": space.get("description") or "",
        "guardian_prompt": space.get("guardian_prompt") or "",
        "counts": {
            "nodes": node_total,
            "semantic_links": link_total,
            "by_type": dict(sorted(type_counts.items())),
        },
        "collections": collections,
        "previews": previews,
    }

    text_lines = [
        f"Space profile: {space.get('name')} ({token})",
        f"Description: {space.get('description') or '(none)'}",
        f"Nodes: {node_total}; semantic links: {link_total}",
    ]
    if collections:
        text_lines.append("Collections:")
        for entry in collections:
            text_lines.append(f"- {entry['display']} ({entry['id']})")
    if previews:
        text_lines.append("Representative memories:")
        for entry in previews:
            text_lines.append(f"- [{entry['id']}] {entry['preview']}")
    return {"profile": profile, "prompt_text": "\n".join(text_lines)}
475
+
476
+
477
def collection_profile(
    engine: ArthaEngine,
    *,
    collection_id: str,
    org_id: str,
) -> dict[str, Any]:
    """Summarise a collection node as a structured profile plus prompt text.

    Raises:
        ValueError: if ``collection_id`` resolves to a node whose
            ``node_type`` is not ``"collection"``.

    Member counts cover every member returned by ``collection_members``;
    previews are limited to the first twenty.
    """
    collection = get_memory(engine, collection_id)
    if collection.get("node_type") != "collection":
        raise ValueError("collection_id must refer to a collection node")

    org_token = org_scope_token(org_id)
    scope_tokens = [str(entry) for entry in collection.get("scope") or []]
    token = _space_token(scope_tokens)
    breadcrumbs = _collection_breadcrumbs(
        engine,
        node_id=collection_id,
        org_token=org_token,
        space_token=token,
    )

    members = collection_members(engine, collection_id)
    type_counts = Counter(
        str(member["node"].get("node_type") or "text") for member in members
    )
    member_previews = []
    for member in members[:20]:
        node = member["node"]
        member_previews.append(
            {
                "id": node["id"],
                "type": node.get("node_type"),
                "name": member["placement"].get("name"),
                "preview": node.get("preview"),
            }
        )

    # Prefer the payload's name; fall back to the node preview.
    payload = collection.get("payload") or {}
    display_name = payload.get("name") or collection.get("preview")
    member_total = len(members)
    profile = {
        "id": collection_id,
        "name": display_name,
        "summary": collection.get("content") or "",
        "scope": collection.get("scope") or [],
        "breadcrumbs": breadcrumbs,
        "counts": {"members": member_total, "by_type": dict(sorted(type_counts.items()))},
        "members": member_previews,
    }

    text_lines = [
        f"Collection profile: {profile['name']} ({collection_id})",
        f"Summary: {_compact(profile['summary']) or '(none)'}",
        f"Members: {member_total}",
    ]
    if breadcrumbs:
        parent_path = " > ".join(part["display"] for part in breadcrumbs)
        text_lines.append("Parent path: " + parent_path)
    if member_previews:
        text_lines.append("Direct members:")
        for entry in member_previews:
            text_lines.append(f"- [{entry['id']}] {entry['name']}: {entry['preview']}")
    return {"profile": profile, "prompt_text": "\n".join(text_lines)}
@@ -0,0 +1 @@
1
+ """Document parsing, source-object storage, and document-link planning."""